{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.4817558807186515, "global_step": 8000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.0009999978838190456, "loss": 2.9794, "step": 10 }, { "epoch": 0.0, "learning_rate": 0.0009999915352940948, "loss": 2.3885, "step": 20 }, { "epoch": 0.01, "learning_rate": 0.000999980954478887, "loss": 2.3057, "step": 30 }, { "epoch": 0.01, "learning_rate": 0.000999966141462985, "loss": 2.2692, "step": 40 }, { "epoch": 0.01, "learning_rate": 0.000999947096371777, "loss": 2.2576, "step": 50 }, { "epoch": 0.01, "learning_rate": 0.0009999238193664748, "loss": 2.2388, "step": 60 }, { "epoch": 0.01, "learning_rate": 0.0009998963106441117, "loss": 2.2523, "step": 70 }, { "epoch": 0.01, "learning_rate": 0.0009998645704375414, "loss": 2.218, "step": 80 }, { "epoch": 0.02, "learning_rate": 0.000999828599015436, "loss": 2.2457, "step": 90 }, { "epoch": 0.02, "learning_rate": 0.0009997883966822835, "loss": 2.198, "step": 100 }, { "epoch": 0.02, "learning_rate": 0.0009997439637783859, "loss": 2.2013, "step": 110 }, { "epoch": 0.02, "learning_rate": 0.000999695300679855, "loss": 2.1765, "step": 120 }, { "epoch": 0.02, "learning_rate": 0.0009996424077986109, "loss": 2.1741, "step": 130 }, { "epoch": 0.03, "learning_rate": 0.000999585285582377, "loss": 2.1898, "step": 140 }, { "epoch": 0.03, "learning_rate": 0.0009995239345146772, "loss": 2.1466, "step": 150 }, { "epoch": 0.03, "learning_rate": 0.0009994583551148314, "loss": 2.1423, "step": 160 }, { "epoch": 0.03, "learning_rate": 0.0009993885479379506, "loss": 2.1451, "step": 170 }, { "epoch": 0.03, "learning_rate": 0.000999314513574934, "loss": 2.202, "step": 180 }, { "epoch": 0.04, "learning_rate": 0.0009992362526524616, "loss": 2.1208, "step": 190 }, { "epoch": 0.04, "learning_rate": 0.0009991537658329906, "loss": 2.1591, "step": 200 }, { "epoch": 0.04, "learning_rate": 0.000999067053814749, "loss": 2.1788, "step": 210 }, { "epoch": 0.04, "learning_rate": 0.0009989761173317304, "loss": 2.147, "step": 220 }, { "epoch": 0.04, "learning_rate": 0.000998880957153687, "loss": 2.1249, "step": 230 }, { "epoch": 0.04, "learning_rate": 0.000998781574086123, "loss": 2.165, "step": 240 }, { "epoch": 0.05, "learning_rate": 0.000998677968970289, "loss": 2.1428, "step": 250 }, { "epoch": 0.05, "learning_rate": 0.0009985701426831735, "loss": 2.1384, "step": 260 }, { "epoch": 0.05, "learning_rate": 0.0009984580961374964, "loss": 2.1585, "step": 270 }, { "epoch": 0.05, "learning_rate": 0.0009983418302817008, "loss": 2.1156, "step": 280 }, { "epoch": 0.05, "learning_rate": 0.0009982213460999448, "loss": 2.0811, "step": 290 }, { "epoch": 0.06, "learning_rate": 0.000998096644612094, "loss": 2.1081, "step": 300 }, { "epoch": 0.06, "learning_rate": 0.0009979677268737118, "loss": 2.1246, "step": 310 }, { "epoch": 0.06, "learning_rate": 0.0009978345939760515, "loss": 2.1229, "step": 320 }, { "epoch": 0.06, "learning_rate": 0.000997697247046046, "loss": 2.1033, "step": 330 }, { "epoch": 0.06, "learning_rate": 0.0009975556872462994, "loss": 2.0931, "step": 340 }, { "epoch": 0.06, "learning_rate": 0.000997409915775076, "loss": 2.1206, "step": 350 }, { "epoch": 0.07, "learning_rate": 0.0009972599338662915, "loss": 2.0537, "step": 360 }, { "epoch": 0.07, "learning_rate": 0.0009971057427895012, "loss": 2.0762, "step": 370 }, { "epoch": 0.07, "learning_rate": 0.0009969473438498897, "loss": 2.0883, "step": 380 }, { "epoch": 0.07, "learning_rate": 0.0009967847383882604, "loss": 2.1174, "step": 390 }, { "epoch": 0.07, "learning_rate": 0.0009966179277810239, "loss": 2.1111, "step": 400 }, { "epoch": 0.08, "learning_rate": 0.0009964469134401855, "loss": 2.1288, "step": 410 }, { "epoch": 0.08, "learning_rate": 0.0009962716968133346, "loss": 2.0967, "step": 420 }, { "epoch": 0.08, "learning_rate": 0.0009960922793836318, "loss": 2.1216, "step": 430 }, { "epoch": 0.08, "learning_rate": 0.0009959086626697955, "loss": 2.0924, "step": 440 }, { "epoch": 0.08, "learning_rate": 0.0009957208482260908, "loss": 2.0809, "step": 450 }, { "epoch": 0.09, "learning_rate": 0.0009955288376423152, "loss": 2.1082, "step": 460 }, { "epoch": 0.09, "learning_rate": 0.0009953326325437852, "loss": 2.0885, "step": 470 }, { "epoch": 0.09, "learning_rate": 0.0009951322345913224, "loss": 2.1133, "step": 480 }, { "epoch": 0.09, "learning_rate": 0.0009949276454812408, "loss": 2.0844, "step": 490 }, { "epoch": 0.09, "learning_rate": 0.00099471886694533, "loss": 2.0796, "step": 500 }, { "epoch": 0.09, "learning_rate": 0.0009945059007508434, "loss": 2.1255, "step": 510 }, { "epoch": 0.1, "learning_rate": 0.0009942887487004804, "loss": 2.0913, "step": 520 }, { "epoch": 0.1, "learning_rate": 0.0009940674126323733, "loss": 2.1003, "step": 530 }, { "epoch": 0.1, "learning_rate": 0.0009938418944200709, "loss": 2.0541, "step": 540 }, { "epoch": 0.1, "learning_rate": 0.0009936121959725223, "loss": 2.0523, "step": 550 }, { "epoch": 0.1, "learning_rate": 0.0009933783192340618, "loss": 2.1225, "step": 560 }, { "epoch": 0.11, "learning_rate": 0.0009931402661843911, "loss": 2.0446, "step": 570 }, { "epoch": 0.11, "learning_rate": 0.000992898038838564, "loss": 2.0921, "step": 580 }, { "epoch": 0.11, "learning_rate": 0.0009926516392469674, "loss": 2.1081, "step": 590 }, { "epoch": 0.11, "learning_rate": 0.0009924010694953064, "loss": 2.0734, "step": 600 }, { "epoch": 0.11, "learning_rate": 0.0009921463317045843, "loss": 2.0652, "step": 610 }, { "epoch": 0.11, "learning_rate": 0.0009918874280310862, "loss": 2.0818, "step": 620 }, { "epoch": 0.12, "learning_rate": 0.0009916243606663605, "loss": 2.0776, "step": 630 }, { "epoch": 0.12, "learning_rate": 0.0009913571318371994, "loss": 2.1025, "step": 640 }, { "epoch": 0.12, "learning_rate": 0.0009910857438056215, "loss": 2.066, "step": 650 }, { "epoch": 0.12, "learning_rate": 0.0009908101988688512, "loss": 2.0575, "step": 660 }, { "epoch": 0.12, "learning_rate": 0.0009905304993593008, "loss": 2.1269, "step": 670 }, { "epoch": 0.13, "learning_rate": 0.0009902466476445486, "loss": 2.0518, "step": 680 }, { "epoch": 0.13, "learning_rate": 0.0009899586461273218, "loss": 2.0698, "step": 690 }, { "epoch": 0.13, "learning_rate": 0.000989666497245473, "loss": 2.0988, "step": 700 }, { "epoch": 0.13, "learning_rate": 0.0009893702034719624, "loss": 2.0986, "step": 710 }, { "epoch": 0.13, "learning_rate": 0.0009890697673148345, "loss": 2.0237, "step": 720 }, { "epoch": 0.14, "learning_rate": 0.0009887651913171986, "loss": 2.0027, "step": 730 }, { "epoch": 0.14, "learning_rate": 0.0009884564780572064, "loss": 2.0563, "step": 740 }, { "epoch": 0.14, "learning_rate": 0.0009881436301480305, "loss": 2.0624, "step": 750 }, { "epoch": 0.14, "learning_rate": 0.000987826650237842, "loss": 2.0926, "step": 760 }, { "epoch": 0.14, "learning_rate": 0.000987505541009788, "loss": 2.0585, "step": 770 }, { "epoch": 0.14, "learning_rate": 0.0009871803051819696, "loss": 2.0494, "step": 780 }, { "epoch": 0.15, "learning_rate": 0.0009868509455074183, "loss": 2.0106, "step": 790 }, { "epoch": 0.15, "learning_rate": 0.0009865174647740729, "loss": 2.0861, "step": 800 }, { "epoch": 0.15, "learning_rate": 0.0009861798658047556, "loss": 2.0478, "step": 810 }, { "epoch": 0.15, "learning_rate": 0.0009858381514571484, "loss": 2.0469, "step": 820 }, { "epoch": 0.15, "learning_rate": 0.000985492324623769, "loss": 2.0671, "step": 830 }, { "epoch": 0.16, "learning_rate": 0.0009851423882319458, "loss": 2.0808, "step": 840 }, { "epoch": 0.16, "learning_rate": 0.0009847883452437937, "loss": 2.0331, "step": 850 }, { "epoch": 0.16, "learning_rate": 0.0009844301986561893, "loss": 2.0295, "step": 860 }, { "epoch": 0.16, "learning_rate": 0.000984067951500744, "loss": 2.0873, "step": 870 }, { "epoch": 0.16, "learning_rate": 0.00098370160684378, "loss": 2.1038, "step": 880 }, { "epoch": 0.16, "learning_rate": 0.0009833311677863042, "loss": 2.0337, "step": 890 }, { "epoch": 0.17, "learning_rate": 0.0009829566374639801, "loss": 2.0407, "step": 900 }, { "epoch": 0.17, "learning_rate": 0.0009825780190471042, "loss": 2.1049, "step": 910 }, { "epoch": 0.17, "learning_rate": 0.000982195315740576, "loss": 2.0475, "step": 920 }, { "epoch": 0.17, "learning_rate": 0.0009818085307838741, "loss": 2.0624, "step": 930 }, { "epoch": 0.17, "learning_rate": 0.000981417667451026, "loss": 2.0714, "step": 940 }, { "epoch": 0.18, "learning_rate": 0.0009810227290505816, "loss": 2.0947, "step": 950 }, { "epoch": 0.18, "learning_rate": 0.0009806237189255859, "loss": 2.0591, "step": 960 }, { "epoch": 0.18, "learning_rate": 0.0009802206404535489, "loss": 2.0301, "step": 970 }, { "epoch": 0.18, "learning_rate": 0.000979813497046419, "loss": 2.0556, "step": 980 }, { "epoch": 0.18, "learning_rate": 0.0009794022921505523, "loss": 2.0753, "step": 990 }, { "epoch": 0.19, "learning_rate": 0.000978987029246685, "loss": 2.0898, "step": 1000 }, { "epoch": 0.19, "learning_rate": 0.0009785677118499029, "loss": 2.0464, "step": 1010 }, { "epoch": 0.19, "learning_rate": 0.0009781443435096116, "loss": 2.0828, "step": 1020 }, { "epoch": 0.19, "learning_rate": 0.0009777169278095074, "loss": 2.1137, "step": 1030 }, { "epoch": 0.19, "learning_rate": 0.0009772854683675462, "loss": 2.0167, "step": 1040 }, { "epoch": 0.19, "learning_rate": 0.000976849968835913, "loss": 2.07, "step": 1050 }, { "epoch": 0.2, "learning_rate": 0.0009764104329009909, "loss": 2.0409, "step": 1060 }, { "epoch": 0.2, "learning_rate": 0.0009759668642833304, "loss": 2.015, "step": 1070 }, { "epoch": 0.2, "learning_rate": 0.0009755192667376173, "loss": 2.0175, "step": 1080 }, { "epoch": 0.2, "learning_rate": 0.0009750676440526411, "loss": 2.0773, "step": 1090 }, { "epoch": 0.2, "learning_rate": 0.0009746120000512632, "loss": 2.0245, "step": 1100 }, { "epoch": 0.21, "learning_rate": 0.0009741523385903841, "loss": 2.094, "step": 1110 }, { "epoch": 0.21, "learning_rate": 0.0009736886635609112, "loss": 2.0506, "step": 1120 }, { "epoch": 0.21, "learning_rate": 0.0009732209788877258, "loss": 2.0287, "step": 1130 }, { "epoch": 0.21, "learning_rate": 0.0009727492885296489, "loss": 2.1162, "step": 1140 }, { "epoch": 0.21, "learning_rate": 0.0009722735964794099, "loss": 2.1096, "step": 1150 }, { "epoch": 0.21, "learning_rate": 0.0009717939067636099, "loss": 2.0621, "step": 1160 }, { "epoch": 0.22, "learning_rate": 0.0009713102234426903, "loss": 2.0796, "step": 1170 }, { "epoch": 0.22, "learning_rate": 0.0009708225506108965, "loss": 2.0565, "step": 1180 }, { "epoch": 0.22, "learning_rate": 0.0009703308923962447, "loss": 2.0669, "step": 1190 }, { "epoch": 0.22, "learning_rate": 0.0009698352529604857, "loss": 2.0638, "step": 1200 }, { "epoch": 0.22, "learning_rate": 0.0009693356364990705, "loss": 2.0358, "step": 1210 }, { "epoch": 0.23, "learning_rate": 0.0009688320472411143, "loss": 2.0859, "step": 1220 }, { "epoch": 0.23, "learning_rate": 0.0009683244894493613, "loss": 2.0932, "step": 1230 }, { "epoch": 0.23, "learning_rate": 0.0009678129674201479, "loss": 2.0129, "step": 1240 }, { "epoch": 0.23, "learning_rate": 0.0009672974854833669, "loss": 2.055, "step": 1250 }, { "epoch": 0.23, "learning_rate": 0.0009667780480024304, "loss": 2.0665, "step": 1260 }, { "epoch": 0.24, "learning_rate": 0.0009662546593742334, "loss": 2.0488, "step": 1270 }, { "epoch": 0.24, "learning_rate": 0.0009657273240291159, "loss": 2.0543, "step": 1280 }, { "epoch": 0.24, "learning_rate": 0.0009651960464308261, "loss": 2.0418, "step": 1290 }, { "epoch": 0.24, "learning_rate": 0.0009646608310764819, "loss": 2.033, "step": 1300 }, { "epoch": 0.24, "learning_rate": 0.0009641216824965338, "loss": 2.1034, "step": 1310 }, { "epoch": 0.24, "learning_rate": 0.0009635786052547253, "loss": 2.0866, "step": 1320 }, { "epoch": 0.25, "learning_rate": 0.0009630316039480556, "loss": 2.0607, "step": 1330 }, { "epoch": 0.25, "learning_rate": 0.0009624806832067394, "loss": 2.0457, "step": 1340 }, { "epoch": 0.25, "learning_rate": 0.0009619258476941686, "loss": 2.032, "step": 1350 }, { "epoch": 0.25, "learning_rate": 0.000961367102106873, "loss": 2.0519, "step": 1360 }, { "epoch": 0.25, "learning_rate": 0.0009608044511744791, "loss": 2.0449, "step": 1370 }, { "epoch": 0.26, "learning_rate": 0.0009602378996596721, "loss": 1.9949, "step": 1380 }, { "epoch": 0.26, "learning_rate": 0.0009596674523581539, "loss": 2.0394, "step": 1390 }, { "epoch": 0.26, "learning_rate": 0.0009590931140986035, "loss": 2.0386, "step": 1400 }, { "epoch": 0.26, "learning_rate": 0.0009585148897426354, "loss": 2.0254, "step": 1410 }, { "epoch": 0.26, "learning_rate": 0.0009579327841847593, "loss": 2.0238, "step": 1420 }, { "epoch": 0.26, "learning_rate": 0.000957346802352338, "loss": 2.0509, "step": 1430 }, { "epoch": 0.27, "learning_rate": 0.0009567569492055456, "loss": 2.0004, "step": 1440 }, { "epoch": 0.27, "learning_rate": 0.0009561632297373263, "loss": 2.0203, "step": 1450 }, { "epoch": 0.27, "learning_rate": 0.0009555656489733513, "loss": 2.0182, "step": 1460 }, { "epoch": 0.27, "learning_rate": 0.000954964211971977, "loss": 1.9754, "step": 1470 }, { "epoch": 0.27, "learning_rate": 0.0009543589238242012, "loss": 2.0374, "step": 1480 }, { "epoch": 0.28, "learning_rate": 0.000953749789653621, "loss": 2.0367, "step": 1490 }, { "epoch": 0.28, "learning_rate": 0.000953136814616389, "loss": 2.0866, "step": 1500 }, { "epoch": 0.28, "learning_rate": 0.0009525200039011694, "loss": 2.0083, "step": 1510 }, { "epoch": 0.28, "learning_rate": 0.0009518993627290948, "loss": 2.0525, "step": 1520 }, { "epoch": 0.28, "learning_rate": 0.0009512748963537212, "loss": 2.0636, "step": 1530 }, { "epoch": 0.29, "learning_rate": 0.000950646610060984, "loss": 2.0522, "step": 1540 }, { "epoch": 0.29, "learning_rate": 0.0009500145091691532, "loss": 2.05, "step": 1550 }, { "epoch": 0.29, "learning_rate": 0.0009493785990287882, "loss": 1.9887, "step": 1560 }, { "epoch": 0.29, "learning_rate": 0.0009487388850226926, "loss": 2.0309, "step": 1570 }, { "epoch": 0.29, "learning_rate": 0.000948095372565869, "loss": 1.9954, "step": 1580 }, { "epoch": 0.29, "learning_rate": 0.0009474480671054726, "loss": 2.0078, "step": 1590 }, { "epoch": 0.3, "learning_rate": 0.0009467969741207652, "loss": 2.0395, "step": 1600 }, { "epoch": 0.3, "learning_rate": 0.0009461420991230693, "loss": 2.0415, "step": 1610 }, { "epoch": 0.3, "learning_rate": 0.0009454834476557207, "loss": 2.0308, "step": 1620 }, { "epoch": 0.3, "learning_rate": 0.0009448210252940223, "loss": 2.0826, "step": 1630 }, { "epoch": 0.3, "learning_rate": 0.0009441548376451963, "loss": 2.0424, "step": 1640 }, { "epoch": 0.31, "learning_rate": 0.0009434848903483373, "loss": 2.0125, "step": 1650 }, { "epoch": 0.31, "learning_rate": 0.0009428111890743639, "loss": 2.0139, "step": 1660 }, { "epoch": 0.31, "learning_rate": 0.0009421337395259717, "loss": 2.0682, "step": 1670 }, { "epoch": 0.31, "learning_rate": 0.0009414525474375837, "loss": 2.0577, "step": 1680 }, { "epoch": 0.31, "learning_rate": 0.0009407676185753029, "loss": 2.0262, "step": 1690 }, { "epoch": 0.31, "learning_rate": 0.0009400789587368632, "loss": 2.0515, "step": 1700 }, { "epoch": 0.32, "learning_rate": 0.0009393865737515794, "loss": 2.0398, "step": 1710 }, { "epoch": 0.32, "learning_rate": 0.0009386904694802997, "loss": 2.0146, "step": 1720 }, { "epoch": 0.32, "learning_rate": 0.0009379906518153543, "loss": 2.0438, "step": 1730 }, { "epoch": 0.32, "learning_rate": 0.0009372871266805063, "loss": 2.0377, "step": 1740 }, { "epoch": 0.32, "learning_rate": 0.000936579900030902, "loss": 2.0789, "step": 1750 }, { "epoch": 0.33, "learning_rate": 0.0009358689778530193, "loss": 2.0201, "step": 1760 }, { "epoch": 0.33, "learning_rate": 0.0009351543661646185, "loss": 2.0114, "step": 1770 }, { "epoch": 0.33, "learning_rate": 0.0009344360710146898, "loss": 2.0242, "step": 1780 }, { "epoch": 0.33, "learning_rate": 0.0009337140984834034, "loss": 2.0436, "step": 1790 }, { "epoch": 0.33, "learning_rate": 0.0009329884546820572, "loss": 2.0452, "step": 1800 }, { "epoch": 0.34, "learning_rate": 0.000932259145753026, "loss": 2.0254, "step": 1810 }, { "epoch": 0.34, "learning_rate": 0.0009315261778697083, "loss": 2.0409, "step": 1820 }, { "epoch": 0.34, "learning_rate": 0.0009307895572364746, "loss": 2.0301, "step": 1830 }, { "epoch": 0.34, "learning_rate": 0.0009300492900886154, "loss": 2.0078, "step": 1840 }, { "epoch": 0.34, "learning_rate": 0.0009293053826922873, "loss": 1.9851, "step": 1850 }, { "epoch": 0.34, "learning_rate": 0.0009285578413444613, "loss": 1.9947, "step": 1860 }, { "epoch": 0.35, "learning_rate": 0.0009278066723728682, "loss": 2.0331, "step": 1870 }, { "epoch": 0.35, "learning_rate": 0.0009270518821359461, "loss": 2.0058, "step": 1880 }, { "epoch": 0.35, "learning_rate": 0.0009262934770227858, "loss": 2.05, "step": 1890 }, { "epoch": 0.35, "learning_rate": 0.0009255314634530771, "loss": 2.0444, "step": 1900 }, { "epoch": 0.35, "learning_rate": 0.0009247658478770543, "loss": 2.0045, "step": 1910 }, { "epoch": 0.36, "learning_rate": 0.000923996636775442, "loss": 2.0211, "step": 1920 }, { "epoch": 0.36, "learning_rate": 0.0009232238366593997, "loss": 2.0124, "step": 1930 }, { "epoch": 0.36, "learning_rate": 0.0009224474540704671, "loss": 2.0067, "step": 1940 }, { "epoch": 0.36, "learning_rate": 0.0009216674955805079, "loss": 2.0247, "step": 1950 }, { "epoch": 0.36, "learning_rate": 0.0009208839677916557, "loss": 2.0314, "step": 1960 }, { "epoch": 0.36, "learning_rate": 0.0009200968773362568, "loss": 2.067, "step": 1970 }, { "epoch": 0.37, "learning_rate": 0.0009193062308768145, "loss": 2.0168, "step": 1980 }, { "epoch": 0.37, "learning_rate": 0.0009185120351059326, "loss": 2.0649, "step": 1990 }, { "epoch": 0.37, "learning_rate": 0.0009177142967462591, "loss": 2.0208, "step": 2000 }, { "epoch": 0.37, "learning_rate": 0.0009169130225504289, "loss": 2.016, "step": 2010 }, { "epoch": 0.37, "learning_rate": 0.0009161082193010066, "loss": 2.0331, "step": 2020 }, { "epoch": 0.38, "learning_rate": 0.0009152998938104296, "loss": 2.0757, "step": 2030 }, { "epoch": 0.38, "learning_rate": 0.0009144880529209498, "loss": 2.0093, "step": 2040 }, { "epoch": 0.38, "learning_rate": 0.0009136727035045765, "loss": 1.954, "step": 2050 }, { "epoch": 0.38, "learning_rate": 0.0009128538524630167, "loss": 2.0062, "step": 2060 }, { "epoch": 0.38, "learning_rate": 0.0009120315067276187, "loss": 2.0179, "step": 2070 }, { "epoch": 0.39, "learning_rate": 0.0009112056732593119, "loss": 2.0301, "step": 2080 }, { "epoch": 0.39, "learning_rate": 0.0009103763590485488, "loss": 2.0223, "step": 2090 }, { "epoch": 0.39, "learning_rate": 0.0009095435711152449, "loss": 2.0208, "step": 2100 }, { "epoch": 0.39, "learning_rate": 0.00090870731650872, "loss": 2.0492, "step": 2110 }, { "epoch": 0.39, "learning_rate": 0.0009078676023076385, "loss": 1.9917, "step": 2120 }, { "epoch": 0.39, "learning_rate": 0.0009070244356199492, "loss": 2.0235, "step": 2130 }, { "epoch": 0.4, "learning_rate": 0.0009061778235828253, "loss": 2.01, "step": 2140 }, { "epoch": 0.4, "learning_rate": 0.0009054129328980229, "loss": 1.9999, "step": 2150 }, { "epoch": 0.4, "learning_rate": 0.000904559794464287, "loss": 2.0022, "step": 2160 }, { "epoch": 0.4, "learning_rate": 0.0009037032315436229, "loss": 2.0004, "step": 2170 }, { "epoch": 0.4, "learning_rate": 0.0009028432513865994, "loss": 1.9936, "step": 2180 }, { "epoch": 0.41, "learning_rate": 0.000901979861272711, "loss": 2.0098, "step": 2190 }, { "epoch": 0.41, "learning_rate": 0.0009011130685103166, "loss": 1.9829, "step": 2200 }, { "epoch": 0.41, "learning_rate": 0.0009002428804365775, "loss": 2.0135, "step": 2210 }, { "epoch": 0.41, "learning_rate": 0.0008993693044173954, "loss": 2.0526, "step": 2220 }, { "epoch": 0.41, "learning_rate": 0.0008984923478473499, "loss": 2.0145, "step": 2230 }, { "epoch": 0.41, "learning_rate": 0.0008976120181496365, "loss": 2.0364, "step": 2240 }, { "epoch": 0.42, "learning_rate": 0.0008967283227760028, "loss": 2.019, "step": 2250 }, { "epoch": 0.42, "learning_rate": 0.000895841269206686, "loss": 1.9874, "step": 2260 }, { "epoch": 0.42, "learning_rate": 0.0008949508649503495, "loss": 2.0699, "step": 2270 }, { "epoch": 0.42, "learning_rate": 0.0008940571175440196, "loss": 2.0407, "step": 2280 }, { "epoch": 0.42, "learning_rate": 0.0008931600345530213, "loss": 2.0176, "step": 2290 }, { "epoch": 0.43, "learning_rate": 0.0008922596235709142, "loss": 2.0235, "step": 2300 }, { "epoch": 0.43, "learning_rate": 0.0008913558922194287, "loss": 2.0268, "step": 2310 }, { "epoch": 0.43, "learning_rate": 0.000890448848148401, "loss": 2.0279, "step": 2320 }, { "epoch": 0.43, "learning_rate": 0.0008895384990357087, "loss": 2.0073, "step": 2330 }, { "epoch": 0.43, "learning_rate": 0.0008886248525872056, "loss": 2.0262, "step": 2340 }, { "epoch": 0.44, "learning_rate": 0.0008877079165366566, "loss": 2.0112, "step": 2350 }, { "epoch": 0.44, "learning_rate": 0.000886787698645672, "loss": 2.042, "step": 2360 }, { "epoch": 0.44, "learning_rate": 0.0008858642067036424, "loss": 2.0129, "step": 2370 }, { "epoch": 0.44, "learning_rate": 0.0008849374485276715, "loss": 2.0264, "step": 2380 }, { "epoch": 0.44, "learning_rate": 0.000884007431962512, "loss": 2.0211, "step": 2390 }, { "epoch": 0.44, "learning_rate": 0.0008830741648804967, "loss": 2.0175, "step": 2400 }, { "epoch": 0.45, "learning_rate": 0.0008821376551814739, "loss": 2.0423, "step": 2410 }, { "epoch": 0.45, "learning_rate": 0.0008811979107927396, "loss": 2.0112, "step": 2420 }, { "epoch": 0.45, "learning_rate": 0.0008802549396689705, "loss": 2.0422, "step": 2430 }, { "epoch": 0.45, "learning_rate": 0.0008793087497921566, "loss": 2.0075, "step": 2440 }, { "epoch": 0.45, "learning_rate": 0.0008784544334882426, "loss": 1.9958, "step": 2450 }, { "epoch": 0.46, "learning_rate": 0.000877502150068553, "loss": 2.0348, "step": 2460 }, { "epoch": 0.46, "learning_rate": 0.0008765466711974225, "loss": 2.0232, "step": 2470 }, { "epoch": 0.46, "learning_rate": 0.0008755880049627157, "loss": 1.9953, "step": 2480 }, { "epoch": 0.46, "learning_rate": 0.0008746261594792776, "loss": 2.0223, "step": 2490 }, { "epoch": 0.46, "learning_rate": 0.0008736611428888644, "loss": 2.0332, "step": 2500 }, { "epoch": 0.46, "learning_rate": 0.0008726929633600753, "loss": 2.0075, "step": 2510 }, { "epoch": 0.47, "learning_rate": 0.0008717216290882824, "loss": 2.002, "step": 2520 }, { "epoch": 0.47, "learning_rate": 0.0008707471482955622, "loss": 2.0594, "step": 2530 }, { "epoch": 0.47, "learning_rate": 0.0008697695292306253, "loss": 2.0214, "step": 2540 }, { "epoch": 0.47, "learning_rate": 0.0008687887801687473, "loss": 1.994, "step": 2550 }, { "epoch": 0.47, "learning_rate": 0.0008678049094116979, "loss": 2.0338, "step": 2560 }, { "epoch": 0.48, "learning_rate": 0.0008668179252876714, "loss": 2.0176, "step": 2570 }, { "epoch": 0.48, "learning_rate": 0.0008658278361512161, "loss": 2.0013, "step": 2580 }, { "epoch": 0.48, "learning_rate": 0.0008648346503831627, "loss": 2.0141, "step": 2590 }, { "epoch": 0.48, "learning_rate": 0.0008638383763905546, "loss": 1.9655, "step": 2600 }, { "epoch": 0.48, "learning_rate": 0.0008628390226065761, "loss": 2.039, "step": 2610 }, { "epoch": 0.49, "learning_rate": 0.0008618365974904808, "loss": 1.9418, "step": 2620 }, { "epoch": 0.49, "learning_rate": 0.0008608311095275205, "loss": 2.011, "step": 2630 }, { "epoch": 0.49, "learning_rate": 0.0008598225672288731, "loss": 2.0174, "step": 2640 }, { "epoch": 0.49, "learning_rate": 0.0008588109791315707, "loss": 1.984, "step": 2650 }, { "epoch": 0.49, "learning_rate": 0.000857796353798427, "loss": 2.0437, "step": 2660 }, { "epoch": 0.49, "learning_rate": 0.0008567786998179654, "loss": 2.0255, "step": 2670 }, { "epoch": 0.5, "learning_rate": 0.0008557580258043459, "loss": 2.0067, "step": 2680 }, { "epoch": 0.5, "learning_rate": 0.0008547343403972918, "loss": 2.0201, "step": 2690 }, { "epoch": 0.5, "learning_rate": 0.0008537076522620175, "loss": 2.0258, "step": 2700 }, { "epoch": 0.5, "learning_rate": 0.0008526779700891545, "loss": 1.9977, "step": 2710 }, { "epoch": 0.5, "learning_rate": 0.0008516453025946781, "loss": 1.9835, "step": 2720 }, { "epoch": 0.51, "learning_rate": 0.0008506096585198333, "loss": 2.0428, "step": 2730 }, { "epoch": 0.51, "learning_rate": 0.0008495710466310613, "loss": 1.9554, "step": 2740 }, { "epoch": 0.51, "learning_rate": 0.0008485294757199248, "loss": 2.0497, "step": 2750 }, { "epoch": 0.51, "learning_rate": 0.0008474849546030339, "loss": 2.001, "step": 2760 }, { "epoch": 0.51, "learning_rate": 0.0008464374921219714, "loss": 1.9824, "step": 2770 }, { "epoch": 0.51, "learning_rate": 0.0008453870971432179, "loss": 2.0378, "step": 2780 }, { "epoch": 0.52, "learning_rate": 0.0008443337785580769, "loss": 1.9756, "step": 2790 }, { "epoch": 0.52, "learning_rate": 0.0008432775452825992, "loss": 2.0563, "step": 2800 }, { "epoch": 0.52, "learning_rate": 0.0008422184062575077, "loss": 2.0115, "step": 2810 }, { "epoch": 0.52, "learning_rate": 0.0008411563704481217, "loss": 2.0301, "step": 2820 }, { "epoch": 0.52, "learning_rate": 0.0008400914468442814, "loss": 1.9912, "step": 2830 }, { "epoch": 0.53, "learning_rate": 0.0008390236444602706, "loss": 2.0669, "step": 2840 }, { "epoch": 0.53, "learning_rate": 0.0008379529723347417, "loss": 2.0138, "step": 2850 }, { "epoch": 0.53, "learning_rate": 0.0008368794395306386, "loss": 2.0486, "step": 2860 }, { "epoch": 0.53, "learning_rate": 0.0008358030551351199, "loss": 2.0342, "step": 2870 }, { "epoch": 0.53, "learning_rate": 0.0008347238282594823, "loss": 2.0474, "step": 2880 }, { "epoch": 0.54, "learning_rate": 0.0008336417680390829, "loss": 2.006, "step": 2890 }, { "epoch": 0.54, "learning_rate": 0.0008325568836332633, "loss": 1.99, "step": 2900 }, { "epoch": 0.54, "learning_rate": 0.00083146918422527, "loss": 2.0148, "step": 2910 }, { "epoch": 0.54, "learning_rate": 0.0008303786790221778, "loss": 2.0381, "step": 2920 }, { "epoch": 0.54, "learning_rate": 0.0008292853772548126, "loss": 2.0591, "step": 2930 }, { "epoch": 0.54, "learning_rate": 0.0008281892881776714, "loss": 2.0238, "step": 2940 }, { "epoch": 0.55, "learning_rate": 0.0008270904210688459, "loss": 1.9939, "step": 2950 }, { "epoch": 0.55, "learning_rate": 0.0008259887852299427, "loss": 1.9941, "step": 2960 }, { "epoch": 0.55, "learning_rate": 0.0008248843899860045, "loss": 2.0193, "step": 2970 }, { "epoch": 0.55, "learning_rate": 0.0008237772446854325, "loss": 2.0166, "step": 2980 }, { "epoch": 0.55, "learning_rate": 0.0008226673586999058, "loss": 2.0219, "step": 2990 }, { "epoch": 0.56, "learning_rate": 0.0008215547414243025, "loss": 2.0078, "step": 3000 }, { "epoch": 0.56, "learning_rate": 0.0008204394022766208, "loss": 2.0022, "step": 3010 }, { "epoch": 0.56, "learning_rate": 0.0008193213506978983, "loss": 2.0009, "step": 3020 }, { "epoch": 0.56, "learning_rate": 0.0008182005961521331, "loss": 2.0197, "step": 3030 }, { "epoch": 0.56, "learning_rate": 0.0008170771481262027, "loss": 2.0101, "step": 3040 }, { "epoch": 0.56, "learning_rate": 0.0008159510161297846, "loss": 2.0307, "step": 3050 }, { "epoch": 0.57, "learning_rate": 0.0008148222096952748, "loss": 2.0075, "step": 3060 }, { "epoch": 0.57, "learning_rate": 0.0008136907383777081, "loss": 2.0415, "step": 3070 }, { "epoch": 0.57, "learning_rate": 0.000812556611754677, "loss": 2.0031, "step": 3080 }, { "epoch": 0.57, "learning_rate": 0.0008114198394262497, "loss": 2.0025, "step": 3090 }, { "epoch": 0.57, "learning_rate": 0.0008102804310148905, "loss": 1.983, "step": 3100 }, { "epoch": 0.58, "learning_rate": 0.0008091383961653765, "loss": 2.0056, "step": 3110 }, { "epoch": 0.58, "learning_rate": 0.0008079937445447175, "loss": 1.9679, "step": 3120 }, { "epoch": 0.58, "learning_rate": 0.0008068464858420732, "loss": 1.9897, "step": 3130 }, { "epoch": 0.58, "learning_rate": 0.0008056966297686717, "loss": 1.9833, "step": 3140 }, { "epoch": 0.58, "learning_rate": 0.0008045441860577273, "loss": 2.0088, "step": 3150 }, { "epoch": 0.59, "learning_rate": 0.0008033891644643576, "loss": 1.9985, "step": 3160 }, { "epoch": 0.59, "learning_rate": 0.0008022315747655011, "loss": 2.013, "step": 3170 }, { "epoch": 0.59, "learning_rate": 0.0008010714267598354, "loss": 1.9902, "step": 3180 }, { "epoch": 0.59, "learning_rate": 0.0007999087302676926, "loss": 2.0133, "step": 3190 }, { "epoch": 0.59, "learning_rate": 0.0007987434951309774, "loss": 2.0092, "step": 3200 }, { "epoch": 0.59, "learning_rate": 0.0007975757312130836, "loss": 1.9894, "step": 3210 }, { "epoch": 0.6, "learning_rate": 0.0007964054483988103, "loss": 1.9742, "step": 3220 }, { "epoch": 0.6, "learning_rate": 0.000795232656594278, "loss": 2.016, "step": 3230 }, { "epoch": 0.6, "learning_rate": 0.0007940573657268458, "loss": 2.0386, "step": 3240 }, { "epoch": 0.6, "learning_rate": 0.0007928795857450259, "loss": 1.9602, "step": 3250 }, { "epoch": 0.6, "learning_rate": 0.0007916993266184007, "loss": 1.9681, "step": 3260 }, { "epoch": 0.61, "learning_rate": 0.0007905165983375379, "loss": 2.0326, "step": 3270 }, { "epoch": 0.61, "learning_rate": 0.0007893314109139058, "loss": 1.9958, "step": 3280 }, { "epoch": 0.61, "learning_rate": 0.0007881437743797882, "loss": 2.0077, "step": 3290 }, { "epoch": 0.61, "learning_rate": 0.0007869536987882008, "loss": 1.9813, "step": 3300 }, { "epoch": 0.61, "learning_rate": 0.0007857611942128044, "loss": 1.9538, "step": 3310 }, { "epoch": 0.61, "learning_rate": 0.0007845662707478214, "loss": 1.9919, "step": 3320 }, { "epoch": 0.62, "learning_rate": 0.0007833689385079485, "loss": 1.9922, "step": 3330 }, { "epoch": 0.62, "learning_rate": 0.0007821692076282725, "loss": 1.9973, "step": 3340 }, { "epoch": 0.62, "learning_rate": 0.0007809670882641842, "loss": 2.0231, "step": 3350 }, { "epoch": 0.62, "learning_rate": 0.0007797625905912915, "loss": 1.9971, "step": 3360 }, { "epoch": 0.62, "learning_rate": 0.0007785557248053351, "loss": 1.9934, "step": 3370 }, { "epoch": 0.63, "learning_rate": 0.0007773465011221002, "loss": 1.9855, "step": 3380 }, { "epoch": 0.63, "learning_rate": 0.0007761349297773315, "loss": 2.0421, "step": 3390 }, { "epoch": 0.63, "learning_rate": 0.0007749210210266457, "loss": 1.9795, "step": 3400 }, { "epoch": 0.63, "learning_rate": 0.0007737047851454451, "loss": 2.0006, "step": 3410 }, { "epoch": 0.63, "learning_rate": 0.0007724862324288309, "loss": 2.011, "step": 3420 }, { "epoch": 0.64, "learning_rate": 0.0007712653731915147, "loss": 1.9969, "step": 3430 }, { "epoch": 0.64, "learning_rate": 0.0007700422177677331, "loss": 1.9581, "step": 3440 }, { "epoch": 0.64, "learning_rate": 0.0007688167765111592, "loss": 1.9858, "step": 3450 }, { "epoch": 0.64, "learning_rate": 0.0007675890597948141, "loss": 1.9945, "step": 3460 }, { "epoch": 0.64, "learning_rate": 0.0007663590780109814, "loss": 2.0116, "step": 3470 }, { "epoch": 0.64, "learning_rate": 0.0007651268415711168, "loss": 1.9831, "step": 3480 }, { "epoch": 0.65, "learning_rate": 0.0007638923609057616, "loss": 2.0113, "step": 3490 }, { "epoch": 0.65, "learning_rate": 0.0007626556464644538, "loss": 1.9927, "step": 3500 }, { "epoch": 0.65, "learning_rate": 0.0007614167087156393, "loss": 2.0209, "step": 3510 }, { "epoch": 0.65, "learning_rate": 0.0007601755581465844, "loss": 1.9941, "step": 3520 }, { "epoch": 0.65, "learning_rate": 0.0007589322052632855, "loss": 2.0443, "step": 3530 }, { "epoch": 0.66, "learning_rate": 0.0007576866605903812, "loss": 1.975, "step": 3540 }, { "epoch": 0.66, "learning_rate": 0.0007564389346710638, "loss": 1.9655, "step": 3550 }, { "epoch": 0.66, "learning_rate": 0.0007551890380669879, "loss": 2.0034, "step": 3560 }, { "epoch": 0.66, "learning_rate": 0.0007539369813581836, "loss": 1.953, "step": 3570 }, { "epoch": 0.66, "learning_rate": 0.000752682775142965, "loss": 2.0162, "step": 3580 }, { "epoch": 0.66, "learning_rate": 0.0007514264300378411, "loss": 2.0096, "step": 3590 }, { "epoch": 0.67, "learning_rate": 0.0007501679566774267, "loss": 2.0431, "step": 3600 }, { "epoch": 0.67, "learning_rate": 0.0007489073657143506, "loss": 1.961, "step": 3610 }, { "epoch": 0.67, "learning_rate": 0.0007476446678191676, "loss": 2.0179, "step": 3620 }, { "epoch": 0.67, "learning_rate": 0.0007463798736802665, "loss": 1.9885, "step": 3630 }, { "epoch": 0.67, "learning_rate": 0.0007451129940037804, "loss": 1.9955, "step": 3640 }, { "epoch": 0.68, "learning_rate": 0.000743844039513496, "loss": 1.9671, "step": 3650 }, { "epoch": 0.68, "learning_rate": 0.0007425730209507622, "loss": 1.9866, "step": 3660 }, { "epoch": 0.68, "learning_rate": 0.0007412999490744008, "loss": 1.9508, "step": 3670 }, { "epoch": 0.68, "learning_rate": 0.0007400248346606129, "loss": 2.0261, "step": 3680 }, { "epoch": 0.68, "learning_rate": 0.0007387476885028902, "loss": 1.9946, "step": 3690 }, { "epoch": 0.69, "learning_rate": 0.0007374685214119221, "loss": 1.969, "step": 3700 }, { "epoch": 0.69, "learning_rate": 0.0007361873442155049, "loss": 1.9802, "step": 3710 }, { "epoch": 0.69, "learning_rate": 0.0007349041677584496, "loss": 1.9739, "step": 3720 }, { "epoch": 0.69, "learning_rate": 0.0007336190029024907, "loss": 1.964, "step": 3730 }, { "epoch": 0.69, "learning_rate": 0.0007323318605261933, "loss": 2.0009, "step": 3740 }, { "epoch": 0.69, "learning_rate": 0.0007310427515248626, "loss": 1.9825, "step": 3750 }, { "epoch": 0.7, "learning_rate": 0.0007297516868104501, "loss": 1.9744, "step": 3760 }, { "epoch": 0.7, "learning_rate": 0.000728458677311462, "loss": 2.0197, "step": 3770 }, { "epoch": 0.7, "learning_rate": 0.0007271637339728667, "loss": 2.0016, "step": 3780 }, { "epoch": 0.7, "learning_rate": 0.0007258668677560019, "loss": 1.9721, "step": 3790 }, { "epoch": 0.7, "learning_rate": 0.0007245680896384818, "loss": 1.9591, "step": 3800 }, { "epoch": 0.71, "learning_rate": 0.0007232674106141048, "loss": 1.9762, "step": 3810 }, { "epoch": 0.71, "learning_rate": 0.0007219648416927592, "loss": 2.0026, "step": 3820 }, { "epoch": 0.71, "learning_rate": 0.0007206603939003314, "loss": 1.9876, "step": 3830 }, { "epoch": 0.71, "learning_rate": 0.0007193540782786117, "loss": 1.9991, "step": 3840 }, { "epoch": 0.71, "learning_rate": 0.0007180459058852011, "loss": 1.9897, "step": 3850 }, { "epoch": 0.71, "learning_rate": 0.0007167358877934176, "loss": 2.0581, "step": 3860 }, { "epoch": 0.72, "learning_rate": 0.0007154240350922025, "loss": 1.9588, "step": 3870 }, { "epoch": 0.72, "learning_rate": 0.0007141103588860265, "loss": 2.0151, "step": 3880 }, { "epoch": 0.72, "learning_rate": 0.000712794870294796, "loss": 1.9494, "step": 3890 }, { "epoch": 0.72, "learning_rate": 0.0007114775804537586, "loss": 1.9936, "step": 3900 }, { "epoch": 0.72, "learning_rate": 0.0007101585005134088, "loss": 2.0016, "step": 3910 }, { "epoch": 0.73, "learning_rate": 0.0007088376416393944, "loss": 1.9628, "step": 3920 }, { "epoch": 0.73, "learning_rate": 0.0007075150150124208, "loss": 1.9955, "step": 3930 }, { "epoch": 0.73, "learning_rate": 0.0007061906318281568, "loss": 1.9731, "step": 3940 }, { "epoch": 0.73, "learning_rate": 0.0007048645032971407, "loss": 1.9554, "step": 3950 }, { "epoch": 0.73, "learning_rate": 0.0007035366406446839, "loss": 1.985, "step": 3960 }, { "epoch": 0.74, "learning_rate": 0.0007022070551107774, "loss": 1.9961, "step": 3970 }, { "epoch": 0.74, "learning_rate": 0.0007008757579499954, "loss": 2.0112, "step": 3980 }, { "epoch": 0.74, "learning_rate": 0.0006995427604314004, "loss": 2.0086, "step": 3990 }, { "epoch": 0.74, "learning_rate": 0.0006982080738384487, "loss": 2.019, "step": 4000 }, { "epoch": 0.74, "learning_rate": 0.0006968717094688933, "loss": 1.9983, "step": 4010 }, { "epoch": 0.74, "learning_rate": 0.0006955336786346898, "loss": 1.981, "step": 4020 }, { "epoch": 0.75, "learning_rate": 0.0006941939926618997, "loss": 1.9878, "step": 4030 }, { "epoch": 0.75, "learning_rate": 0.0006928526628905942, "loss": 2.0067, "step": 4040 }, { "epoch": 0.75, "learning_rate": 0.0006915097006747601, "loss": 2.035, "step": 4050 }, { "epoch": 0.75, "learning_rate": 0.0006901651173822013, "loss": 1.9614, "step": 4060 }, { "epoch": 0.75, "learning_rate": 0.0006888189243944439, "loss": 1.9655, "step": 4070 }, { "epoch": 0.76, "learning_rate": 0.0006874711331066401, "loss": 1.9692, "step": 4080 }, { "epoch": 0.76, "learning_rate": 0.0006861217549274705, "loss": 1.9791, "step": 4090 }, { "epoch": 0.76, "learning_rate": 0.0006847708012790492, "loss": 1.9804, "step": 4100 }, { "epoch": 0.76, "learning_rate": 0.0006834182835968253, "loss": 1.9973, "step": 4110 }, { "epoch": 0.76, "learning_rate": 0.0006820642133294876, "loss": 2.0153, "step": 4120 }, { "epoch": 0.76, "learning_rate": 0.0006807086019388669, "loss": 1.9639, "step": 4130 }, { "epoch": 0.77, "learning_rate": 0.0006793514608998393, "loss": 2.0162, "step": 4140 }, { "epoch": 0.77, "learning_rate": 0.000677992801700229, "loss": 1.994, "step": 4150 }, { "epoch": 0.77, "learning_rate": 0.0006766326358407104, "loss": 1.9992, "step": 4160 }, { "epoch": 0.77, "learning_rate": 0.0006752709748347125, "loss": 1.976, "step": 4170 }, { "epoch": 0.77, "learning_rate": 0.000673907830208319, "loss": 1.9452, "step": 4180 }, { "epoch": 0.78, "learning_rate": 0.0006725432135001732, "loss": 1.9982, "step": 4190 }, { "epoch": 0.78, "learning_rate": 0.0006711771362613785, "loss": 1.9766, "step": 4200 }, { "epoch": 0.78, "learning_rate": 0.0006698096100554014, "loss": 1.9838, "step": 4210 }, { "epoch": 0.78, "learning_rate": 0.0006684406464579735, "loss": 1.9746, "step": 4220 }, { "epoch": 0.78, "learning_rate": 0.0006670702570569937, "loss": 1.9838, "step": 4230 }, { "epoch": 0.79, "learning_rate": 0.0006656984534524296, "loss": 1.9747, "step": 4240 }, { "epoch": 0.79, "learning_rate": 0.0006643252472562201, "loss": 1.9896, "step": 4250 }, { "epoch": 0.79, "learning_rate": 0.000662950650092176, "loss": 1.9607, "step": 4260 }, { "epoch": 0.79, "learning_rate": 0.0006615746735958832, "loss": 2.0075, "step": 4270 }, { "epoch": 0.79, "learning_rate": 0.0006603351250463683, "loss": 1.9918, "step": 4280 }, { "epoch": 0.79, "learning_rate": 0.000658956559916577, "loss": 1.9669, "step": 4290 }, { "epoch": 0.8, "learning_rate": 0.0006575766492634068, "loss": 1.9645, "step": 4300 }, { "epoch": 0.8, "learning_rate": 0.0006561954047674206, "loss": 1.9511, "step": 4310 }, { "epoch": 0.8, "learning_rate": 0.0006548128381204714, "loss": 1.9934, "step": 4320 }, { "epoch": 0.8, "learning_rate": 0.0006534289610256039, "loss": 1.9973, "step": 4330 }, { "epoch": 0.8, "learning_rate": 0.0006520437851969557, "loss": 1.9983, "step": 4340 }, { "epoch": 0.81, "learning_rate": 0.0006506573223596575, "loss": 1.9906, "step": 4350 }, { "epoch": 0.81, "learning_rate": 0.0006492695842497347, "loss": 1.9392, "step": 4360 }, { "epoch": 0.81, "learning_rate": 0.0006478805826140066, "loss": 2.0015, "step": 4370 }, { "epoch": 0.81, "learning_rate": 0.0006464903292099886, "loss": 2.0168, "step": 4380 }, { "epoch": 0.81, "learning_rate": 0.0006450988358057919, "loss": 1.9676, "step": 4390 }, { "epoch": 0.81, "learning_rate": 0.0006437061141800238, "loss": 1.9798, "step": 4400 }, { "epoch": 0.82, "learning_rate": 0.0006423121761216881, "loss": 1.9753, "step": 4410 }, { "epoch": 0.82, "learning_rate": 0.0006409170334300858, "loss": 1.9965, "step": 4420 }, { "epoch": 0.82, "learning_rate": 0.0006395206979147141, "loss": 1.9532, "step": 4430 }, { "epoch": 0.82, "learning_rate": 0.0006381231813951678, "loss": 1.9962, "step": 4440 }, { "epoch": 0.82, "learning_rate": 0.0006367244957010379, "loss": 1.9758, "step": 4450 }, { "epoch": 0.83, "learning_rate": 0.000635324652671813, "loss": 1.9606, "step": 4460 }, { "epoch": 0.83, "learning_rate": 0.0006339236641567774, "loss": 1.9785, "step": 4470 }, { "epoch": 0.83, "learning_rate": 0.0006325215420149119, "loss": 1.9334, "step": 4480 }, { "epoch": 0.83, "learning_rate": 0.0006311182981147935, "loss": 1.9829, "step": 4490 }, { "epoch": 0.83, "learning_rate": 0.000629713944334494, "loss": 1.9671, "step": 4500 }, { "epoch": 0.84, "learning_rate": 0.0006283084925614805, "loss": 2.0108, "step": 4510 }, { "epoch": 0.84, "learning_rate": 0.0006269019546925139, "loss": 1.9341, "step": 4520 }, { "epoch": 0.84, "learning_rate": 0.0006254943426335488, "loss": 1.9378, "step": 4530 }, { "epoch": 0.84, "learning_rate": 0.0006240856682996326, "loss": 1.9619, "step": 4540 }, { "epoch": 0.84, "learning_rate": 0.0006226759436148044, "loss": 1.9516, "step": 4550 }, { "epoch": 0.84, "learning_rate": 0.0006212651805119946, "loss": 1.9882, "step": 4560 }, { "epoch": 0.85, "learning_rate": 0.000619853390932923, "loss": 1.9636, "step": 4570 }, { "epoch": 0.85, "learning_rate": 0.0006184405868279985, "loss": 1.9894, "step": 4580 }, { "epoch": 0.85, "learning_rate": 0.0006170267801562176, "loss": 1.9365, "step": 4590 }, { "epoch": 0.85, "learning_rate": 0.0006156119828850633, "loss": 2.0264, "step": 4600 }, { "epoch": 0.85, "learning_rate": 0.0006141962069904041, "loss": 1.9659, "step": 4610 }, { "epoch": 0.86, "learning_rate": 0.0006127794644563915, "loss": 1.9368, "step": 4620 }, { "epoch": 0.86, "learning_rate": 0.0006113617672753598, "loss": 1.959, "step": 4630 }, { "epoch": 0.86, "learning_rate": 0.0006099431274477245, "loss": 1.9832, "step": 4640 }, { "epoch": 0.86, "learning_rate": 0.0006085235569818795, "loss": 1.9572, "step": 4650 }, { "epoch": 0.86, "learning_rate": 0.0006071030678940969, "loss": 1.99, "step": 4660 }, { "epoch": 0.86, "learning_rate": 0.0006056816722084248, "loss": 1.9963, "step": 4670 }, { "epoch": 0.87, "learning_rate": 0.0006042593819565846, "loss": 1.9997, "step": 4680 }, { "epoch": 0.87, "learning_rate": 0.0006028362091778706, "loss": 1.9221, "step": 4690 }, { "epoch": 0.87, "learning_rate": 0.0006014121659190476, "loss": 1.9623, "step": 4700 }, { "epoch": 0.87, "learning_rate": 0.0005999872642342484, "loss": 1.9949, "step": 4710 }, { "epoch": 0.87, "learning_rate": 0.0005985615161848721, "loss": 1.9688, "step": 4720 }, { "epoch": 0.88, "learning_rate": 0.000597134933839482, "loss": 1.975, "step": 4730 }, { "epoch": 0.88, "learning_rate": 0.0005957075292737042, "loss": 2.0048, "step": 4740 }, { "epoch": 0.88, "learning_rate": 0.0005942793145701235, "loss": 1.9591, "step": 4750 }, { "epoch": 0.88, "learning_rate": 0.0005928503018181833, "loss": 1.9957, "step": 4760 }, { "epoch": 0.88, "learning_rate": 0.0005914205031140819, "loss": 1.9823, "step": 4770 }, { "epoch": 0.89, "learning_rate": 0.0005899899305606703, "loss": 2.0032, "step": 4780 }, { "epoch": 0.89, "learning_rate": 0.0005885585962673501, "loss": 1.9722, "step": 4790 }, { "epoch": 0.89, "learning_rate": 0.0005871265123499708, "loss": 1.9903, "step": 4800 }, { "epoch": 0.89, "learning_rate": 0.0005856936909307273, "loss": 2.0258, "step": 4810 }, { "epoch": 0.89, "learning_rate": 0.000584260144138057, "loss": 2.0444, "step": 4820 }, { "epoch": 0.89, "learning_rate": 0.0005828258841065377, "loss": 1.968, "step": 4830 }, { "epoch": 0.9, "learning_rate": 0.0005813909229767846, "loss": 1.9598, "step": 4840 }, { "epoch": 0.9, "learning_rate": 0.0005799552728953472, "loss": 2.0047, "step": 4850 }, { "epoch": 0.9, "learning_rate": 0.000578518946014607, "loss": 1.939, "step": 4860 }, { "epoch": 0.9, "learning_rate": 0.0005770819544926741, "loss": 1.9835, "step": 4870 }, { "epoch": 0.9, "learning_rate": 0.0005756443104932852, "loss": 2.028, "step": 4880 }, { "epoch": 0.91, "learning_rate": 0.0005742060261856996, "loss": 1.973, "step": 4890 }, { "epoch": 0.91, "learning_rate": 0.0005727671137445967, "loss": 1.9734, "step": 4900 }, { "epoch": 0.91, "learning_rate": 0.0005713275853499729, "loss": 1.9921, "step": 4910 }, { "epoch": 0.91, "learning_rate": 0.0005698874531870386, "loss": 1.9607, "step": 4920 }, { "epoch": 0.91, "learning_rate": 0.0005684467294461146, "loss": 1.9468, "step": 4930 }, { "epoch": 0.91, "learning_rate": 0.0005670054263225297, "loss": 1.9524, "step": 4940 }, { "epoch": 0.92, "learning_rate": 0.0005655635560165166, "loss": 1.9737, "step": 4950 }, { "epoch": 0.92, "learning_rate": 0.0005641211307331093, "loss": 1.9532, "step": 4960 }, { "epoch": 0.92, "learning_rate": 0.0005626781626820395, "loss": 2.0048, "step": 4970 }, { "epoch": 0.92, "learning_rate": 0.0005612346640776331, "loss": 2.0276, "step": 4980 }, { "epoch": 0.92, "learning_rate": 0.0005597906471387074, "loss": 1.9476, "step": 4990 }, { "epoch": 0.93, "learning_rate": 0.0005583461240884666, "loss": 1.943, "step": 5000 }, { "epoch": 0.93, "learning_rate": 0.0005569011071543997, "loss": 1.9859, "step": 5010 }, { "epoch": 0.93, "learning_rate": 0.0005554556085681757, "loss": 1.9607, "step": 5020 }, { "epoch": 0.93, "learning_rate": 0.0005540096405655409, "loss": 1.9825, "step": 5030 }, { "epoch": 0.93, "learning_rate": 0.0005525632153862152, "loss": 1.9875, "step": 5040 }, { "epoch": 0.94, "learning_rate": 0.0005511163452737884, "loss": 2.0051, "step": 5050 }, { "epoch": 0.94, "learning_rate": 0.0005496690424756164, "loss": 2.0089, "step": 5060 }, { "epoch": 0.94, "learning_rate": 0.0005482213192427174, "loss": 1.9495, "step": 5070 }, { "epoch": 0.94, "learning_rate": 0.0005467731878296688, "loss": 1.9701, "step": 5080 }, { "epoch": 0.94, "learning_rate": 0.0005453246604945034, "loss": 1.9588, "step": 5090 }, { "epoch": 0.94, "learning_rate": 0.0005438757494986044, "loss": 1.9833, "step": 5100 }, { "epoch": 0.95, "learning_rate": 0.0005424264671066038, "loss": 1.9974, "step": 5110 }, { "epoch": 0.95, "learning_rate": 0.0005409768255862766, "loss": 1.9746, "step": 5120 }, { "epoch": 0.95, "learning_rate": 0.0005395268372084378, "loss": 1.9757, "step": 5130 }, { "epoch": 0.95, "learning_rate": 0.0005380765142468387, "loss": 1.9908, "step": 5140 }, { "epoch": 0.95, "learning_rate": 0.0005366258689780625, "loss": 2.006, "step": 5150 }, { "epoch": 0.96, "learning_rate": 0.0005351749136814208, "loss": 1.9957, "step": 5160 }, { "epoch": 0.96, "learning_rate": 0.0005337236606388495, "loss": 1.959, "step": 5170 }, { "epoch": 0.96, "learning_rate": 0.0005322721221348047, "loss": 2.0027, "step": 5180 }, { "epoch": 0.96, "learning_rate": 0.000530820310456159, "loss": 1.9248, "step": 5190 }, { "epoch": 0.96, "learning_rate": 0.0005293682378920974, "loss": 1.9742, "step": 5200 }, { "epoch": 0.96, "learning_rate": 0.000527915916734013, "loss": 1.9844, "step": 5210 }, { "epoch": 0.97, "learning_rate": 0.0005264633592754035, "loss": 1.9985, "step": 5220 }, { "epoch": 0.97, "learning_rate": 0.0005250105778117661, "loss": 2.0074, "step": 5230 }, { "epoch": 0.97, "learning_rate": 0.0005235575846404952, "loss": 1.9901, "step": 5240 }, { "epoch": 0.97, "learning_rate": 0.0005221043920607763, "loss": 1.9861, "step": 5250 }, { "epoch": 0.97, "learning_rate": 0.0005206510123734837, "loss": 1.9576, "step": 5260 }, { "epoch": 0.98, "learning_rate": 0.0005191974578810746, "loss": 1.9741, "step": 5270 }, { "epoch": 0.98, "learning_rate": 0.0005177437408874868, "loss": 1.9794, "step": 5280 }, { "epoch": 0.98, "learning_rate": 0.0005162898736980327, "loss": 1.9883, "step": 5290 }, { "epoch": 0.98, "learning_rate": 0.0005148358686192967, "loss": 1.969, "step": 5300 }, { "epoch": 0.98, "learning_rate": 0.0005133817379590302, "loss": 2.0018, "step": 5310 }, { "epoch": 0.99, "learning_rate": 0.0005119274940260476, "loss": 1.971, "step": 5320 }, { "epoch": 0.99, "learning_rate": 0.0005104731491301225, "loss": 1.9564, "step": 5330 }, { "epoch": 0.99, "learning_rate": 0.0005090187155818823, "loss": 1.9798, "step": 5340 }, { "epoch": 0.99, "learning_rate": 0.0005075642056927057, "loss": 2.0331, "step": 5350 }, { "epoch": 0.99, "learning_rate": 0.000506109631774617, "loss": 1.9505, "step": 5360 }, { "epoch": 0.99, "learning_rate": 0.0005046550061401825, "loss": 1.9348, "step": 5370 }, { "epoch": 1.0, "learning_rate": 0.0005032003411024067, "loss": 2.0099, "step": 5380 }, { "epoch": 1.0, "learning_rate": 0.0005017456489746275, "loss": 1.9756, "step": 5390 }, { "epoch": 1.0, "learning_rate": 0.0005002909420704116, "loss": 1.9444, "step": 5400 }, { "epoch": 1.0, "learning_rate": 0.0004988362327034516, "loss": 1.976, "step": 5410 }, { "epoch": 1.0, "learning_rate": 0.0004973815331874603, "loss": 1.9593, "step": 5420 }, { "epoch": 1.01, "learning_rate": 0.0004959268558360675, "loss": 1.9251, "step": 5430 }, { "epoch": 1.01, "learning_rate": 0.0004944722129627151, "loss": 1.972, "step": 5440 }, { "epoch": 1.01, "learning_rate": 0.0004930176168805531, "loss": 1.9307, "step": 5450 }, { "epoch": 1.01, "learning_rate": 0.0004915630799023359, "loss": 1.9541, "step": 5460 }, { "epoch": 1.01, "learning_rate": 0.0004901086143403172, "loss": 1.9212, "step": 5470 }, { "epoch": 1.02, "learning_rate": 0.0004886542325061464, "loss": 1.965, "step": 5480 }, { "epoch": 1.02, "learning_rate": 0.0004871999467107639, "loss": 2.0083, "step": 5490 }, { "epoch": 1.02, "learning_rate": 0.0004857457692642974, "loss": 1.9676, "step": 5500 }, { "epoch": 1.02, "learning_rate": 0.000484291712475957, "loss": 1.9384, "step": 5510 }, { "epoch": 1.02, "learning_rate": 0.00048283778865393243, "loss": 1.9339, "step": 5520 }, { "epoch": 1.02, "learning_rate": 0.0004813840101052869, "loss": 2.0182, "step": 5530 }, { "epoch": 1.03, "learning_rate": 0.0004799303891358545, "loss": 1.9262, "step": 5540 }, { "epoch": 1.03, "learning_rate": 0.0004784769380501352, "loss": 1.9397, "step": 5550 }, { "epoch": 1.03, "learning_rate": 0.0004770236691511908, "loss": 1.9404, "step": 5560 }, { "epoch": 1.03, "learning_rate": 0.0004755705947405417, "loss": 1.9415, "step": 5570 }, { "epoch": 1.03, "learning_rate": 0.00047411772711806105, "loss": 1.9292, "step": 5580 }, { "epoch": 1.04, "learning_rate": 0.000472665078581872, "loss": 1.9487, "step": 5590 }, { "epoch": 1.04, "learning_rate": 0.00047121266142824354, "loss": 1.9435, "step": 5600 }, { "epoch": 1.04, "learning_rate": 0.0004697604879514853, "loss": 1.9145, "step": 5610 }, { "epoch": 1.04, "learning_rate": 0.0004683085704438451, "loss": 1.9975, "step": 5620 }, { "epoch": 1.04, "learning_rate": 0.00046685692119540357, "loss": 1.9696, "step": 5630 }, { "epoch": 1.04, "learning_rate": 0.00046540555249397056, "loss": 1.9598, "step": 5640 }, { "epoch": 1.05, "learning_rate": 0.00046395447662498117, "loss": 1.9366, "step": 5650 }, { "epoch": 1.05, "learning_rate": 0.00046250370587139206, "loss": 1.9748, "step": 5660 }, { "epoch": 1.05, "learning_rate": 0.00046105325251357695, "loss": 1.9125, "step": 5670 }, { "epoch": 1.05, "learning_rate": 0.00045960312882922306, "loss": 1.9595, "step": 5680 }, { "epoch": 1.05, "learning_rate": 0.00045815334709322665, "loss": 1.9369, "step": 5690 }, { "epoch": 1.06, "learning_rate": 0.0004567039195775899, "loss": 1.9564, "step": 5700 }, { "epoch": 1.06, "learning_rate": 0.000455254858551316, "loss": 1.9274, "step": 5710 }, { "epoch": 1.06, "learning_rate": 0.00045380617628030693, "loss": 1.9366, "step": 5720 }, { "epoch": 1.06, "learning_rate": 0.00045235788502725777, "loss": 1.9446, "step": 5730 }, { "epoch": 1.06, "learning_rate": 0.00045090999705155383, "loss": 1.9236, "step": 5740 }, { "epoch": 1.07, "learning_rate": 0.0004494625246091672, "loss": 1.9397, "step": 5750 }, { "epoch": 1.07, "learning_rate": 0.00044801547995255194, "loss": 1.9849, "step": 5760 }, { "epoch": 1.07, "learning_rate": 0.00044656887533054193, "loss": 1.9367, "step": 5770 }, { "epoch": 1.07, "learning_rate": 0.0004451227229882455, "loss": 1.9451, "step": 5780 }, { "epoch": 1.07, "learning_rate": 0.0004436770351669429, "loss": 1.9578, "step": 5790 }, { "epoch": 1.07, "learning_rate": 0.0004422318241039821, "loss": 1.9095, "step": 5800 }, { "epoch": 1.08, "learning_rate": 0.00044078710203267576, "loss": 1.9337, "step": 5810 }, { "epoch": 1.08, "learning_rate": 0.0004393428811821971, "loss": 1.9904, "step": 5820 }, { "epoch": 1.08, "learning_rate": 0.00043789917377747695, "loss": 1.9569, "step": 5830 }, { "epoch": 1.08, "learning_rate": 0.0004364559920390996, "loss": 1.9423, "step": 5840 }, { "epoch": 1.08, "learning_rate": 0.00043501334818320004, "loss": 2.0013, "step": 5850 }, { "epoch": 1.09, "learning_rate": 0.0004335712544213597, "loss": 1.9585, "step": 5860 }, { "epoch": 1.09, "learning_rate": 0.0004321297229605044, "loss": 1.9148, "step": 5870 }, { "epoch": 1.09, "learning_rate": 0.00043068876600279964, "loss": 1.9558, "step": 5880 }, { "epoch": 1.09, "learning_rate": 0.0004292483957455482, "loss": 1.9899, "step": 5890 }, { "epoch": 1.09, "learning_rate": 0.00042780862438108655, "loss": 1.9597, "step": 5900 }, { "epoch": 1.09, "learning_rate": 0.00042636946409668123, "loss": 1.9603, "step": 5910 }, { "epoch": 1.1, "learning_rate": 0.00042493092707442717, "loss": 1.9771, "step": 5920 }, { "epoch": 1.1, "learning_rate": 0.0004234930254911427, "loss": 1.9416, "step": 5930 }, { "epoch": 1.1, "learning_rate": 0.00042205577151826747, "loss": 1.9417, "step": 5940 }, { "epoch": 1.1, "learning_rate": 0.00042061917732175966, "loss": 1.9636, "step": 5950 }, { "epoch": 1.1, "learning_rate": 0.000419183255061992, "loss": 1.966, "step": 5960 }, { "epoch": 1.11, "learning_rate": 0.0004177480168936504, "loss": 1.9885, "step": 5970 }, { "epoch": 1.11, "learning_rate": 0.00041631347496562913, "loss": 1.9525, "step": 5980 }, { "epoch": 1.11, "learning_rate": 0.0004148796414209297, "loss": 1.9809, "step": 5990 }, { "epoch": 1.11, "learning_rate": 0.0004134465283965568, "loss": 1.8982, "step": 6000 }, { "epoch": 1.11, "learning_rate": 0.00041201414802341643, "loss": 1.8978, "step": 6010 }, { "epoch": 1.12, "learning_rate": 0.00041058251242621295, "loss": 1.9406, "step": 6020 }, { "epoch": 1.12, "learning_rate": 0.0004091516337233464, "loss": 1.919, "step": 6030 }, { "epoch": 1.12, "learning_rate": 0.00040772152402680973, "loss": 1.9897, "step": 6040 }, { "epoch": 1.12, "learning_rate": 0.00040629219544208644, "loss": 1.9473, "step": 6050 }, { "epoch": 1.12, "learning_rate": 0.00040486366006804817, "loss": 1.9589, "step": 6060 }, { "epoch": 1.12, "learning_rate": 0.00040343592999685264, "loss": 1.8972, "step": 6070 }, { "epoch": 1.13, "learning_rate": 0.0004020090173138404, "loss": 1.9611, "step": 6080 }, { "epoch": 1.13, "learning_rate": 0.00040058293409743316, "loss": 1.9164, "step": 6090 }, { "epoch": 1.13, "learning_rate": 0.00039915769241903175, "loss": 1.939, "step": 6100 }, { "epoch": 1.13, "learning_rate": 0.0003977333043429129, "loss": 1.9233, "step": 6110 }, { "epoch": 1.13, "learning_rate": 0.0003963097819261288, "loss": 1.9742, "step": 6120 }, { "epoch": 1.14, "learning_rate": 0.0003948871372184033, "loss": 1.9719, "step": 6130 }, { "epoch": 1.14, "learning_rate": 0.0003934653822620309, "loss": 1.9861, "step": 6140 }, { "epoch": 1.14, "learning_rate": 0.0003920445290917747, "loss": 1.9514, "step": 6150 }, { "epoch": 1.14, "learning_rate": 0.0003906245897347643, "loss": 2.0047, "step": 6160 }, { "epoch": 1.14, "learning_rate": 0.0003892055762103943, "loss": 1.934, "step": 6170 }, { "epoch": 1.14, "learning_rate": 0.0003877875005302223, "loss": 1.9619, "step": 6180 }, { "epoch": 1.15, "learning_rate": 0.00038637037469786727, "loss": 1.9085, "step": 6190 }, { "epoch": 1.15, "learning_rate": 0.00038495421070890796, "loss": 1.9222, "step": 6200 }, { "epoch": 1.15, "learning_rate": 0.0003835390205507812, "loss": 1.9409, "step": 6210 }, { "epoch": 1.15, "learning_rate": 0.00038212481620268125, "loss": 1.9439, "step": 6220 }, { "epoch": 1.15, "learning_rate": 0.000380711609635457, "loss": 1.974, "step": 6230 }, { "epoch": 1.16, "learning_rate": 0.000379299412811512, "loss": 1.9141, "step": 6240 }, { "epoch": 1.16, "learning_rate": 0.00037788823768470227, "loss": 1.8982, "step": 6250 }, { "epoch": 1.16, "learning_rate": 0.0003764780962002352, "loss": 1.9384, "step": 6260 }, { "epoch": 1.16, "learning_rate": 0.0003750690002945695, "loss": 1.9174, "step": 6270 }, { "epoch": 1.16, "learning_rate": 0.00037366096189531253, "loss": 1.927, "step": 6280 }, { "epoch": 1.17, "learning_rate": 0.0003722539929211205, "loss": 1.9449, "step": 6290 }, { "epoch": 1.17, "learning_rate": 0.0003708481052815973, "loss": 2.008, "step": 6300 }, { "epoch": 1.17, "learning_rate": 0.0003694433108771933, "loss": 1.9345, "step": 6310 }, { "epoch": 1.17, "learning_rate": 0.00036803962159910543, "loss": 1.8619, "step": 6320 }, { "epoch": 1.17, "learning_rate": 0.0003666370493291757, "loss": 1.9612, "step": 6330 }, { "epoch": 1.17, "learning_rate": 0.0003652356059397909, "loss": 1.9371, "step": 6340 }, { "epoch": 1.18, "learning_rate": 0.00036383530329378225, "loss": 1.9193, "step": 6350 }, { "epoch": 1.18, "learning_rate": 0.00036243615324432494, "loss": 1.9509, "step": 6360 }, { "epoch": 1.18, "learning_rate": 0.00036103816763483775, "loss": 1.9517, "step": 6370 }, { "epoch": 1.18, "learning_rate": 0.00035964135829888287, "loss": 1.9141, "step": 6380 }, { "epoch": 1.18, "learning_rate": 0.00035824573706006547, "loss": 1.9985, "step": 6390 }, { "epoch": 1.19, "learning_rate": 0.00035685131573193386, "loss": 1.9791, "step": 6400 }, { "epoch": 1.19, "learning_rate": 0.00035545810611787925, "loss": 1.8865, "step": 6410 }, { "epoch": 1.19, "learning_rate": 0.0003540661200110367, "loss": 1.9409, "step": 6420 }, { "epoch": 1.19, "learning_rate": 0.00035267536919418395, "loss": 1.9928, "step": 6430 }, { "epoch": 1.19, "learning_rate": 0.00035128586543964243, "loss": 1.9149, "step": 6440 }, { "epoch": 1.19, "learning_rate": 0.00034989762050917784, "loss": 1.9837, "step": 6450 }, { "epoch": 1.2, "learning_rate": 0.0003485106461538999, "loss": 1.9142, "step": 6460 }, { "epoch": 1.2, "learning_rate": 0.00034712495411416385, "loss": 1.9284, "step": 6470 }, { "epoch": 1.2, "learning_rate": 0.00034574055611946984, "loss": 1.8848, "step": 6480 }, { "epoch": 1.2, "learning_rate": 0.00034435746388836457, "loss": 1.9295, "step": 6490 }, { "epoch": 1.2, "learning_rate": 0.00034297568912834177, "loss": 1.9635, "step": 6500 }, { "epoch": 1.21, "learning_rate": 0.0003415952435357431, "loss": 1.9794, "step": 6510 }, { "epoch": 1.21, "learning_rate": 0.00034021613879565944, "loss": 1.9347, "step": 6520 }, { "epoch": 1.21, "learning_rate": 0.00033883838658183154, "loss": 1.9282, "step": 6530 }, { "epoch": 1.21, "learning_rate": 0.00033746199855655125, "loss": 1.9973, "step": 6540 }, { "epoch": 1.21, "learning_rate": 0.000336086986370563, "loss": 1.9718, "step": 6550 }, { "epoch": 1.22, "learning_rate": 0.0003347133616629652, "loss": 1.9157, "step": 6560 }, { "epoch": 1.22, "learning_rate": 0.00033334113606111194, "loss": 1.9528, "step": 6570 }, { "epoch": 1.22, "learning_rate": 0.0003319703211805135, "loss": 1.9369, "step": 6580 }, { "epoch": 1.22, "learning_rate": 0.00033060092862473967, "loss": 1.9552, "step": 6590 }, { "epoch": 1.22, "learning_rate": 0.00032923296998532003, "loss": 1.9581, "step": 6600 }, { "epoch": 1.22, "learning_rate": 0.0003278664568416465, "loss": 1.9406, "step": 6610 }, { "epoch": 1.23, "learning_rate": 0.0003265014007608758, "loss": 1.9672, "step": 6620 }, { "epoch": 1.23, "learning_rate": 0.0003251378132978304, "loss": 1.9166, "step": 6630 }, { "epoch": 1.23, "learning_rate": 0.00032377570599490164, "loss": 1.9356, "step": 6640 }, { "epoch": 1.23, "learning_rate": 0.00032241509038195165, "loss": 1.9092, "step": 6650 }, { "epoch": 1.23, "learning_rate": 0.0003210559779762157, "loss": 1.9343, "step": 6660 }, { "epoch": 1.24, "learning_rate": 0.00031969838028220513, "loss": 1.9796, "step": 6670 }, { "epoch": 1.24, "learning_rate": 0.00031834230879160947, "loss": 1.8932, "step": 6680 }, { "epoch": 1.24, "learning_rate": 0.0003169877749831993, "loss": 1.9279, "step": 6690 }, { "epoch": 1.24, "learning_rate": 0.000315634790322729, "loss": 1.9444, "step": 6700 }, { "epoch": 1.24, "learning_rate": 0.00031428336626284037, "loss": 1.9296, "step": 6710 }, { "epoch": 1.24, "learning_rate": 0.00031293351424296467, "loss": 1.9341, "step": 6720 }, { "epoch": 1.25, "learning_rate": 0.00031158524568922663, "loss": 1.9589, "step": 6730 }, { "epoch": 1.25, "learning_rate": 0.00031023857201434703, "loss": 1.8997, "step": 6740 }, { "epoch": 1.25, "learning_rate": 0.0003088935046175466, "loss": 1.9417, "step": 6750 }, { "epoch": 1.25, "learning_rate": 0.0003075500548844492, "loss": 1.9058, "step": 6760 }, { "epoch": 1.25, "learning_rate": 0.0003062082341869862, "loss": 1.9511, "step": 6770 }, { "epoch": 1.26, "learning_rate": 0.00030486805388329894, "loss": 1.922, "step": 6780 }, { "epoch": 1.26, "learning_rate": 0.00030352952531764363, "loss": 1.9634, "step": 6790 }, { "epoch": 1.26, "learning_rate": 0.0003021926598202949, "loss": 1.9555, "step": 6800 }, { "epoch": 1.26, "learning_rate": 0.0003008574687074498, "loss": 1.9254, "step": 6810 }, { "epoch": 1.26, "learning_rate": 0.00029952396328113265, "loss": 1.9211, "step": 6820 }, { "epoch": 1.27, "learning_rate": 0.00029819215482909846, "loss": 1.9504, "step": 6830 }, { "epoch": 1.27, "learning_rate": 0.00029686205462473784, "loss": 1.9174, "step": 6840 }, { "epoch": 1.27, "learning_rate": 0.00029553367392698177, "loss": 1.9368, "step": 6850 }, { "epoch": 1.27, "learning_rate": 0.00029420702398020595, "loss": 1.8939, "step": 6860 }, { "epoch": 1.27, "learning_rate": 0.00029288211601413586, "loss": 1.9097, "step": 6870 }, { "epoch": 1.27, "learning_rate": 0.00029155896124375156, "loss": 1.9885, "step": 6880 }, { "epoch": 1.28, "learning_rate": 0.0002902375708691925, "loss": 1.9752, "step": 6890 }, { "epoch": 1.28, "learning_rate": 0.0002889179560756634, "loss": 1.9321, "step": 6900 }, { "epoch": 1.28, "learning_rate": 0.0002876001280333391, "loss": 1.9345, "step": 6910 }, { "epoch": 1.28, "learning_rate": 0.0002862840978972698, "loss": 1.9192, "step": 6920 }, { "epoch": 1.28, "learning_rate": 0.0002849698768072877, "loss": 1.9499, "step": 6930 }, { "epoch": 1.29, "learning_rate": 0.0002836574758879107, "loss": 1.9217, "step": 6940 }, { "epoch": 1.29, "learning_rate": 0.0002823469062482503, "loss": 1.9318, "step": 6950 }, { "epoch": 1.29, "learning_rate": 0.0002810381789819167, "loss": 1.9422, "step": 6960 }, { "epoch": 1.29, "learning_rate": 0.0002797313051669247, "loss": 1.9562, "step": 6970 }, { "epoch": 1.29, "learning_rate": 0.0002784262958656003, "loss": 1.9189, "step": 6980 }, { "epoch": 1.29, "learning_rate": 0.00027712316212448663, "loss": 1.9453, "step": 6990 }, { "epoch": 1.3, "learning_rate": 0.00027582191497425114, "loss": 1.9335, "step": 7000 }, { "epoch": 1.3, "learning_rate": 0.000274522565429591, "loss": 1.9724, "step": 7010 }, { "epoch": 1.3, "learning_rate": 0.0002732251244891421, "loss": 1.9236, "step": 7020 }, { "epoch": 1.3, "learning_rate": 0.00027192960313538307, "loss": 1.9407, "step": 7030 }, { "epoch": 1.3, "learning_rate": 0.0002706360123345445, "loss": 1.8948, "step": 7040 }, { "epoch": 1.31, "learning_rate": 0.00026934436303651554, "loss": 1.9354, "step": 7050 }, { "epoch": 1.31, "learning_rate": 0.00026805466617475007, "loss": 1.955, "step": 7060 }, { "epoch": 1.31, "learning_rate": 0.0002667669326661767, "loss": 1.9595, "step": 7070 }, { "epoch": 1.31, "learning_rate": 0.0002654811734111033, "loss": 1.9766, "step": 7080 }, { "epoch": 1.31, "learning_rate": 0.00026419739929312713, "loss": 1.9626, "step": 7090 }, { "epoch": 1.32, "learning_rate": 0.0002629156211790417, "loss": 1.9096, "step": 7100 }, { "epoch": 1.32, "learning_rate": 0.00026163584991874413, "loss": 1.9522, "step": 7110 }, { "epoch": 1.32, "learning_rate": 0.00026035809634514543, "loss": 1.9387, "step": 7120 }, { "epoch": 1.32, "learning_rate": 0.00025908237127407626, "loss": 1.8788, "step": 7130 }, { "epoch": 1.32, "learning_rate": 0.0002578086855041973, "loss": 1.9326, "step": 7140 }, { "epoch": 1.32, "learning_rate": 0.0002565370498169064, "loss": 1.92, "step": 7150 }, { "epoch": 1.33, "learning_rate": 0.0002552674749762487, "loss": 1.9131, "step": 7160 }, { "epoch": 1.33, "learning_rate": 0.0002539999717288246, "loss": 1.9263, "step": 7170 }, { "epoch": 1.33, "learning_rate": 0.00025286099888629365, "loss": 1.919, "step": 7180 }, { "epoch": 1.33, "learning_rate": 0.000251597461210021, "loss": 1.9281, "step": 7190 }, { "epoch": 1.33, "learning_rate": 0.00025033602619263507, "loss": 1.9831, "step": 7200 }, { "epoch": 1.34, "learning_rate": 0.00024907670451183454, "loss": 1.9535, "step": 7210 }, { "epoch": 1.34, "learning_rate": 0.00024781950682742983, "loss": 1.8798, "step": 7220 }, { "epoch": 1.34, "learning_rate": 0.00024656444378125204, "loss": 1.9124, "step": 7230 }, { "epoch": 1.34, "learning_rate": 0.00024531152599706346, "loss": 1.9472, "step": 7240 }, { "epoch": 1.34, "learning_rate": 0.0002440607640804668, "loss": 1.9697, "step": 7250 }, { "epoch": 1.34, "learning_rate": 0.00024281216861881633, "loss": 1.938, "step": 7260 }, { "epoch": 1.35, "learning_rate": 0.00024156575018112787, "loss": 1.9035, "step": 7270 }, { "epoch": 1.35, "learning_rate": 0.00024032151931798918, "loss": 1.9379, "step": 7280 }, { "epoch": 1.35, "learning_rate": 0.0002390794865614711, "loss": 1.9176, "step": 7290 }, { "epoch": 1.35, "learning_rate": 0.0002378396624250375, "loss": 1.9392, "step": 7300 }, { "epoch": 1.35, "learning_rate": 0.0002366020574034576, "loss": 1.9235, "step": 7310 }, { "epoch": 1.36, "learning_rate": 0.00023536668197271588, "loss": 1.939, "step": 7320 }, { "epoch": 1.36, "learning_rate": 0.0002341335465899243, "loss": 1.889, "step": 7330 }, { "epoch": 1.36, "learning_rate": 0.00023290266169323354, "loss": 1.9247, "step": 7340 }, { "epoch": 1.36, "learning_rate": 0.0002316740377017438, "loss": 1.935, "step": 7350 }, { "epoch": 1.36, "learning_rate": 0.0002304476850154183, "loss": 1.9198, "step": 7360 }, { "epoch": 1.37, "learning_rate": 0.00022922361401499325, "loss": 1.9068, "step": 7370 }, { "epoch": 1.37, "learning_rate": 0.00022800183506189238, "loss": 1.9505, "step": 7380 }, { "epoch": 1.37, "learning_rate": 0.00022678235849813645, "loss": 1.9459, "step": 7390 }, { "epoch": 1.37, "learning_rate": 0.00022556519464625807, "loss": 1.9302, "step": 7400 }, { "epoch": 1.37, "learning_rate": 0.00022435035380921321, "loss": 1.9345, "step": 7410 }, { "epoch": 1.37, "learning_rate": 0.0002231378462702935, "loss": 1.9304, "step": 7420 }, { "epoch": 1.38, "learning_rate": 0.00022192768229304107, "loss": 1.9457, "step": 7430 }, { "epoch": 1.38, "learning_rate": 0.0002207198721211593, "loss": 1.9395, "step": 7440 }, { "epoch": 1.38, "learning_rate": 0.00021951442597842785, "loss": 1.9867, "step": 7450 }, { "epoch": 1.38, "learning_rate": 0.00021831135406861558, "loss": 1.9217, "step": 7460 }, { "epoch": 1.38, "learning_rate": 0.00021711066657539342, "loss": 1.991, "step": 7470 }, { "epoch": 1.39, "learning_rate": 0.00021591237366225008, "loss": 1.9492, "step": 7480 }, { "epoch": 1.39, "learning_rate": 0.00021471648547240365, "loss": 1.9376, "step": 7490 }, { "epoch": 1.39, "learning_rate": 0.00021352301212871762, "loss": 1.9436, "step": 7500 }, { "epoch": 1.39, "learning_rate": 0.00021233196373361397, "loss": 1.9267, "step": 7510 }, { "epoch": 1.39, "learning_rate": 0.00021114335036898852, "loss": 1.9483, "step": 7520 }, { "epoch": 1.39, "learning_rate": 0.0002099571820961252, "loss": 1.9485, "step": 7530 }, { "epoch": 1.4, "learning_rate": 0.00020877346895561082, "loss": 1.9607, "step": 7540 }, { "epoch": 1.4, "learning_rate": 0.00020759222096725034, "loss": 1.9665, "step": 7550 }, { "epoch": 1.4, "learning_rate": 0.0002064134481299814, "loss": 1.9202, "step": 7560 }, { "epoch": 1.4, "learning_rate": 0.00020523716042179075, "loss": 1.9375, "step": 7570 }, { "epoch": 1.4, "learning_rate": 0.00020406336779962888, "loss": 1.9406, "step": 7580 }, { "epoch": 1.41, "learning_rate": 0.00020289208019932635, "loss": 1.9357, "step": 7590 }, { "epoch": 1.41, "learning_rate": 0.0002017233075355089, "loss": 1.9432, "step": 7600 }, { "epoch": 1.41, "learning_rate": 0.0002005570597015145, "loss": 1.962, "step": 7610 }, { "epoch": 1.41, "learning_rate": 0.00019939334656930892, "loss": 1.9237, "step": 7620 }, { "epoch": 1.41, "learning_rate": 0.00019823217798940247, "loss": 1.9688, "step": 7630 }, { "epoch": 1.42, "learning_rate": 0.00019707356379076668, "loss": 1.9524, "step": 7640 }, { "epoch": 1.42, "learning_rate": 0.00019591751378075035, "loss": 1.981, "step": 7650 }, { "epoch": 1.42, "learning_rate": 0.00019476403774499774, "loss": 1.9405, "step": 7660 }, { "epoch": 1.42, "learning_rate": 0.0001936131454473649, "loss": 1.9, "step": 7670 }, { "epoch": 1.42, "learning_rate": 0.00019246484662983733, "loss": 1.8852, "step": 7680 }, { "epoch": 1.42, "learning_rate": 0.00019131915101244752, "loss": 1.9447, "step": 7690 }, { "epoch": 1.43, "learning_rate": 0.00019017606829319205, "loss": 1.9221, "step": 7700 }, { "epoch": 1.43, "learning_rate": 0.00018903560814795086, "loss": 1.9481, "step": 7710 }, { "epoch": 1.43, "learning_rate": 0.00018789778023040355, "loss": 1.8919, "step": 7720 }, { "epoch": 1.43, "learning_rate": 0.0001867625941719499, "loss": 1.94, "step": 7730 }, { "epoch": 1.43, "learning_rate": 0.00018563005958162587, "loss": 1.9089, "step": 7740 }, { "epoch": 1.44, "learning_rate": 0.00018450018604602414, "loss": 1.9105, "step": 7750 }, { "epoch": 1.44, "learning_rate": 0.0001833729831292123, "loss": 1.9568, "step": 7760 }, { "epoch": 1.44, "learning_rate": 0.00018224846037265125, "loss": 1.9156, "step": 7770 }, { "epoch": 1.44, "learning_rate": 0.0001811266272951162, "loss": 1.9445, "step": 7780 }, { "epoch": 1.44, "learning_rate": 0.0001800074933926138, "loss": 1.8871, "step": 7790 }, { "epoch": 1.44, "learning_rate": 0.00017889106813830363, "loss": 1.9684, "step": 7800 }, { "epoch": 1.45, "learning_rate": 0.0001777773609824173, "loss": 1.9411, "step": 7810 }, { "epoch": 1.45, "learning_rate": 0.00017666638135217783, "loss": 1.9295, "step": 7820 }, { "epoch": 1.45, "learning_rate": 0.0001755581386517216, "loss": 1.9499, "step": 7830 }, { "epoch": 1.45, "learning_rate": 0.0001744526422620165, "loss": 1.9499, "step": 7840 }, { "epoch": 1.45, "learning_rate": 0.00017334990154078446, "loss": 1.9262, "step": 7850 }, { "epoch": 1.46, "learning_rate": 0.00017224992582242076, "loss": 1.9554, "step": 7860 }, { "epoch": 1.46, "learning_rate": 0.00017115272441791635, "loss": 1.9685, "step": 7870 }, { "epoch": 1.46, "learning_rate": 0.000170058306614778, "loss": 1.9021, "step": 7880 }, { "epoch": 1.46, "learning_rate": 0.00016896668167695018, "loss": 1.8804, "step": 7890 }, { "epoch": 1.46, "learning_rate": 0.00016787785884473665, "loss": 1.9164, "step": 7900 }, { "epoch": 1.47, "learning_rate": 0.00016679184733472163, "loss": 1.9409, "step": 7910 }, { "epoch": 1.47, "learning_rate": 0.00016570865633969283, "loss": 1.9329, "step": 7920 }, { "epoch": 1.47, "learning_rate": 0.00016462829502856286, "loss": 1.9518, "step": 7930 }, { "epoch": 1.47, "learning_rate": 0.00016355077254629196, "loss": 1.9494, "step": 7940 }, { "epoch": 1.47, "learning_rate": 0.00016247609801381003, "loss": 1.9376, "step": 7950 }, { "epoch": 1.47, "learning_rate": 0.00016140428052794042, "loss": 1.9312, "step": 7960 }, { "epoch": 1.48, "learning_rate": 0.00016033532916132203, "loss": 1.9528, "step": 7970 }, { "epoch": 1.48, "learning_rate": 0.00015926925296233296, "loss": 1.9698, "step": 7980 }, { "epoch": 1.48, "learning_rate": 0.00015820606095501405, "loss": 1.936, "step": 7990 }, { "epoch": 1.48, "learning_rate": 0.00015714576213899167, "loss": 1.962, "step": 8000 } ], "max_steps": 10798, "num_train_epochs": 2, "total_flos": 7.590046841631146e+18, "trial_name": null, "trial_params": null }