|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 500, |
|
"global_step": 4955, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0020181634712411706, |
|
"grad_norm": 84.60143280029297, |
|
"learning_rate": 2.0161290322580646e-06, |
|
"loss": 7.1336, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.004036326942482341, |
|
"grad_norm": 15.496525764465332, |
|
"learning_rate": 4.032258064516129e-06, |
|
"loss": 5.515, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.006054490413723511, |
|
"grad_norm": 10.33206844329834, |
|
"learning_rate": 6.048387096774194e-06, |
|
"loss": 4.8214, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.008072653884964682, |
|
"grad_norm": 19.034317016601562, |
|
"learning_rate": 8.064516129032258e-06, |
|
"loss": 4.4868, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.010090817356205853, |
|
"grad_norm": 4.963449001312256, |
|
"learning_rate": 1.0080645161290323e-05, |
|
"loss": 4.1247, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.012108980827447022, |
|
"grad_norm": 4.877665042877197, |
|
"learning_rate": 1.2096774193548388e-05, |
|
"loss": 3.756, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.014127144298688193, |
|
"grad_norm": 4.284060955047607, |
|
"learning_rate": 1.4112903225806454e-05, |
|
"loss": 3.4368, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.016145307769929364, |
|
"grad_norm": 3.7732603549957275, |
|
"learning_rate": 1.6129032258064517e-05, |
|
"loss": 3.2998, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.018163471241170535, |
|
"grad_norm": 3.365959405899048, |
|
"learning_rate": 1.8145161290322583e-05, |
|
"loss": 3.1228, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.020181634712411706, |
|
"grad_norm": 2.8467206954956055, |
|
"learning_rate": 2.0161290322580645e-05, |
|
"loss": 2.9786, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.022199798183652877, |
|
"grad_norm": 3.936636209487915, |
|
"learning_rate": 2.217741935483871e-05, |
|
"loss": 2.815, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.024217961654894045, |
|
"grad_norm": 2.843057155609131, |
|
"learning_rate": 2.4193548387096777e-05, |
|
"loss": 2.7479, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.026236125126135216, |
|
"grad_norm": 2.4136409759521484, |
|
"learning_rate": 2.620967741935484e-05, |
|
"loss": 2.6495, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.028254288597376387, |
|
"grad_norm": 4.50978946685791, |
|
"learning_rate": 2.822580645161291e-05, |
|
"loss": 2.6704, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.030272452068617558, |
|
"grad_norm": 2.8832032680511475, |
|
"learning_rate": 3.024193548387097e-05, |
|
"loss": 2.5093, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.03229061553985873, |
|
"grad_norm": 3.834477424621582, |
|
"learning_rate": 3.2258064516129034e-05, |
|
"loss": 2.515, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.034308779011099896, |
|
"grad_norm": 2.2256412506103516, |
|
"learning_rate": 3.427419354838709e-05, |
|
"loss": 2.4173, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.03632694248234107, |
|
"grad_norm": 2.659034013748169, |
|
"learning_rate": 3.6290322580645165e-05, |
|
"loss": 2.3726, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.03834510595358224, |
|
"grad_norm": 2.66379714012146, |
|
"learning_rate": 3.8306451612903224e-05, |
|
"loss": 2.4307, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.04036326942482341, |
|
"grad_norm": 2.4209799766540527, |
|
"learning_rate": 4.032258064516129e-05, |
|
"loss": 2.4125, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.04238143289606458, |
|
"grad_norm": 2.361922025680542, |
|
"learning_rate": 4.2338709677419356e-05, |
|
"loss": 2.331, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.044399596367305755, |
|
"grad_norm": 2.37805438041687, |
|
"learning_rate": 4.435483870967742e-05, |
|
"loss": 2.3345, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.04641775983854692, |
|
"grad_norm": 2.6350855827331543, |
|
"learning_rate": 4.637096774193548e-05, |
|
"loss": 2.3108, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.04843592330978809, |
|
"grad_norm": 2.274717092514038, |
|
"learning_rate": 4.8387096774193554e-05, |
|
"loss": 2.3286, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.050454086781029264, |
|
"grad_norm": 2.4076735973358154, |
|
"learning_rate": 5.040322580645161e-05, |
|
"loss": 2.2257, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.05247225025227043, |
|
"grad_norm": 2.0606577396392822, |
|
"learning_rate": 5.241935483870968e-05, |
|
"loss": 2.2772, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.054490413723511606, |
|
"grad_norm": 2.362431287765503, |
|
"learning_rate": 5.443548387096774e-05, |
|
"loss": 2.2675, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.056508577194752774, |
|
"grad_norm": 2.4200310707092285, |
|
"learning_rate": 5.645161290322582e-05, |
|
"loss": 2.236, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.05852674066599395, |
|
"grad_norm": 2.1776976585388184, |
|
"learning_rate": 5.8467741935483876e-05, |
|
"loss": 2.1248, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.060544904137235116, |
|
"grad_norm": 2.3392269611358643, |
|
"learning_rate": 6.048387096774194e-05, |
|
"loss": 2.1562, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.06256306760847628, |
|
"grad_norm": 2.3293347358703613, |
|
"learning_rate": 6.25e-05, |
|
"loss": 2.1715, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.06458123107971746, |
|
"grad_norm": 1.9923762083053589, |
|
"learning_rate": 6.451612903225807e-05, |
|
"loss": 2.1212, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.06659939455095863, |
|
"grad_norm": 2.042971611022949, |
|
"learning_rate": 6.653225806451613e-05, |
|
"loss": 2.0921, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.06861755802219979, |
|
"grad_norm": 2.2022523880004883, |
|
"learning_rate": 6.854838709677419e-05, |
|
"loss": 2.0906, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.07063572149344097, |
|
"grad_norm": 2.102933406829834, |
|
"learning_rate": 7.056451612903226e-05, |
|
"loss": 2.2652, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.07265388496468214, |
|
"grad_norm": 2.3307838439941406, |
|
"learning_rate": 7.258064516129033e-05, |
|
"loss": 2.0891, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.07467204843592332, |
|
"grad_norm": 1.9409267902374268, |
|
"learning_rate": 7.45967741935484e-05, |
|
"loss": 2.0769, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.07669021190716448, |
|
"grad_norm": 1.8024511337280273, |
|
"learning_rate": 7.661290322580645e-05, |
|
"loss": 2.0587, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.07870837537840565, |
|
"grad_norm": 2.0050477981567383, |
|
"learning_rate": 7.862903225806451e-05, |
|
"loss": 2.0375, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.08072653884964683, |
|
"grad_norm": 3.634002923965454, |
|
"learning_rate": 8.064516129032258e-05, |
|
"loss": 2.0089, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.08274470232088799, |
|
"grad_norm": 2.52044415473938, |
|
"learning_rate": 8.266129032258066e-05, |
|
"loss": 2.0246, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.08476286579212916, |
|
"grad_norm": 2.014629364013672, |
|
"learning_rate": 8.467741935483871e-05, |
|
"loss": 2.0291, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.08678102926337034, |
|
"grad_norm": 2.003462791442871, |
|
"learning_rate": 8.669354838709678e-05, |
|
"loss": 2.0223, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.08879919273461151, |
|
"grad_norm": 2.210601329803467, |
|
"learning_rate": 8.870967741935484e-05, |
|
"loss": 2.0178, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.09081735620585267, |
|
"grad_norm": 2.3255221843719482, |
|
"learning_rate": 9.072580645161291e-05, |
|
"loss": 2.0062, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.09283551967709384, |
|
"grad_norm": 2.130826950073242, |
|
"learning_rate": 9.274193548387096e-05, |
|
"loss": 1.9744, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.09485368314833502, |
|
"grad_norm": 2.3463449478149414, |
|
"learning_rate": 9.475806451612904e-05, |
|
"loss": 2.1566, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.09687184661957618, |
|
"grad_norm": 1.9628673791885376, |
|
"learning_rate": 9.677419354838711e-05, |
|
"loss": 2.046, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.09889001009081735, |
|
"grad_norm": 1.8357641696929932, |
|
"learning_rate": 9.879032258064517e-05, |
|
"loss": 2.121, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.10090817356205853, |
|
"grad_norm": 2.052020788192749, |
|
"learning_rate": 9.999995545373623e-05, |
|
"loss": 1.9792, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.1029263370332997, |
|
"grad_norm": 2.158012866973877, |
|
"learning_rate": 9.999945430918042e-05, |
|
"loss": 1.8667, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.10494450050454086, |
|
"grad_norm": 1.946934461593628, |
|
"learning_rate": 9.999839634283869e-05, |
|
"loss": 2.0411, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.10696266397578204, |
|
"grad_norm": 2.1030478477478027, |
|
"learning_rate": 9.999678156649317e-05, |
|
"loss": 2.0485, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.10898082744702321, |
|
"grad_norm": 1.9499808549880981, |
|
"learning_rate": 9.999460999812691e-05, |
|
"loss": 1.9301, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.11099899091826437, |
|
"grad_norm": 1.789939045906067, |
|
"learning_rate": 9.999188166192368e-05, |
|
"loss": 1.924, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.11301715438950555, |
|
"grad_norm": 1.7510501146316528, |
|
"learning_rate": 9.998859658826777e-05, |
|
"loss": 1.9682, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.11503531786074672, |
|
"grad_norm": 2.5464296340942383, |
|
"learning_rate": 9.998475481374358e-05, |
|
"loss": 2.0132, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.1170534813319879, |
|
"grad_norm": 1.796861171722412, |
|
"learning_rate": 9.998035638113527e-05, |
|
"loss": 1.9089, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.11907164480322906, |
|
"grad_norm": 2.1990606784820557, |
|
"learning_rate": 9.997540133942624e-05, |
|
"loss": 1.8077, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.12108980827447023, |
|
"grad_norm": 1.6746110916137695, |
|
"learning_rate": 9.996988974379857e-05, |
|
"loss": 1.9023, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.1231079717457114, |
|
"grad_norm": 1.7681794166564941, |
|
"learning_rate": 9.996382165563247e-05, |
|
"loss": 1.8878, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.12512613521695257, |
|
"grad_norm": 1.6506789922714233, |
|
"learning_rate": 9.995719714250556e-05, |
|
"loss": 1.993, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.12714429868819374, |
|
"grad_norm": 2.155871868133545, |
|
"learning_rate": 9.995001627819211e-05, |
|
"loss": 1.847, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.12916246215943492, |
|
"grad_norm": 1.7361620664596558, |
|
"learning_rate": 9.99422791426622e-05, |
|
"loss": 1.8265, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.1311806256306761, |
|
"grad_norm": 2.3019630908966064, |
|
"learning_rate": 9.993398582208093e-05, |
|
"loss": 1.8233, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.13319878910191726, |
|
"grad_norm": 1.9044718742370605, |
|
"learning_rate": 9.99251364088073e-05, |
|
"loss": 1.9208, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.13521695257315844, |
|
"grad_norm": 2.070833683013916, |
|
"learning_rate": 9.991573100139334e-05, |
|
"loss": 1.8743, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.13723511604439959, |
|
"grad_norm": 1.8902406692504883, |
|
"learning_rate": 9.990576970458285e-05, |
|
"loss": 1.8131, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.13925327951564076, |
|
"grad_norm": 2.179497718811035, |
|
"learning_rate": 9.989525262931045e-05, |
|
"loss": 1.816, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.14127144298688193, |
|
"grad_norm": 2.6633455753326416, |
|
"learning_rate": 9.988417989270011e-05, |
|
"loss": 1.8408, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.1432896064581231, |
|
"grad_norm": 1.8275139331817627, |
|
"learning_rate": 9.987255161806402e-05, |
|
"loss": 1.7789, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.14530776992936428, |
|
"grad_norm": 1.9536734819412231, |
|
"learning_rate": 9.986036793490112e-05, |
|
"loss": 1.9016, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.14732593340060546, |
|
"grad_norm": 1.7954963445663452, |
|
"learning_rate": 9.984762897889568e-05, |
|
"loss": 1.8267, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.14934409687184663, |
|
"grad_norm": 1.7522506713867188, |
|
"learning_rate": 9.983433489191581e-05, |
|
"loss": 1.8596, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.15136226034308778, |
|
"grad_norm": 1.799124836921692, |
|
"learning_rate": 9.98204858220119e-05, |
|
"loss": 1.8224, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.15338042381432895, |
|
"grad_norm": 1.8433665037155151, |
|
"learning_rate": 9.980608192341488e-05, |
|
"loss": 1.8241, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.15539858728557013, |
|
"grad_norm": 1.764257550239563, |
|
"learning_rate": 9.979112335653462e-05, |
|
"loss": 1.8093, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.1574167507568113, |
|
"grad_norm": 1.8525418043136597, |
|
"learning_rate": 9.977561028795803e-05, |
|
"loss": 1.7784, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.15943491422805248, |
|
"grad_norm": 2.0689351558685303, |
|
"learning_rate": 9.97595428904473e-05, |
|
"loss": 1.8073, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.16145307769929365, |
|
"grad_norm": 1.7697125673294067, |
|
"learning_rate": 9.974292134293792e-05, |
|
"loss": 1.804, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.16347124117053483, |
|
"grad_norm": 1.8966635465621948, |
|
"learning_rate": 9.97257458305367e-05, |
|
"loss": 1.8198, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.16548940464177597, |
|
"grad_norm": 1.6123894453048706, |
|
"learning_rate": 9.970801654451973e-05, |
|
"loss": 1.7637, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.16750756811301715, |
|
"grad_norm": 1.6604522466659546, |
|
"learning_rate": 9.968973368233022e-05, |
|
"loss": 1.7528, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.16952573158425832, |
|
"grad_norm": 1.8791446685791016, |
|
"learning_rate": 9.96708974475763e-05, |
|
"loss": 1.7687, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.1715438950554995, |
|
"grad_norm": 2.045793056488037, |
|
"learning_rate": 9.965150805002878e-05, |
|
"loss": 1.8325, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.17356205852674067, |
|
"grad_norm": 1.8702943325042725, |
|
"learning_rate": 9.963156570561878e-05, |
|
"loss": 1.712, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.17558022199798184, |
|
"grad_norm": 1.9760271310806274, |
|
"learning_rate": 9.96110706364354e-05, |
|
"loss": 1.8057, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.17759838546922302, |
|
"grad_norm": 1.9553672075271606, |
|
"learning_rate": 9.959002307072312e-05, |
|
"loss": 1.7453, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.17961654894046417, |
|
"grad_norm": 1.989668607711792, |
|
"learning_rate": 9.956842324287936e-05, |
|
"loss": 1.9109, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.18163471241170534, |
|
"grad_norm": 1.766709566116333, |
|
"learning_rate": 9.954627139345186e-05, |
|
"loss": 1.7539, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.18365287588294651, |
|
"grad_norm": 1.933042287826538, |
|
"learning_rate": 9.952356776913594e-05, |
|
"loss": 1.8372, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.1856710393541877, |
|
"grad_norm": 1.717915415763855, |
|
"learning_rate": 9.950031262277183e-05, |
|
"loss": 1.8199, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.18768920282542886, |
|
"grad_norm": 9.502272605895996, |
|
"learning_rate": 9.947650621334179e-05, |
|
"loss": 1.814, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.18970736629667004, |
|
"grad_norm": 2.0936007499694824, |
|
"learning_rate": 9.945214880596725e-05, |
|
"loss": 1.7535, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.1917255297679112, |
|
"grad_norm": 2.1896564960479736, |
|
"learning_rate": 9.94272406719059e-05, |
|
"loss": 1.772, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.19374369323915236, |
|
"grad_norm": 1.951393723487854, |
|
"learning_rate": 9.940178208854858e-05, |
|
"loss": 1.7976, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.19576185671039353, |
|
"grad_norm": 1.8681849241256714, |
|
"learning_rate": 9.937577333941626e-05, |
|
"loss": 1.662, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.1977800201816347, |
|
"grad_norm": 1.8085284233093262, |
|
"learning_rate": 9.934921471415687e-05, |
|
"loss": 1.7778, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.19979818365287588, |
|
"grad_norm": 2.0100769996643066, |
|
"learning_rate": 9.932210650854205e-05, |
|
"loss": 1.7732, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.20181634712411706, |
|
"grad_norm": 1.6803241968154907, |
|
"learning_rate": 9.929444902446392e-05, |
|
"loss": 1.7685, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.20383451059535823, |
|
"grad_norm": 1.8448835611343384, |
|
"learning_rate": 9.92662425699316e-05, |
|
"loss": 1.7719, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 0.2058526740665994, |
|
"grad_norm": 1.8499823808670044, |
|
"learning_rate": 9.923748745906789e-05, |
|
"loss": 1.8417, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 0.20787083753784055, |
|
"grad_norm": 1.7802287340164185, |
|
"learning_rate": 9.920818401210574e-05, |
|
"loss": 1.7677, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 0.20988900100908173, |
|
"grad_norm": 2.019920587539673, |
|
"learning_rate": 9.917833255538467e-05, |
|
"loss": 1.8071, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 0.2119071644803229, |
|
"grad_norm": 1.838690161705017, |
|
"learning_rate": 9.914793342134711e-05, |
|
"loss": 1.7962, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 0.21392532795156408, |
|
"grad_norm": 2.0018012523651123, |
|
"learning_rate": 9.911698694853477e-05, |
|
"loss": 1.7135, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 0.21594349142280525, |
|
"grad_norm": 1.905208706855774, |
|
"learning_rate": 9.908549348158485e-05, |
|
"loss": 1.9039, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 0.21796165489404642, |
|
"grad_norm": 2.0252766609191895, |
|
"learning_rate": 9.905345337122609e-05, |
|
"loss": 1.8479, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 0.2199798183652876, |
|
"grad_norm": 1.7580780982971191, |
|
"learning_rate": 9.902086697427504e-05, |
|
"loss": 1.8081, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 0.22199798183652875, |
|
"grad_norm": 1.7557222843170166, |
|
"learning_rate": 9.8987734653632e-05, |
|
"loss": 1.6225, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.22401614530776992, |
|
"grad_norm": 1.779552936553955, |
|
"learning_rate": 9.895405677827692e-05, |
|
"loss": 1.674, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 0.2260343087790111, |
|
"grad_norm": 1.6549618244171143, |
|
"learning_rate": 9.89198337232654e-05, |
|
"loss": 1.6844, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 0.22805247225025227, |
|
"grad_norm": 1.816671371459961, |
|
"learning_rate": 9.888506586972446e-05, |
|
"loss": 1.7142, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 0.23007063572149344, |
|
"grad_norm": 1.7347807884216309, |
|
"learning_rate": 9.884975360484827e-05, |
|
"loss": 1.8952, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 0.23208879919273462, |
|
"grad_norm": 1.9305518865585327, |
|
"learning_rate": 9.881389732189392e-05, |
|
"loss": 1.7851, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 0.2341069626639758, |
|
"grad_norm": 1.5616772174835205, |
|
"learning_rate": 9.877749742017694e-05, |
|
"loss": 1.7088, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 0.23612512613521694, |
|
"grad_norm": 1.8221651315689087, |
|
"learning_rate": 9.874055430506691e-05, |
|
"loss": 1.6248, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 0.23814328960645811, |
|
"grad_norm": 1.7022042274475098, |
|
"learning_rate": 9.870306838798297e-05, |
|
"loss": 1.669, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 0.2401614530776993, |
|
"grad_norm": 1.7719357013702393, |
|
"learning_rate": 9.866504008638917e-05, |
|
"loss": 1.7587, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 0.24217961654894046, |
|
"grad_norm": 1.9335434436798096, |
|
"learning_rate": 9.862646982378987e-05, |
|
"loss": 1.8279, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.24419778002018164, |
|
"grad_norm": 1.7225123643875122, |
|
"learning_rate": 9.8587358029725e-05, |
|
"loss": 1.7519, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 0.2462159434914228, |
|
"grad_norm": 1.6799899339675903, |
|
"learning_rate": 9.854770513976531e-05, |
|
"loss": 1.6876, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 0.248234106962664, |
|
"grad_norm": 1.7735378742218018, |
|
"learning_rate": 9.850751159550746e-05, |
|
"loss": 1.687, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 0.25025227043390513, |
|
"grad_norm": 1.9135595560073853, |
|
"learning_rate": 9.846677784456918e-05, |
|
"loss": 1.785, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 0.2522704339051463, |
|
"grad_norm": 1.684078335762024, |
|
"learning_rate": 9.842550434058421e-05, |
|
"loss": 1.6777, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 0.2542885973763875, |
|
"grad_norm": 1.8692346811294556, |
|
"learning_rate": 9.838369154319728e-05, |
|
"loss": 1.8198, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 0.25630676084762866, |
|
"grad_norm": 1.526811957359314, |
|
"learning_rate": 9.8341339918059e-05, |
|
"loss": 1.6035, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 0.25832492431886983, |
|
"grad_norm": 1.8684613704681396, |
|
"learning_rate": 9.82984499368207e-05, |
|
"loss": 1.7229, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 0.260343087790111, |
|
"grad_norm": 1.9074355363845825, |
|
"learning_rate": 9.825502207712909e-05, |
|
"loss": 1.6801, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 0.2623612512613522, |
|
"grad_norm": 1.5898246765136719, |
|
"learning_rate": 9.821105682262099e-05, |
|
"loss": 1.6945, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.26437941473259335, |
|
"grad_norm": 1.633530616760254, |
|
"learning_rate": 9.816655466291803e-05, |
|
"loss": 1.723, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 0.26639757820383453, |
|
"grad_norm": 1.637403130531311, |
|
"learning_rate": 9.812151609362102e-05, |
|
"loss": 1.6019, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 0.2684157416750757, |
|
"grad_norm": 1.7958626747131348, |
|
"learning_rate": 9.807594161630458e-05, |
|
"loss": 1.5707, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 0.2704339051463169, |
|
"grad_norm": 1.7324177026748657, |
|
"learning_rate": 9.802983173851149e-05, |
|
"loss": 1.6786, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 0.272452068617558, |
|
"grad_norm": 1.55552339553833, |
|
"learning_rate": 9.798318697374702e-05, |
|
"loss": 1.6684, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 0.27447023208879917, |
|
"grad_norm": 1.7341140508651733, |
|
"learning_rate": 9.79360078414733e-05, |
|
"loss": 1.6447, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 0.27648839556004035, |
|
"grad_norm": 1.8685839176177979, |
|
"learning_rate": 9.78882948671034e-05, |
|
"loss": 1.6794, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 0.2785065590312815, |
|
"grad_norm": 1.783153772354126, |
|
"learning_rate": 9.784004858199563e-05, |
|
"loss": 1.7196, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 0.2805247225025227, |
|
"grad_norm": 1.7783135175704956, |
|
"learning_rate": 9.779126952344748e-05, |
|
"loss": 1.6273, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 0.28254288597376387, |
|
"grad_norm": 1.6643409729003906, |
|
"learning_rate": 9.774195823468973e-05, |
|
"loss": 1.6116, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.28456104944500504, |
|
"grad_norm": 1.7578685283660889, |
|
"learning_rate": 9.769211526488038e-05, |
|
"loss": 1.6021, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 0.2865792129162462, |
|
"grad_norm": 1.5535609722137451, |
|
"learning_rate": 9.764174116909852e-05, |
|
"loss": 1.7308, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 0.2885973763874874, |
|
"grad_norm": 1.8649767637252808, |
|
"learning_rate": 9.759083650833815e-05, |
|
"loss": 1.7038, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 0.29061553985872857, |
|
"grad_norm": 1.589314341545105, |
|
"learning_rate": 9.753940184950192e-05, |
|
"loss": 1.6144, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 0.29263370332996974, |
|
"grad_norm": 1.9105316400527954, |
|
"learning_rate": 9.748743776539488e-05, |
|
"loss": 1.7696, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 0.2946518668012109, |
|
"grad_norm": 1.9593696594238281, |
|
"learning_rate": 9.743494483471801e-05, |
|
"loss": 1.6077, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 0.2966700302724521, |
|
"grad_norm": 1.767230749130249, |
|
"learning_rate": 9.738192364206185e-05, |
|
"loss": 1.6627, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 0.29868819374369326, |
|
"grad_norm": 1.7512140274047852, |
|
"learning_rate": 9.732837477789993e-05, |
|
"loss": 1.6611, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 0.3007063572149344, |
|
"grad_norm": 1.9849492311477661, |
|
"learning_rate": 9.727429883858227e-05, |
|
"loss": 1.6493, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 0.30272452068617556, |
|
"grad_norm": 1.7700562477111816, |
|
"learning_rate": 9.721969642632865e-05, |
|
"loss": 1.5956, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.30474268415741673, |
|
"grad_norm": 1.5900918245315552, |
|
"learning_rate": 9.716456814922196e-05, |
|
"loss": 1.5717, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 0.3067608476286579, |
|
"grad_norm": 1.5321190357208252, |
|
"learning_rate": 9.710891462120141e-05, |
|
"loss": 1.6783, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 0.3087790110998991, |
|
"grad_norm": 1.8479689359664917, |
|
"learning_rate": 9.70527364620557e-05, |
|
"loss": 1.7176, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 0.31079717457114026, |
|
"grad_norm": 1.5251414775848389, |
|
"learning_rate": 9.699603429741615e-05, |
|
"loss": 1.6911, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 0.31281533804238143, |
|
"grad_norm": 1.658829927444458, |
|
"learning_rate": 9.693880875874961e-05, |
|
"loss": 1.8104, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 0.3148335015136226, |
|
"grad_norm": 1.8885022401809692, |
|
"learning_rate": 9.68810604833516e-05, |
|
"loss": 1.5729, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 0.3168516649848638, |
|
"grad_norm": 1.5589392185211182, |
|
"learning_rate": 9.682279011433908e-05, |
|
"loss": 1.7939, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 0.31886982845610495, |
|
"grad_norm": 1.6108678579330444, |
|
"learning_rate": 9.676399830064339e-05, |
|
"loss": 1.5793, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 0.32088799192734613, |
|
"grad_norm": 1.651929259300232, |
|
"learning_rate": 9.670468569700288e-05, |
|
"loss": 1.6892, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 0.3229061553985873, |
|
"grad_norm": 1.899056077003479, |
|
"learning_rate": 9.664485296395578e-05, |
|
"loss": 1.5486, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.3249243188698285, |
|
"grad_norm": 1.6485919952392578, |
|
"learning_rate": 9.658450076783274e-05, |
|
"loss": 1.6536, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 0.32694248234106965, |
|
"grad_norm": 1.6275869607925415, |
|
"learning_rate": 9.652362978074947e-05, |
|
"loss": 1.6136, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 0.32896064581231077, |
|
"grad_norm": 1.6739528179168701, |
|
"learning_rate": 9.646224068059917e-05, |
|
"loss": 1.7081, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 0.33097880928355194, |
|
"grad_norm": 1.6004359722137451, |
|
"learning_rate": 9.640033415104508e-05, |
|
"loss": 1.6391, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 0.3329969727547931, |
|
"grad_norm": 1.5969157218933105, |
|
"learning_rate": 9.633791088151283e-05, |
|
"loss": 1.5738, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 0.3350151362260343, |
|
"grad_norm": 1.7182707786560059, |
|
"learning_rate": 9.627497156718271e-05, |
|
"loss": 1.7195, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 0.33703329969727547, |
|
"grad_norm": 1.680814504623413, |
|
"learning_rate": 9.621151690898203e-05, |
|
"loss": 1.626, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 0.33905146316851664, |
|
"grad_norm": 2.010774612426758, |
|
"learning_rate": 9.614754761357718e-05, |
|
"loss": 1.8037, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 0.3410696266397578, |
|
"grad_norm": 1.6631429195404053, |
|
"learning_rate": 9.608306439336592e-05, |
|
"loss": 1.7439, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 0.343087790110999, |
|
"grad_norm": 1.5902904272079468, |
|
"learning_rate": 9.60180679664693e-05, |
|
"loss": 1.6691, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.34510595358224017, |
|
"grad_norm": 1.7272005081176758, |
|
"learning_rate": 9.595255905672377e-05, |
|
"loss": 1.5521, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 0.34712411705348134, |
|
"grad_norm": 1.6618432998657227, |
|
"learning_rate": 9.588653839367302e-05, |
|
"loss": 1.5553, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 0.3491422805247225, |
|
"grad_norm": 1.6251617670059204, |
|
"learning_rate": 9.582000671256e-05, |
|
"loss": 1.6653, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 0.3511604439959637, |
|
"grad_norm": 2.123147487640381, |
|
"learning_rate": 9.575296475431855e-05, |
|
"loss": 1.6303, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 0.35317860746720486, |
|
"grad_norm": 1.7185384035110474, |
|
"learning_rate": 9.568541326556527e-05, |
|
"loss": 1.5665, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 0.35519677093844604, |
|
"grad_norm": 1.6353096961975098, |
|
"learning_rate": 9.56173529985912e-05, |
|
"loss": 1.6406, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 0.35721493440968716, |
|
"grad_norm": 1.5363441705703735, |
|
"learning_rate": 9.554878471135339e-05, |
|
"loss": 1.6211, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 0.35923309788092833, |
|
"grad_norm": 1.7037582397460938, |
|
"learning_rate": 9.547970916746649e-05, |
|
"loss": 1.617, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 0.3612512613521695, |
|
"grad_norm": 1.7400474548339844, |
|
"learning_rate": 9.541012713619428e-05, |
|
"loss": 1.5177, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 0.3632694248234107, |
|
"grad_norm": 1.3855012655258179, |
|
"learning_rate": 9.5340039392441e-05, |
|
"loss": 1.5964, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.36528758829465185, |
|
"grad_norm": 1.669772744178772, |
|
"learning_rate": 9.526944671674286e-05, |
|
"loss": 1.5476, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 0.36730575176589303, |
|
"grad_norm": 1.547868251800537, |
|
"learning_rate": 9.51983498952592e-05, |
|
"loss": 1.7272, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 0.3693239152371342, |
|
"grad_norm": 1.9002543687820435, |
|
"learning_rate": 9.512674971976385e-05, |
|
"loss": 1.4606, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 0.3713420787083754, |
|
"grad_norm": 1.597686767578125, |
|
"learning_rate": 9.505464698763629e-05, |
|
"loss": 1.5597, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 0.37336024217961655, |
|
"grad_norm": 1.425994634628296, |
|
"learning_rate": 9.49820425018527e-05, |
|
"loss": 1.5451, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 0.3753784056508577, |
|
"grad_norm": 2.0066096782684326, |
|
"learning_rate": 9.49089370709771e-05, |
|
"loss": 1.6121, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 0.3773965691220989, |
|
"grad_norm": 1.5784003734588623, |
|
"learning_rate": 9.483533150915229e-05, |
|
"loss": 1.6218, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 0.3794147325933401, |
|
"grad_norm": 1.843019723892212, |
|
"learning_rate": 9.476122663609086e-05, |
|
"loss": 1.6694, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 0.38143289606458125, |
|
"grad_norm": 1.7118667364120483, |
|
"learning_rate": 9.468662327706594e-05, |
|
"loss": 1.5564, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 0.3834510595358224, |
|
"grad_norm": 1.6396056413650513, |
|
"learning_rate": 9.461152226290212e-05, |
|
"loss": 1.6626, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.3854692230070636, |
|
"grad_norm": 1.4227858781814575, |
|
"learning_rate": 9.453592442996614e-05, |
|
"loss": 1.5375, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 0.3874873864783047, |
|
"grad_norm": 1.6100040674209595, |
|
"learning_rate": 9.445983062015761e-05, |
|
"loss": 1.4965, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 0.3895055499495459, |
|
"grad_norm": 1.6516296863555908, |
|
"learning_rate": 9.43832416808996e-05, |
|
"loss": 1.5941, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 0.39152371342078707, |
|
"grad_norm": 1.6929583549499512, |
|
"learning_rate": 9.430615846512923e-05, |
|
"loss": 1.6325, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 0.39354187689202824, |
|
"grad_norm": 1.5503323078155518, |
|
"learning_rate": 9.422858183128808e-05, |
|
"loss": 1.6673, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 0.3955600403632694, |
|
"grad_norm": 1.7053431272506714, |
|
"learning_rate": 9.415051264331285e-05, |
|
"loss": 1.4846, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 0.3975782038345106, |
|
"grad_norm": 1.7111166715621948, |
|
"learning_rate": 9.407195177062549e-05, |
|
"loss": 1.598, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 0.39959636730575177, |
|
"grad_norm": 1.728541374206543, |
|
"learning_rate": 9.399290008812365e-05, |
|
"loss": 1.4917, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 0.40161453077699294, |
|
"grad_norm": 1.6542996168136597, |
|
"learning_rate": 9.391335847617093e-05, |
|
"loss": 1.6358, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 0.4036326942482341, |
|
"grad_norm": 1.4176194667816162, |
|
"learning_rate": 9.383332782058705e-05, |
|
"loss": 1.6766, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.4056508577194753, |
|
"grad_norm": 1.4830904006958008, |
|
"learning_rate": 9.375280901263796e-05, |
|
"loss": 1.5902, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 0.40766902119071646, |
|
"grad_norm": 1.6302123069763184, |
|
"learning_rate": 9.367180294902603e-05, |
|
"loss": 1.613, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 0.40968718466195764, |
|
"grad_norm": 1.4098937511444092, |
|
"learning_rate": 9.359031053187988e-05, |
|
"loss": 1.5243, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 0.4117053481331988, |
|
"grad_norm": 2.8872201442718506, |
|
"learning_rate": 9.350833266874451e-05, |
|
"loss": 1.5927, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 0.41372351160444, |
|
"grad_norm": 2.0234012603759766, |
|
"learning_rate": 9.342587027257104e-05, |
|
"loss": 1.7196, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 0.4157416750756811, |
|
"grad_norm": 1.7716432809829712, |
|
"learning_rate": 9.334292426170672e-05, |
|
"loss": 1.6426, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 0.4177598385469223, |
|
"grad_norm": 1.5563617944717407, |
|
"learning_rate": 9.325949555988452e-05, |
|
"loss": 1.6418, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 0.41977800201816345, |
|
"grad_norm": 1.4168179035186768, |
|
"learning_rate": 9.317558509621296e-05, |
|
"loss": 1.6293, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 0.42179616548940463, |
|
"grad_norm": 1.492793321609497, |
|
"learning_rate": 9.309119380516573e-05, |
|
"loss": 1.5355, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 0.4238143289606458, |
|
"grad_norm": 1.7277498245239258, |
|
"learning_rate": 9.300632262657128e-05, |
|
"loss": 1.6541, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.425832492431887, |
|
"grad_norm": 1.7189927101135254, |
|
"learning_rate": 9.292097250560232e-05, |
|
"loss": 1.7026, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 0.42785065590312815, |
|
"grad_norm": 1.4795873165130615, |
|
"learning_rate": 9.283514439276539e-05, |
|
"loss": 1.5959, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 0.4298688193743693, |
|
"grad_norm": 1.5818445682525635, |
|
"learning_rate": 9.274883924389018e-05, |
|
"loss": 1.6147, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 0.4318869828456105, |
|
"grad_norm": 1.4752700328826904, |
|
"learning_rate": 9.266205802011892e-05, |
|
"loss": 1.6281, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 0.4339051463168517, |
|
"grad_norm": 1.4389899969100952, |
|
"learning_rate": 9.257480168789565e-05, |
|
"loss": 1.5655, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 0.43592330978809285, |
|
"grad_norm": 1.4133797883987427, |
|
"learning_rate": 9.248707121895555e-05, |
|
"loss": 1.6437, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 0.437941473259334, |
|
"grad_norm": 1.9490972757339478, |
|
"learning_rate": 9.239886759031398e-05, |
|
"loss": 1.4673, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 0.4399596367305752, |
|
"grad_norm": 1.6025413274765015, |
|
"learning_rate": 9.231019178425573e-05, |
|
"loss": 1.6627, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 0.4419778002018164, |
|
"grad_norm": 1.5877257585525513, |
|
"learning_rate": 9.222104478832398e-05, |
|
"loss": 1.5995, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 0.4439959636730575, |
|
"grad_norm": 1.5336265563964844, |
|
"learning_rate": 9.213142759530936e-05, |
|
"loss": 1.5419, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.44601412714429867, |
|
"grad_norm": 1.4506264925003052, |
|
"learning_rate": 9.204134120323883e-05, |
|
"loss": 1.6579, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 0.44803229061553984, |
|
"grad_norm": 1.896984338760376, |
|
"learning_rate": 9.195078661536471e-05, |
|
"loss": 1.5389, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 0.450050454086781, |
|
"grad_norm": 1.4249799251556396, |
|
"learning_rate": 9.185976484015333e-05, |
|
"loss": 1.5563, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 0.4520686175580222, |
|
"grad_norm": 1.5877562761306763, |
|
"learning_rate": 9.176827689127389e-05, |
|
"loss": 1.6289, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 0.45408678102926336, |
|
"grad_norm": 1.5677759647369385, |
|
"learning_rate": 9.167632378758719e-05, |
|
"loss": 1.4721, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 0.45610494450050454, |
|
"grad_norm": 1.6074806451797485, |
|
"learning_rate": 9.158390655313422e-05, |
|
"loss": 1.6101, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 0.4581231079717457, |
|
"grad_norm": 1.6484979391098022, |
|
"learning_rate": 9.149102621712482e-05, |
|
"loss": 1.51, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 0.4601412714429869, |
|
"grad_norm": 1.7982864379882812, |
|
"learning_rate": 9.139768381392616e-05, |
|
"loss": 1.6383, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 0.46215943491422806, |
|
"grad_norm": 2.003589391708374, |
|
"learning_rate": 9.130388038305127e-05, |
|
"loss": 1.6297, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 0.46417759838546924, |
|
"grad_norm": 1.659283995628357, |
|
"learning_rate": 9.12096169691474e-05, |
|
"loss": 1.6508, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.4661957618567104, |
|
"grad_norm": 1.4378119707107544, |
|
"learning_rate": 9.111489462198448e-05, |
|
"loss": 1.5852, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 0.4682139253279516, |
|
"grad_norm": 1.7221019268035889, |
|
"learning_rate": 9.101971439644335e-05, |
|
"loss": 1.5671, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 0.47023208879919276, |
|
"grad_norm": 1.6366978883743286, |
|
"learning_rate": 9.092407735250404e-05, |
|
"loss": 1.6594, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 0.4722502522704339, |
|
"grad_norm": 1.7291353940963745, |
|
"learning_rate": 9.082798455523396e-05, |
|
"loss": 1.5025, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 0.47426841574167505, |
|
"grad_norm": 1.6143367290496826, |
|
"learning_rate": 9.073143707477607e-05, |
|
"loss": 1.6939, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 0.47628657921291623, |
|
"grad_norm": 1.5067676305770874, |
|
"learning_rate": 9.063443598633688e-05, |
|
"loss": 1.5281, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 0.4783047426841574, |
|
"grad_norm": 1.5984981060028076, |
|
"learning_rate": 9.053698237017459e-05, |
|
"loss": 1.5932, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 0.4803229061553986, |
|
"grad_norm": 1.7212659120559692, |
|
"learning_rate": 9.043907731158699e-05, |
|
"loss": 1.5234, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 0.48234106962663975, |
|
"grad_norm": 1.4098666906356812, |
|
"learning_rate": 9.034072190089932e-05, |
|
"loss": 1.5491, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 0.4843592330978809, |
|
"grad_norm": 1.656089186668396, |
|
"learning_rate": 9.02419172334523e-05, |
|
"loss": 1.4821, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.4863773965691221, |
|
"grad_norm": 1.4160220623016357, |
|
"learning_rate": 9.014266440958974e-05, |
|
"loss": 1.6342, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 0.4883955600403633, |
|
"grad_norm": 1.7586220502853394, |
|
"learning_rate": 9.004296453464638e-05, |
|
"loss": 1.5503, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 0.49041372351160445, |
|
"grad_norm": 1.5569350719451904, |
|
"learning_rate": 8.994281871893562e-05, |
|
"loss": 1.5558, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 0.4924318869828456, |
|
"grad_norm": 1.9452998638153076, |
|
"learning_rate": 8.984222807773706e-05, |
|
"loss": 1.6341, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 0.4944500504540868, |
|
"grad_norm": 1.6458998918533325, |
|
"learning_rate": 8.974119373128411e-05, |
|
"loss": 1.5798, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 0.496468213925328, |
|
"grad_norm": 1.7907352447509766, |
|
"learning_rate": 8.963971680475161e-05, |
|
"loss": 1.499, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 0.49848637739656915, |
|
"grad_norm": 1.6107879877090454, |
|
"learning_rate": 8.95377984282431e-05, |
|
"loss": 1.5705, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 0.5005045408678103, |
|
"grad_norm": 1.5708277225494385, |
|
"learning_rate": 8.943543973677846e-05, |
|
"loss": 1.6062, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 0.5025227043390514, |
|
"grad_norm": 1.5355361700057983, |
|
"learning_rate": 8.933264187028109e-05, |
|
"loss": 1.6155, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 0.5045408678102926, |
|
"grad_norm": 2.4249730110168457, |
|
"learning_rate": 8.922940597356532e-05, |
|
"loss": 1.4927, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.5065590312815338, |
|
"grad_norm": 1.5792551040649414, |
|
"learning_rate": 8.912573319632367e-05, |
|
"loss": 1.6917, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 0.508577194752775, |
|
"grad_norm": 1.5651421546936035, |
|
"learning_rate": 8.90216246931139e-05, |
|
"loss": 1.5866, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 0.5105953582240161, |
|
"grad_norm": 1.485982060432434, |
|
"learning_rate": 8.891708162334635e-05, |
|
"loss": 1.5548, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 0.5126135216952573, |
|
"grad_norm": 1.4294884204864502, |
|
"learning_rate": 8.88121051512709e-05, |
|
"loss": 1.5208, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 0.5146316851664985, |
|
"grad_norm": 1.7587571144104004, |
|
"learning_rate": 8.870669644596402e-05, |
|
"loss": 1.5843, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 0.5166498486377397, |
|
"grad_norm": 1.257310152053833, |
|
"learning_rate": 8.860085668131582e-05, |
|
"loss": 1.437, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 0.5186680121089808, |
|
"grad_norm": 1.6487629413604736, |
|
"learning_rate": 8.84945870360169e-05, |
|
"loss": 1.6055, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 0.520686175580222, |
|
"grad_norm": 1.415015459060669, |
|
"learning_rate": 8.838788869354522e-05, |
|
"loss": 1.509, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 0.5227043390514632, |
|
"grad_norm": 1.5729633569717407, |
|
"learning_rate": 8.828076284215301e-05, |
|
"loss": 1.4648, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 0.5247225025227044, |
|
"grad_norm": 2.0716793537139893, |
|
"learning_rate": 8.817321067485343e-05, |
|
"loss": 1.6064, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.5267406659939455, |
|
"grad_norm": 1.4554495811462402, |
|
"learning_rate": 8.806523338940736e-05, |
|
"loss": 1.6376, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 0.5287588294651867, |
|
"grad_norm": 1.7924822568893433, |
|
"learning_rate": 8.795683218831001e-05, |
|
"loss": 1.6513, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 0.5307769929364279, |
|
"grad_norm": 1.3876484632492065, |
|
"learning_rate": 8.78480082787776e-05, |
|
"loss": 1.6005, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 0.5327951564076691, |
|
"grad_norm": 1.5425324440002441, |
|
"learning_rate": 8.773876287273377e-05, |
|
"loss": 1.6121, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 0.5348133198789102, |
|
"grad_norm": 1.5110645294189453, |
|
"learning_rate": 8.762909718679629e-05, |
|
"loss": 1.5611, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 0.5368314833501514, |
|
"grad_norm": 1.580310583114624, |
|
"learning_rate": 8.751901244226332e-05, |
|
"loss": 1.636, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 0.5388496468213926, |
|
"grad_norm": 1.5676929950714111, |
|
"learning_rate": 8.740850986509994e-05, |
|
"loss": 1.4185, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 0.5408678102926338, |
|
"grad_norm": 1.6521614789962769, |
|
"learning_rate": 8.729759068592442e-05, |
|
"loss": 1.5195, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 0.5428859737638748, |
|
"grad_norm": 1.6536593437194824, |
|
"learning_rate": 8.718625613999457e-05, |
|
"loss": 1.6065, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 0.544904137235116, |
|
"grad_norm": 1.223604679107666, |
|
"learning_rate": 8.70745074671939e-05, |
|
"loss": 1.5292, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.5469223007063572, |
|
"grad_norm": 1.5844485759735107, |
|
"learning_rate": 8.696234591201793e-05, |
|
"loss": 1.5145, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 0.5489404641775983, |
|
"grad_norm": 1.4692803621292114, |
|
"learning_rate": 8.684977272356024e-05, |
|
"loss": 1.5126, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 0.5509586276488395, |
|
"grad_norm": 1.509020209312439, |
|
"learning_rate": 8.673678915549855e-05, |
|
"loss": 1.6746, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 0.5529767911200807, |
|
"grad_norm": 1.6188277006149292, |
|
"learning_rate": 8.662339646608089e-05, |
|
"loss": 1.5323, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 0.5549949545913219, |
|
"grad_norm": 1.4918463230133057, |
|
"learning_rate": 8.650959591811141e-05, |
|
"loss": 1.5413, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 0.557013118062563, |
|
"grad_norm": 1.5165650844573975, |
|
"learning_rate": 8.639538877893644e-05, |
|
"loss": 1.4788, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 0.5590312815338042, |
|
"grad_norm": 1.4886066913604736, |
|
"learning_rate": 8.628077632043032e-05, |
|
"loss": 1.5158, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 0.5610494450050454, |
|
"grad_norm": 1.6249706745147705, |
|
"learning_rate": 8.616575981898125e-05, |
|
"loss": 1.4793, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 0.5630676084762866, |
|
"grad_norm": 1.3315315246582031, |
|
"learning_rate": 8.605034055547709e-05, |
|
"loss": 1.4828, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 0.5650857719475277, |
|
"grad_norm": 1.448897123336792, |
|
"learning_rate": 8.593451981529108e-05, |
|
"loss": 1.5655, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.5671039354187689, |
|
"grad_norm": 1.4807590246200562, |
|
"learning_rate": 8.581829888826754e-05, |
|
"loss": 1.5999, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 0.5691220988900101, |
|
"grad_norm": 1.7087481021881104, |
|
"learning_rate": 8.570167906870745e-05, |
|
"loss": 1.591, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 0.5711402623612513, |
|
"grad_norm": 1.6135573387145996, |
|
"learning_rate": 8.558466165535411e-05, |
|
"loss": 1.6063, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 0.5731584258324924, |
|
"grad_norm": 1.5271415710449219, |
|
"learning_rate": 8.546724795137865e-05, |
|
"loss": 1.5007, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 0.5751765893037336, |
|
"grad_norm": 1.5794763565063477, |
|
"learning_rate": 8.534943926436554e-05, |
|
"loss": 1.5415, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 0.5771947527749748, |
|
"grad_norm": 1.4942843914031982, |
|
"learning_rate": 8.523123690629791e-05, |
|
"loss": 1.5891, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 0.579212916246216, |
|
"grad_norm": 1.5353363752365112, |
|
"learning_rate": 8.511264219354313e-05, |
|
"loss": 1.4995, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 0.5812310797174571, |
|
"grad_norm": 1.5738115310668945, |
|
"learning_rate": 8.4993656446838e-05, |
|
"loss": 1.5277, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 0.5832492431886983, |
|
"grad_norm": 1.4808515310287476, |
|
"learning_rate": 8.48742809912741e-05, |
|
"loss": 1.5682, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 0.5852674066599395, |
|
"grad_norm": 1.5826464891433716, |
|
"learning_rate": 8.475451715628302e-05, |
|
"loss": 1.4706, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.5872855701311807, |
|
"grad_norm": 1.5062007904052734, |
|
"learning_rate": 8.463436627562158e-05, |
|
"loss": 1.6083, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 0.5893037336024218, |
|
"grad_norm": 1.56938898563385, |
|
"learning_rate": 8.451382968735693e-05, |
|
"loss": 1.4747, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 0.591321897073663, |
|
"grad_norm": 1.4118072986602783, |
|
"learning_rate": 8.43929087338517e-05, |
|
"loss": 1.4768, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 0.5933400605449042, |
|
"grad_norm": 1.5036567449569702, |
|
"learning_rate": 8.4271604761749e-05, |
|
"loss": 1.4414, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 0.5953582240161454, |
|
"grad_norm": 1.4589097499847412, |
|
"learning_rate": 8.414991912195747e-05, |
|
"loss": 1.5648, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 0.5973763874873865, |
|
"grad_norm": 1.522581934928894, |
|
"learning_rate": 8.402785316963618e-05, |
|
"loss": 1.4947, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 0.5993945509586277, |
|
"grad_norm": 1.5915330648422241, |
|
"learning_rate": 8.390540826417964e-05, |
|
"loss": 1.474, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 0.6014127144298688, |
|
"grad_norm": 1.5158594846725464, |
|
"learning_rate": 8.378258576920253e-05, |
|
"loss": 1.5417, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 0.6034308779011099, |
|
"grad_norm": 1.5189268589019775, |
|
"learning_rate": 8.365938705252459e-05, |
|
"loss": 1.4829, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 0.6054490413723511, |
|
"grad_norm": 1.3481444120407104, |
|
"learning_rate": 8.353581348615538e-05, |
|
"loss": 1.5622, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.6074672048435923, |
|
"grad_norm": 1.482692003250122, |
|
"learning_rate": 8.341186644627901e-05, |
|
"loss": 1.6286, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 0.6094853683148335, |
|
"grad_norm": 1.5743776559829712, |
|
"learning_rate": 8.32875473132388e-05, |
|
"loss": 1.4813, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 0.6115035317860746, |
|
"grad_norm": 1.3886172771453857, |
|
"learning_rate": 8.316285747152189e-05, |
|
"loss": 1.4297, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 0.6135216952573158, |
|
"grad_norm": 1.558706283569336, |
|
"learning_rate": 8.30377983097438e-05, |
|
"loss": 1.4639, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 0.615539858728557, |
|
"grad_norm": 1.537921667098999, |
|
"learning_rate": 8.291237122063309e-05, |
|
"loss": 1.5532, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 0.6175580221997982, |
|
"grad_norm": 1.2276337146759033, |
|
"learning_rate": 8.27865776010157e-05, |
|
"loss": 1.4639, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 0.6195761856710393, |
|
"grad_norm": 1.4864561557769775, |
|
"learning_rate": 8.266041885179949e-05, |
|
"loss": 1.436, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 0.6215943491422805, |
|
"grad_norm": 1.7501429319381714, |
|
"learning_rate": 8.253389637795858e-05, |
|
"loss": 1.4653, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 0.6236125126135217, |
|
"grad_norm": 1.4437967538833618, |
|
"learning_rate": 8.240701158851778e-05, |
|
"loss": 1.4266, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 0.6256306760847629, |
|
"grad_norm": 1.3351330757141113, |
|
"learning_rate": 8.227976589653676e-05, |
|
"loss": 1.4005, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.627648839556004, |
|
"grad_norm": 1.4452919960021973, |
|
"learning_rate": 8.215216071909448e-05, |
|
"loss": 1.5651, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 0.6296670030272452, |
|
"grad_norm": 1.4321256875991821, |
|
"learning_rate": 8.202419747727333e-05, |
|
"loss": 1.4941, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 0.6316851664984864, |
|
"grad_norm": 1.3962360620498657, |
|
"learning_rate": 8.189587759614325e-05, |
|
"loss": 1.4671, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 0.6337033299697276, |
|
"grad_norm": 1.4179112911224365, |
|
"learning_rate": 8.176720250474594e-05, |
|
"loss": 1.4636, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 0.6357214934409687, |
|
"grad_norm": 1.4552099704742432, |
|
"learning_rate": 8.163817363607894e-05, |
|
"loss": 1.5253, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 0.6377396569122099, |
|
"grad_norm": 1.6266930103302002, |
|
"learning_rate": 8.150879242707962e-05, |
|
"loss": 1.4704, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 0.6397578203834511, |
|
"grad_norm": 1.480033278465271, |
|
"learning_rate": 8.137906031860925e-05, |
|
"loss": 1.5921, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 0.6417759838546923, |
|
"grad_norm": 1.4520894289016724, |
|
"learning_rate": 8.124897875543684e-05, |
|
"loss": 1.4444, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 0.6437941473259334, |
|
"grad_norm": 1.3743687868118286, |
|
"learning_rate": 8.111854918622321e-05, |
|
"loss": 1.6175, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 0.6458123107971746, |
|
"grad_norm": 1.5462607145309448, |
|
"learning_rate": 8.098777306350469e-05, |
|
"loss": 1.4526, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.6478304742684158, |
|
"grad_norm": 1.4813790321350098, |
|
"learning_rate": 8.08566518436771e-05, |
|
"loss": 1.5253, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 0.649848637739657, |
|
"grad_norm": 1.5957119464874268, |
|
"learning_rate": 8.072518698697938e-05, |
|
"loss": 1.4505, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 0.6518668012108981, |
|
"grad_norm": 1.5894341468811035, |
|
"learning_rate": 8.059337995747743e-05, |
|
"loss": 1.4643, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 0.6538849646821393, |
|
"grad_norm": 1.4520728588104248, |
|
"learning_rate": 8.046123222304781e-05, |
|
"loss": 1.5529, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 0.6559031281533805, |
|
"grad_norm": 1.379225730895996, |
|
"learning_rate": 8.032874525536131e-05, |
|
"loss": 1.4944, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 0.6579212916246215, |
|
"grad_norm": 1.4799381494522095, |
|
"learning_rate": 8.019592052986665e-05, |
|
"loss": 1.3809, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 0.6599394550958627, |
|
"grad_norm": 1.3304007053375244, |
|
"learning_rate": 8.006275952577397e-05, |
|
"loss": 1.5455, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 0.6619576185671039, |
|
"grad_norm": 1.4968523979187012, |
|
"learning_rate": 7.992926372603842e-05, |
|
"loss": 1.4879, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 0.6639757820383451, |
|
"grad_norm": 1.582527756690979, |
|
"learning_rate": 7.979543461734362e-05, |
|
"loss": 1.4796, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 0.6659939455095862, |
|
"grad_norm": 1.3123412132263184, |
|
"learning_rate": 7.966127369008512e-05, |
|
"loss": 1.4623, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.6680121089808274, |
|
"grad_norm": 1.3255366086959839, |
|
"learning_rate": 7.952678243835376e-05, |
|
"loss": 1.4871, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 0.6700302724520686, |
|
"grad_norm": 1.375701904296875, |
|
"learning_rate": 7.939196235991904e-05, |
|
"loss": 1.49, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 0.6720484359233098, |
|
"grad_norm": 1.5660688877105713, |
|
"learning_rate": 7.925681495621253e-05, |
|
"loss": 1.5556, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 0.6740665993945509, |
|
"grad_norm": 1.2856935262680054, |
|
"learning_rate": 7.912134173231098e-05, |
|
"loss": 1.4971, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 0.6760847628657921, |
|
"grad_norm": 1.7174125909805298, |
|
"learning_rate": 7.898554419691974e-05, |
|
"loss": 1.506, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 0.6781029263370333, |
|
"grad_norm": 1.4166866540908813, |
|
"learning_rate": 7.884942386235582e-05, |
|
"loss": 1.367, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 0.6801210898082745, |
|
"grad_norm": 1.3765437602996826, |
|
"learning_rate": 7.871298224453113e-05, |
|
"loss": 1.4017, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 0.6821392532795156, |
|
"grad_norm": 1.4745761156082153, |
|
"learning_rate": 7.857622086293557e-05, |
|
"loss": 1.6014, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 0.6841574167507568, |
|
"grad_norm": 1.5563029050827026, |
|
"learning_rate": 7.843914124062006e-05, |
|
"loss": 1.4713, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 0.686175580221998, |
|
"grad_norm": 1.6175247430801392, |
|
"learning_rate": 7.830174490417972e-05, |
|
"loss": 1.5117, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.6881937436932392, |
|
"grad_norm": 1.4464702606201172, |
|
"learning_rate": 7.816403338373666e-05, |
|
"loss": 1.4251, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 0.6902119071644803, |
|
"grad_norm": 1.3877936601638794, |
|
"learning_rate": 7.802600821292314e-05, |
|
"loss": 1.3907, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 0.6922300706357215, |
|
"grad_norm": 1.4214582443237305, |
|
"learning_rate": 7.78876709288644e-05, |
|
"loss": 1.4608, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 0.6942482341069627, |
|
"grad_norm": 1.3867719173431396, |
|
"learning_rate": 7.774902307216148e-05, |
|
"loss": 1.5583, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 0.6962663975782039, |
|
"grad_norm": 1.345284104347229, |
|
"learning_rate": 7.76100661868742e-05, |
|
"loss": 1.5107, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 0.698284561049445, |
|
"grad_norm": 1.4984257221221924, |
|
"learning_rate": 7.747080182050388e-05, |
|
"loss": 1.4186, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 0.7003027245206862, |
|
"grad_norm": 1.4104434251785278, |
|
"learning_rate": 7.733123152397609e-05, |
|
"loss": 1.4989, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 0.7023208879919274, |
|
"grad_norm": 1.3834434747695923, |
|
"learning_rate": 7.719135685162342e-05, |
|
"loss": 1.4089, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 0.7043390514631686, |
|
"grad_norm": 1.4428340196609497, |
|
"learning_rate": 7.705117936116822e-05, |
|
"loss": 1.5516, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 0.7063572149344097, |
|
"grad_norm": 1.5729140043258667, |
|
"learning_rate": 7.691070061370507e-05, |
|
"loss": 1.5622, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.7083753784056509, |
|
"grad_norm": 1.3539918661117554, |
|
"learning_rate": 7.676992217368364e-05, |
|
"loss": 1.4938, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 0.7103935418768921, |
|
"grad_norm": 1.540166974067688, |
|
"learning_rate": 7.662884560889105e-05, |
|
"loss": 1.3785, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 0.7124117053481333, |
|
"grad_norm": 1.3016571998596191, |
|
"learning_rate": 7.648747249043457e-05, |
|
"loss": 1.5543, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 0.7144298688193743, |
|
"grad_norm": 1.4095804691314697, |
|
"learning_rate": 7.634580439272401e-05, |
|
"loss": 1.5495, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 0.7164480322906155, |
|
"grad_norm": 1.280922770500183, |
|
"learning_rate": 7.620384289345425e-05, |
|
"loss": 1.5104, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 0.7184661957618567, |
|
"grad_norm": 1.3793776035308838, |
|
"learning_rate": 7.606158957358769e-05, |
|
"loss": 1.5002, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 0.7204843592330978, |
|
"grad_norm": 1.5305780172348022, |
|
"learning_rate": 7.591904601733655e-05, |
|
"loss": 1.5148, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 0.722502522704339, |
|
"grad_norm": 1.3183702230453491, |
|
"learning_rate": 7.577621381214529e-05, |
|
"loss": 1.4927, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 0.7245206861755802, |
|
"grad_norm": 1.2844158411026, |
|
"learning_rate": 7.563309454867295e-05, |
|
"loss": 1.4843, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 0.7265388496468214, |
|
"grad_norm": 1.252433180809021, |
|
"learning_rate": 7.548968982077542e-05, |
|
"loss": 1.4047, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.7285570131180625, |
|
"grad_norm": 1.1570249795913696, |
|
"learning_rate": 7.534600122548765e-05, |
|
"loss": 1.4062, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 0.7305751765893037, |
|
"grad_norm": 1.3853447437286377, |
|
"learning_rate": 7.520203036300588e-05, |
|
"loss": 1.4593, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 0.7325933400605449, |
|
"grad_norm": 1.6483482122421265, |
|
"learning_rate": 7.505777883666993e-05, |
|
"loss": 1.6098, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 0.7346115035317861, |
|
"grad_norm": 1.3295645713806152, |
|
"learning_rate": 7.491324825294514e-05, |
|
"loss": 1.4467, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 0.7366296670030272, |
|
"grad_norm": 1.336206078529358, |
|
"learning_rate": 7.476844022140464e-05, |
|
"loss": 1.5218, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 0.7386478304742684, |
|
"grad_norm": 1.2594281435012817, |
|
"learning_rate": 7.462335635471136e-05, |
|
"loss": 1.4216, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 0.7406659939455096, |
|
"grad_norm": 2.950639009475708, |
|
"learning_rate": 7.44779982686001e-05, |
|
"loss": 1.4549, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 0.7426841574167508, |
|
"grad_norm": 1.4663448333740234, |
|
"learning_rate": 7.43323675818595e-05, |
|
"loss": 1.3631, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 0.7447023208879919, |
|
"grad_norm": 1.5686269998550415, |
|
"learning_rate": 7.418646591631404e-05, |
|
"loss": 1.3991, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 0.7467204843592331, |
|
"grad_norm": 1.5072070360183716, |
|
"learning_rate": 7.404029489680598e-05, |
|
"loss": 1.4257, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.7487386478304743, |
|
"grad_norm": 1.391025185585022, |
|
"learning_rate": 7.389385615117723e-05, |
|
"loss": 1.4345, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 0.7507568113017155, |
|
"grad_norm": 1.382051944732666, |
|
"learning_rate": 7.37471513102513e-05, |
|
"loss": 1.4038, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 0.7527749747729566, |
|
"grad_norm": 1.3509782552719116, |
|
"learning_rate": 7.360018200781502e-05, |
|
"loss": 1.4457, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 0.7547931382441978, |
|
"grad_norm": 1.6141653060913086, |
|
"learning_rate": 7.345294988060046e-05, |
|
"loss": 1.5944, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 0.756811301715439, |
|
"grad_norm": 1.4731391668319702, |
|
"learning_rate": 7.330545656826662e-05, |
|
"loss": 1.4548, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 0.7588294651866802, |
|
"grad_norm": 1.3180512189865112, |
|
"learning_rate": 7.315770371338126e-05, |
|
"loss": 1.3972, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 0.7608476286579213, |
|
"grad_norm": 1.3752028942108154, |
|
"learning_rate": 7.300969296140244e-05, |
|
"loss": 1.5337, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 0.7628657921291625, |
|
"grad_norm": 1.5503935813903809, |
|
"learning_rate": 7.286142596066044e-05, |
|
"loss": 1.4658, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 0.7648839556004037, |
|
"grad_norm": 1.3432625532150269, |
|
"learning_rate": 7.271290436233916e-05, |
|
"loss": 1.5027, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 0.7669021190716448, |
|
"grad_norm": 1.6639512777328491, |
|
"learning_rate": 7.25641298204579e-05, |
|
"loss": 1.4577, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.768920282542886, |
|
"grad_norm": 1.5050352811813354, |
|
"learning_rate": 7.241510399185287e-05, |
|
"loss": 1.4345, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 0.7709384460141272, |
|
"grad_norm": 1.6508047580718994, |
|
"learning_rate": 7.226582853615874e-05, |
|
"loss": 1.359, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 0.7729566094853683, |
|
"grad_norm": 1.489529013633728, |
|
"learning_rate": 7.211630511579015e-05, |
|
"loss": 1.4296, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 0.7749747729566094, |
|
"grad_norm": 1.270341396331787, |
|
"learning_rate": 7.196653539592326e-05, |
|
"loss": 1.4177, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 0.7769929364278506, |
|
"grad_norm": 1.2509267330169678, |
|
"learning_rate": 7.181652104447711e-05, |
|
"loss": 1.4425, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 0.7790110998990918, |
|
"grad_norm": 1.4303765296936035, |
|
"learning_rate": 7.166626373209514e-05, |
|
"loss": 1.4735, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 0.781029263370333, |
|
"grad_norm": 1.3523200750350952, |
|
"learning_rate": 7.15157651321265e-05, |
|
"loss": 1.5438, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 0.7830474268415741, |
|
"grad_norm": 1.3462188243865967, |
|
"learning_rate": 7.136502692060746e-05, |
|
"loss": 1.5316, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 0.7850655903128153, |
|
"grad_norm": 1.3250367641448975, |
|
"learning_rate": 7.121405077624276e-05, |
|
"loss": 1.4727, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 0.7870837537840565, |
|
"grad_norm": 1.267572045326233, |
|
"learning_rate": 7.106283838038685e-05, |
|
"loss": 1.4804, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.7891019172552977, |
|
"grad_norm": 1.2590205669403076, |
|
"learning_rate": 7.091139141702527e-05, |
|
"loss": 1.3984, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 0.7911200807265388, |
|
"grad_norm": 1.4484533071517944, |
|
"learning_rate": 7.075971157275575e-05, |
|
"loss": 1.4728, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 0.79313824419778, |
|
"grad_norm": 1.4239338636398315, |
|
"learning_rate": 7.06078005367696e-05, |
|
"loss": 1.4305, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 0.7951564076690212, |
|
"grad_norm": 1.4230101108551025, |
|
"learning_rate": 7.045566000083278e-05, |
|
"loss": 1.5003, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 0.7971745711402624, |
|
"grad_norm": 1.2686034440994263, |
|
"learning_rate": 7.030329165926706e-05, |
|
"loss": 1.4169, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 0.7991927346115035, |
|
"grad_norm": 1.5182725191116333, |
|
"learning_rate": 7.01506972089312e-05, |
|
"loss": 1.5385, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 0.8012108980827447, |
|
"grad_norm": 1.4067586660385132, |
|
"learning_rate": 6.999787834920202e-05, |
|
"loss": 1.3908, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 0.8032290615539859, |
|
"grad_norm": 1.2837873697280884, |
|
"learning_rate": 6.984483678195553e-05, |
|
"loss": 1.4505, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 0.805247225025227, |
|
"grad_norm": 1.4042167663574219, |
|
"learning_rate": 6.969157421154789e-05, |
|
"loss": 1.4897, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 0.8072653884964682, |
|
"grad_norm": 1.6277389526367188, |
|
"learning_rate": 6.95380923447965e-05, |
|
"loss": 1.4117, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.8092835519677094, |
|
"grad_norm": 1.572359323501587, |
|
"learning_rate": 6.938439289096095e-05, |
|
"loss": 1.4158, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 0.8113017154389506, |
|
"grad_norm": 1.5279749631881714, |
|
"learning_rate": 6.923047756172401e-05, |
|
"loss": 1.455, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 0.8133198789101918, |
|
"grad_norm": 1.298711895942688, |
|
"learning_rate": 6.907634807117257e-05, |
|
"loss": 1.3315, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 0.8153380423814329, |
|
"grad_norm": 1.2476575374603271, |
|
"learning_rate": 6.892200613577852e-05, |
|
"loss": 1.514, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 0.8173562058526741, |
|
"grad_norm": 1.289162278175354, |
|
"learning_rate": 6.876745347437964e-05, |
|
"loss": 1.4861, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 0.8193743693239153, |
|
"grad_norm": 1.2558218240737915, |
|
"learning_rate": 6.861269180816052e-05, |
|
"loss": 1.3809, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 0.8213925327951564, |
|
"grad_norm": 1.2769505977630615, |
|
"learning_rate": 6.845772286063332e-05, |
|
"loss": 1.4678, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 0.8234106962663976, |
|
"grad_norm": 1.4346575736999512, |
|
"learning_rate": 6.830254835761856e-05, |
|
"loss": 1.4251, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 0.8254288597376388, |
|
"grad_norm": 1.4318442344665527, |
|
"learning_rate": 6.814717002722602e-05, |
|
"loss": 1.5475, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 0.82744702320888, |
|
"grad_norm": 1.2699110507965088, |
|
"learning_rate": 6.799158959983536e-05, |
|
"loss": 1.4722, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.829465186680121, |
|
"grad_norm": 1.5809247493743896, |
|
"learning_rate": 6.78358088080769e-05, |
|
"loss": 1.5189, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 0.8314833501513622, |
|
"grad_norm": 1.4829879999160767, |
|
"learning_rate": 6.767982938681239e-05, |
|
"loss": 1.5611, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 0.8335015136226034, |
|
"grad_norm": 1.251018762588501, |
|
"learning_rate": 6.752365307311556e-05, |
|
"loss": 1.4698, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 0.8355196770938446, |
|
"grad_norm": 1.1748842000961304, |
|
"learning_rate": 6.736728160625284e-05, |
|
"loss": 1.5476, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 0.8375378405650857, |
|
"grad_norm": 1.4876999855041504, |
|
"learning_rate": 6.721071672766406e-05, |
|
"loss": 1.4378, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 0.8395560040363269, |
|
"grad_norm": 1.4530222415924072, |
|
"learning_rate": 6.705396018094297e-05, |
|
"loss": 1.4794, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 0.8415741675075681, |
|
"grad_norm": 1.2909533977508545, |
|
"learning_rate": 6.689701371181781e-05, |
|
"loss": 1.3877, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 0.8435923309788093, |
|
"grad_norm": 1.4188405275344849, |
|
"learning_rate": 6.673987906813191e-05, |
|
"loss": 1.3629, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 0.8456104944500504, |
|
"grad_norm": 1.498369812965393, |
|
"learning_rate": 6.658255799982424e-05, |
|
"loss": 1.4719, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 0.8476286579212916, |
|
"grad_norm": 1.3397397994995117, |
|
"learning_rate": 6.642505225890987e-05, |
|
"loss": 1.3999, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.8496468213925328, |
|
"grad_norm": 1.3505257368087769, |
|
"learning_rate": 6.626736359946052e-05, |
|
"loss": 1.4824, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 0.851664984863774, |
|
"grad_norm": 1.312658667564392, |
|
"learning_rate": 6.610949377758497e-05, |
|
"loss": 1.4902, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 0.8536831483350151, |
|
"grad_norm": 1.4204723834991455, |
|
"learning_rate": 6.595144455140952e-05, |
|
"loss": 1.4635, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 0.8557013118062563, |
|
"grad_norm": 1.2958680391311646, |
|
"learning_rate": 6.579321768105845e-05, |
|
"loss": 1.4672, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 0.8577194752774975, |
|
"grad_norm": 1.4016082286834717, |
|
"learning_rate": 6.563481492863436e-05, |
|
"loss": 1.4476, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 0.8597376387487387, |
|
"grad_norm": 1.3869260549545288, |
|
"learning_rate": 6.547623805819854e-05, |
|
"loss": 1.4194, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 0.8617558022199798, |
|
"grad_norm": 1.2927767038345337, |
|
"learning_rate": 6.531748883575143e-05, |
|
"loss": 1.4523, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 0.863773965691221, |
|
"grad_norm": 2.418339729309082, |
|
"learning_rate": 6.51585690292128e-05, |
|
"loss": 1.3799, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 0.8657921291624622, |
|
"grad_norm": 1.6442086696624756, |
|
"learning_rate": 6.499948040840219e-05, |
|
"loss": 1.4596, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 0.8678102926337034, |
|
"grad_norm": 1.377209186553955, |
|
"learning_rate": 6.484022474501914e-05, |
|
"loss": 1.4226, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.8698284561049445, |
|
"grad_norm": 1.3288756608963013, |
|
"learning_rate": 6.468080381262347e-05, |
|
"loss": 1.4244, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 0.8718466195761857, |
|
"grad_norm": 1.6183438301086426, |
|
"learning_rate": 6.45212193866155e-05, |
|
"loss": 1.4628, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 0.8738647830474269, |
|
"grad_norm": 1.3636276721954346, |
|
"learning_rate": 6.436147324421635e-05, |
|
"loss": 1.4082, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 0.875882946518668, |
|
"grad_norm": 1.5240586996078491, |
|
"learning_rate": 6.420156716444805e-05, |
|
"loss": 1.4043, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 0.8779011099899092, |
|
"grad_norm": 1.2216293811798096, |
|
"learning_rate": 6.404150292811386e-05, |
|
"loss": 1.3972, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 0.8799192734611504, |
|
"grad_norm": 1.447521448135376, |
|
"learning_rate": 6.388128231777828e-05, |
|
"loss": 1.5133, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 0.8819374369323916, |
|
"grad_norm": 1.3184555768966675, |
|
"learning_rate": 6.372090711774732e-05, |
|
"loss": 1.3777, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 0.8839556004036327, |
|
"grad_norm": 1.3064029216766357, |
|
"learning_rate": 6.356037911404858e-05, |
|
"loss": 1.4235, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 0.8859737638748738, |
|
"grad_norm": 1.252462387084961, |
|
"learning_rate": 6.339970009441137e-05, |
|
"loss": 1.4071, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 0.887991927346115, |
|
"grad_norm": 1.3973218202590942, |
|
"learning_rate": 6.323887184824678e-05, |
|
"loss": 1.397, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.8900100908173562, |
|
"grad_norm": 1.3893166780471802, |
|
"learning_rate": 6.307789616662778e-05, |
|
"loss": 1.3642, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 0.8920282542885973, |
|
"grad_norm": 1.329697847366333, |
|
"learning_rate": 6.291677484226929e-05, |
|
"loss": 1.5611, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 0.8940464177598385, |
|
"grad_norm": 1.4498480558395386, |
|
"learning_rate": 6.275550966950814e-05, |
|
"loss": 1.49, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 0.8960645812310797, |
|
"grad_norm": 1.3522253036499023, |
|
"learning_rate": 6.259410244428318e-05, |
|
"loss": 1.3945, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 0.8980827447023209, |
|
"grad_norm": 1.3933300971984863, |
|
"learning_rate": 6.243255496411519e-05, |
|
"loss": 1.4169, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 0.900100908173562, |
|
"grad_norm": 1.4387352466583252, |
|
"learning_rate": 6.227086902808697e-05, |
|
"loss": 1.5595, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 0.9021190716448032, |
|
"grad_norm": 1.339574933052063, |
|
"learning_rate": 6.210904643682318e-05, |
|
"loss": 1.4787, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 0.9041372351160444, |
|
"grad_norm": 1.3527144193649292, |
|
"learning_rate": 6.194708899247037e-05, |
|
"loss": 1.4132, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 0.9061553985872856, |
|
"grad_norm": 1.471655249595642, |
|
"learning_rate": 6.178499849867689e-05, |
|
"loss": 1.4548, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 0.9081735620585267, |
|
"grad_norm": 1.5146980285644531, |
|
"learning_rate": 6.162277676057284e-05, |
|
"loss": 1.4628, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.9101917255297679, |
|
"grad_norm": 1.2837083339691162, |
|
"learning_rate": 6.146042558474987e-05, |
|
"loss": 1.4305, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 0.9122098890010091, |
|
"grad_norm": 1.2286232709884644, |
|
"learning_rate": 6.129794677924113e-05, |
|
"loss": 1.4211, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 0.9142280524722503, |
|
"grad_norm": 1.3042244911193848, |
|
"learning_rate": 6.113534215350116e-05, |
|
"loss": 1.4328, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 0.9162462159434914, |
|
"grad_norm": 1.2900363206863403, |
|
"learning_rate": 6.097261351838569e-05, |
|
"loss": 1.591, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 0.9182643794147326, |
|
"grad_norm": 1.315226674079895, |
|
"learning_rate": 6.0809762686131474e-05, |
|
"loss": 1.3962, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 0.9202825428859738, |
|
"grad_norm": 1.2552099227905273, |
|
"learning_rate": 6.064679147033614e-05, |
|
"loss": 1.5005, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 0.922300706357215, |
|
"grad_norm": 1.4059544801712036, |
|
"learning_rate": 6.0483701685937954e-05, |
|
"loss": 1.4515, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 0.9243188698284561, |
|
"grad_norm": 1.4410680532455444, |
|
"learning_rate": 6.0320495149195644e-05, |
|
"loss": 1.4045, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 0.9263370332996973, |
|
"grad_norm": 1.1760427951812744, |
|
"learning_rate": 6.015717367766815e-05, |
|
"loss": 1.5034, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 0.9283551967709385, |
|
"grad_norm": 1.3556947708129883, |
|
"learning_rate": 5.999373909019437e-05, |
|
"loss": 1.4571, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.9303733602421796, |
|
"grad_norm": 1.2202001810073853, |
|
"learning_rate": 5.9830193206872974e-05, |
|
"loss": 1.4304, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 0.9323915237134208, |
|
"grad_norm": 1.2118003368377686, |
|
"learning_rate": 5.966653784904207e-05, |
|
"loss": 1.4254, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 0.934409687184662, |
|
"grad_norm": 1.5171030759811401, |
|
"learning_rate": 5.950277483925889e-05, |
|
"loss": 1.4243, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 0.9364278506559032, |
|
"grad_norm": 1.0997145175933838, |
|
"learning_rate": 5.933890600127958e-05, |
|
"loss": 1.4485, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 0.9384460141271443, |
|
"grad_norm": 1.3518075942993164, |
|
"learning_rate": 5.917493316003884e-05, |
|
"loss": 1.4907, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 0.9404641775983855, |
|
"grad_norm": 1.3248131275177002, |
|
"learning_rate": 5.90108581416296e-05, |
|
"loss": 1.4629, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 0.9424823410696267, |
|
"grad_norm": 1.2627878189086914, |
|
"learning_rate": 5.8846682773282694e-05, |
|
"loss": 1.451, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 0.9445005045408678, |
|
"grad_norm": 1.4110251665115356, |
|
"learning_rate": 5.868240888334653e-05, |
|
"loss": 1.4425, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 0.9465186680121089, |
|
"grad_norm": 1.2221981287002563, |
|
"learning_rate": 5.851803830126666e-05, |
|
"loss": 1.5313, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 0.9485368314833501, |
|
"grad_norm": 1.2160431146621704, |
|
"learning_rate": 5.835357285756552e-05, |
|
"loss": 1.3897, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.9505549949545913, |
|
"grad_norm": 1.2338696718215942, |
|
"learning_rate": 5.8189014383821914e-05, |
|
"loss": 1.311, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 0.9525731584258325, |
|
"grad_norm": 1.3585981130599976, |
|
"learning_rate": 5.8024364712650724e-05, |
|
"loss": 1.4082, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 0.9545913218970736, |
|
"grad_norm": 1.922635555267334, |
|
"learning_rate": 5.785962567768243e-05, |
|
"loss": 1.3854, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 0.9566094853683148, |
|
"grad_norm": 1.230394959449768, |
|
"learning_rate": 5.769479911354273e-05, |
|
"loss": 1.4562, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 0.958627648839556, |
|
"grad_norm": 1.2346444129943848, |
|
"learning_rate": 5.7529886855832096e-05, |
|
"loss": 1.5012, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 0.9606458123107972, |
|
"grad_norm": 1.4491231441497803, |
|
"learning_rate": 5.736489074110533e-05, |
|
"loss": 1.2942, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 0.9626639757820383, |
|
"grad_norm": 1.3211175203323364, |
|
"learning_rate": 5.71998126068511e-05, |
|
"loss": 1.3634, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 0.9646821392532795, |
|
"grad_norm": 1.215053915977478, |
|
"learning_rate": 5.7034654291471524e-05, |
|
"loss": 1.434, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 0.9667003027245207, |
|
"grad_norm": 1.1765618324279785, |
|
"learning_rate": 5.686941763426161e-05, |
|
"loss": 1.4677, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 0.9687184661957619, |
|
"grad_norm": 1.3212573528289795, |
|
"learning_rate": 5.670410447538889e-05, |
|
"loss": 1.4113, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.970736629667003, |
|
"grad_norm": 1.455395221710205, |
|
"learning_rate": 5.653871665587278e-05, |
|
"loss": 1.4146, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 0.9727547931382442, |
|
"grad_norm": 1.19629967212677, |
|
"learning_rate": 5.6373256017564215e-05, |
|
"loss": 1.3996, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 0.9747729566094854, |
|
"grad_norm": 1.6926542520523071, |
|
"learning_rate": 5.620772440312508e-05, |
|
"loss": 1.4043, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 0.9767911200807265, |
|
"grad_norm": 1.3891587257385254, |
|
"learning_rate": 5.6042123656007685e-05, |
|
"loss": 1.4503, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 0.9788092835519677, |
|
"grad_norm": 1.368962287902832, |
|
"learning_rate": 5.587645562043422e-05, |
|
"loss": 1.427, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 0.9808274470232089, |
|
"grad_norm": 1.3921308517456055, |
|
"learning_rate": 5.5710722141376245e-05, |
|
"loss": 1.3451, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 0.9828456104944501, |
|
"grad_norm": 1.1947131156921387, |
|
"learning_rate": 5.5544925064534145e-05, |
|
"loss": 1.3041, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 0.9848637739656912, |
|
"grad_norm": 1.3686097860336304, |
|
"learning_rate": 5.537906623631657e-05, |
|
"loss": 1.5366, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 0.9868819374369324, |
|
"grad_norm": 1.3779717683792114, |
|
"learning_rate": 5.521314750381983e-05, |
|
"loss": 1.3769, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 0.9889001009081736, |
|
"grad_norm": 1.2698273658752441, |
|
"learning_rate": 5.5047170714807406e-05, |
|
"loss": 1.3719, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.9909182643794148, |
|
"grad_norm": 1.1771740913391113, |
|
"learning_rate": 5.4881137717689315e-05, |
|
"loss": 1.3579, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 0.992936427850656, |
|
"grad_norm": 1.2798762321472168, |
|
"learning_rate": 5.471505036150154e-05, |
|
"loss": 1.3889, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 0.9949545913218971, |
|
"grad_norm": 1.7384541034698486, |
|
"learning_rate": 5.454891049588544e-05, |
|
"loss": 1.531, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 0.9969727547931383, |
|
"grad_norm": 1.2217923402786255, |
|
"learning_rate": 5.438271997106712e-05, |
|
"loss": 1.406, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 0.9989909182643795, |
|
"grad_norm": 1.1362420320510864, |
|
"learning_rate": 5.421648063783689e-05, |
|
"loss": 1.2914, |
|
"step": 4950 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 9910, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 2, |
|
"save_steps": 4955, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 8.387441374475059e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|