diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,47201 +1,11851 @@ { - "best_metric": 0.05167969688773155, - "best_model_checkpoint": "./microsoft_beit-base-patch16-224-pt22k-ft22k_epoch_5/checkpoint-67330", + "best_metric": 0.041273970156908035, + "best_model_checkpoint": "./microsoft_dit/checkpoint-16835", "epoch": 5.0, "eval_steps": 500, - "global_step": 67330, + "global_step": 16835, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0007426110203475419, - "grad_norm": 19.504955291748047, - "learning_rate": 2.9995544333877916e-05, - "loss": 0.4424, + "epoch": 0.00297000297000297, + "grad_norm": 1.4537379741668701, + "learning_rate": 2.9982179982179983e-05, + "loss": 0.3085, "step": 10 }, { - "epoch": 0.0014852220406950838, - "grad_norm": 5.601377964019775, - "learning_rate": 2.999108866775583e-05, - "loss": 0.238, + "epoch": 0.00594000594000594, + "grad_norm": 1.9534900188446045, + "learning_rate": 2.9964359964359965e-05, + "loss": 0.2158, "step": 20 }, { - "epoch": 0.002227833061042626, - "grad_norm": 8.971532821655273, - "learning_rate": 2.9986633001633746e-05, - "loss": 0.2289, + "epoch": 0.00891000891000891, + "grad_norm": 2.2834959030151367, + "learning_rate": 2.9946539946539947e-05, + "loss": 0.1555, "step": 30 }, { - "epoch": 0.0029704440813901676, - "grad_norm": 3.3992252349853516, - "learning_rate": 2.998217733551166e-05, - "loss": 0.2005, + "epoch": 0.01188001188001188, + "grad_norm": 2.4054393768310547, + "learning_rate": 2.992871992871993e-05, + "loss": 0.1099, "step": 40 }, { - "epoch": 0.0037130551017377097, - "grad_norm": 13.932947158813477, - "learning_rate": 2.9977721669389572e-05, - "loss": 0.2329, + "epoch": 0.01485001485001485, + "grad_norm": 1.9460220336914062, + "learning_rate": 2.991089991089991e-05, + "loss": 0.1214, "step": 50 }, { - "epoch": 0.004455666122085252, - "grad_norm": 2.9520821571350098, - "learning_rate": 2.997326600326749e-05, - "loss": 0.2114, + "epoch": 0.01782001782001782, + "grad_norm": 1.1060905456542969, + "learning_rate": 2.9893079893079894e-05, + "loss": 0.1046, "step": 60 }, { - "epoch": 0.005198277142432793, - "grad_norm": 4.807623386383057, - "learning_rate": 2.9968810337145402e-05, - "loss": 0.1468, + "epoch": 0.02079002079002079, + "grad_norm": 1.0159635543823242, + "learning_rate": 2.9875259875259876e-05, + "loss": 0.1055, "step": 70 }, { - "epoch": 0.005940888162780335, - "grad_norm": 4.0346903800964355, - "learning_rate": 2.9964354671023317e-05, - "loss": 0.2179, + "epoch": 0.02376002376002376, + "grad_norm": 2.2551214694976807, + "learning_rate": 2.9857439857439858e-05, + "loss": 0.099, "step": 80 }, { - "epoch": 0.006683499183127877, - "grad_norm": 4.7647199630737305, - "learning_rate": 2.9959899004901236e-05, - "loss": 0.1647, + "epoch": 0.02673002673002673, + "grad_norm": 1.3134405612945557, + "learning_rate": 2.983961983961984e-05, + "loss": 0.0975, "step": 90 }, { - "epoch": 0.007426110203475419, - "grad_norm": 9.930302619934082, - "learning_rate": 2.9955443338779147e-05, - "loss": 0.1436, + "epoch": 0.0297000297000297, + "grad_norm": 1.055510401725769, + "learning_rate": 2.9821799821799822e-05, + "loss": 0.0945, "step": 100 }, { - "epoch": 0.008168721223822962, - "grad_norm": 2.7145912647247314, - "learning_rate": 2.9950987672657062e-05, - "loss": 0.1408, + "epoch": 0.03267003267003267, + "grad_norm": 1.4456548690795898, + "learning_rate": 2.9803979803979805e-05, + "loss": 0.1058, "step": 110 }, { - "epoch": 0.008911332244170504, - "grad_norm": 4.7143940925598145, - "learning_rate": 2.994653200653498e-05, - "loss": 0.1411, + "epoch": 0.03564003564003564, + "grad_norm": 1.4712682962417603, + "learning_rate": 2.9786159786159787e-05, + "loss": 0.0901, "step": 120 }, { - "epoch": 0.009653943264518046, - "grad_norm": 5.331055641174316, - "learning_rate": 2.9942076340412892e-05, - "loss": 0.1392, + "epoch": 0.03861003861003861, + "grad_norm": 2.145982265472412, + "learning_rate": 2.976833976833977e-05, + "loss": 0.1149, "step": 130 }, { - "epoch": 0.010396554284865587, - "grad_norm": 4.96783447265625, - "learning_rate": 2.9937620674290807e-05, - "loss": 0.1772, + "epoch": 0.04158004158004158, + "grad_norm": 2.104418992996216, + "learning_rate": 2.975051975051975e-05, + "loss": 0.102, "step": 140 }, { - "epoch": 0.011139165305213129, - "grad_norm": 3.5116193294525146, - "learning_rate": 2.9933165008168722e-05, - "loss": 0.111, + "epoch": 0.04455004455004455, + "grad_norm": 1.3641546964645386, + "learning_rate": 2.9732699732699733e-05, + "loss": 0.0996, "step": 150 }, { - "epoch": 0.01188177632556067, - "grad_norm": 3.521408796310425, - "learning_rate": 2.9928709342046637e-05, - "loss": 0.1418, + "epoch": 0.04752004752004752, + "grad_norm": 2.1951277256011963, + "learning_rate": 2.9714879714879715e-05, + "loss": 0.092, "step": 160 }, { - "epoch": 0.012624387345908213, - "grad_norm": 2.7895073890686035, - "learning_rate": 2.9924253675924552e-05, - "loss": 0.1338, + "epoch": 0.05049005049005049, + "grad_norm": 1.2363626956939697, + "learning_rate": 2.9697059697059698e-05, + "loss": 0.0971, "step": 170 }, { - "epoch": 0.013366998366255755, - "grad_norm": 5.244328022003174, - "learning_rate": 2.9919798009802464e-05, - "loss": 0.1252, + "epoch": 0.05346005346005346, + "grad_norm": 1.299441933631897, + "learning_rate": 2.967923967923968e-05, + "loss": 0.1, "step": 180 }, { - "epoch": 0.014109609386603297, - "grad_norm": 1.7632620334625244, - "learning_rate": 2.9915342343680382e-05, - "loss": 0.1122, + "epoch": 0.05643005643005643, + "grad_norm": 0.9083341956138611, + "learning_rate": 2.9661419661419662e-05, + "loss": 0.0997, "step": 190 }, { - "epoch": 0.014852220406950839, - "grad_norm": 3.5791783332824707, - "learning_rate": 2.9910886677558297e-05, - "loss": 0.1217, + "epoch": 0.0594000594000594, + "grad_norm": 1.1018556356430054, + "learning_rate": 2.9643599643599644e-05, + "loss": 0.0916, "step": 200 }, { - "epoch": 0.01559483142729838, - "grad_norm": 5.1304450035095215, - "learning_rate": 2.990643101143621e-05, - "loss": 0.1361, + "epoch": 0.062370062370062374, + "grad_norm": 1.025562047958374, + "learning_rate": 2.9625779625779626e-05, + "loss": 0.0995, "step": 210 }, { - "epoch": 0.016337442447645924, - "grad_norm": 5.422621250152588, - "learning_rate": 2.9901975345314124e-05, - "loss": 0.1404, + "epoch": 0.06534006534006534, + "grad_norm": 1.1609880924224854, + "learning_rate": 2.960795960795961e-05, + "loss": 0.0844, "step": 220 }, { - "epoch": 0.017080053467993465, - "grad_norm": 7.065879821777344, - "learning_rate": 2.9897519679192042e-05, - "loss": 0.1371, + "epoch": 0.0683100683100683, + "grad_norm": 1.7311973571777344, + "learning_rate": 2.959013959013959e-05, + "loss": 0.0862, "step": 230 }, { - "epoch": 0.01782266448834101, - "grad_norm": 2.3558316230773926, - "learning_rate": 2.9893064013069954e-05, - "loss": 0.1386, + "epoch": 0.07128007128007129, + "grad_norm": 1.5643036365509033, + "learning_rate": 2.9572319572319573e-05, + "loss": 0.0999, "step": 240 }, { - "epoch": 0.01856527550868855, - "grad_norm": 1.8473409414291382, - "learning_rate": 2.988860834694787e-05, - "loss": 0.166, + "epoch": 0.07425007425007425, + "grad_norm": 1.468729019165039, + "learning_rate": 2.9554499554499555e-05, + "loss": 0.0989, "step": 250 }, { - "epoch": 0.019307886529036093, - "grad_norm": 4.502384662628174, - "learning_rate": 2.9884152680825784e-05, - "loss": 0.1241, + "epoch": 0.07722007722007722, + "grad_norm": 0.8540601134300232, + "learning_rate": 2.9536679536679537e-05, + "loss": 0.0822, "step": 260 }, { - "epoch": 0.020050497549383633, - "grad_norm": 1.857735514640808, - "learning_rate": 2.98796970147037e-05, - "loss": 0.1369, + "epoch": 0.08019008019008018, + "grad_norm": 1.0522710084915161, + "learning_rate": 2.951885951885952e-05, + "loss": 0.0818, "step": 270 }, { - "epoch": 0.020793108569731173, - "grad_norm": 6.109134674072266, - "learning_rate": 2.9875241348581614e-05, - "loss": 0.1139, + "epoch": 0.08316008316008316, + "grad_norm": 1.6779365539550781, + "learning_rate": 2.95010395010395e-05, + "loss": 0.0804, "step": 280 }, { - "epoch": 0.021535719590078717, - "grad_norm": 3.0833802223205566, - "learning_rate": 2.987078568245953e-05, - "loss": 0.1203, + "epoch": 0.08613008613008613, + "grad_norm": 0.6255664825439453, + "learning_rate": 2.9483219483219484e-05, + "loss": 0.0958, "step": 290 }, { - "epoch": 0.022278330610426257, - "grad_norm": 2.37730073928833, - "learning_rate": 2.9866330016337444e-05, - "loss": 0.12, + "epoch": 0.0891000891000891, + "grad_norm": 0.9004175662994385, + "learning_rate": 2.9465399465399466e-05, + "loss": 0.0924, "step": 300 }, { - "epoch": 0.0230209416307738, - "grad_norm": 2.950272798538208, - "learning_rate": 2.986187435021536e-05, - "loss": 0.1139, + "epoch": 0.09207009207009206, + "grad_norm": 1.5575037002563477, + "learning_rate": 2.9447579447579448e-05, + "loss": 0.0927, "step": 310 }, { - "epoch": 0.02376355265112134, - "grad_norm": 4.4759521484375, - "learning_rate": 2.985741868409327e-05, - "loss": 0.146, + "epoch": 0.09504009504009504, + "grad_norm": 1.6837743520736694, + "learning_rate": 2.942975942975943e-05, + "loss": 0.0997, "step": 320 }, { - "epoch": 0.024506163671468885, - "grad_norm": 1.08765709400177, - "learning_rate": 2.985296301797119e-05, - "loss": 0.1132, + "epoch": 0.09801009801009801, + "grad_norm": 0.6957836151123047, + "learning_rate": 2.9411939411939412e-05, + "loss": 0.07, "step": 330 }, { - "epoch": 0.025248774691816425, - "grad_norm": 2.5022499561309814, - "learning_rate": 2.9848507351849104e-05, - "loss": 0.0975, + "epoch": 0.10098010098010098, + "grad_norm": 1.7029621601104736, + "learning_rate": 2.9394119394119395e-05, + "loss": 0.109, "step": 340 }, { - "epoch": 0.02599138571216397, - "grad_norm": 6.558846950531006, - "learning_rate": 2.9844051685727015e-05, - "loss": 0.1128, + "epoch": 0.10395010395010396, + "grad_norm": 1.519026756286621, + "learning_rate": 2.9376299376299377e-05, + "loss": 0.093, "step": 350 }, { - "epoch": 0.02673399673251151, - "grad_norm": 2.4449667930603027, - "learning_rate": 2.9839596019604934e-05, - "loss": 0.146, + "epoch": 0.10692010692010692, + "grad_norm": 0.8110401034355164, + "learning_rate": 2.935847935847936e-05, + "loss": 0.0795, "step": 360 }, { - "epoch": 0.027476607752859053, - "grad_norm": 3.488586902618408, - "learning_rate": 2.9835140353482845e-05, - "loss": 0.1066, + "epoch": 0.10989010989010989, + "grad_norm": 0.5369464159011841, + "learning_rate": 2.934065934065934e-05, + "loss": 0.0877, "step": 370 }, { - "epoch": 0.028219218773206593, - "grad_norm": 3.4595577716827393, - "learning_rate": 2.983068468736076e-05, - "loss": 0.1295, + "epoch": 0.11286011286011285, + "grad_norm": 2.6125996112823486, + "learning_rate": 2.9322839322839323e-05, + "loss": 0.0852, "step": 380 }, { - "epoch": 0.028961829793554137, - "grad_norm": 5.9359259605407715, - "learning_rate": 2.9826229021238675e-05, - "loss": 0.1566, + "epoch": 0.11583011583011583, + "grad_norm": 0.8178906440734863, + "learning_rate": 2.930501930501931e-05, + "loss": 0.0708, "step": 390 }, { - "epoch": 0.029704440813901677, - "grad_norm": 2.771470546722412, - "learning_rate": 2.982177335511659e-05, - "loss": 0.0896, + "epoch": 0.1188001188001188, + "grad_norm": 1.6640368700027466, + "learning_rate": 2.9287199287199288e-05, + "loss": 0.0951, "step": 400 }, { - "epoch": 0.03044705183424922, - "grad_norm": 5.8616862297058105, - "learning_rate": 2.9817317688994505e-05, - "loss": 0.1435, + "epoch": 0.12177012177012177, + "grad_norm": 0.5659627318382263, + "learning_rate": 2.926937926937927e-05, + "loss": 0.0927, "step": 410 }, { - "epoch": 0.03118966285459676, - "grad_norm": 3.5416555404663086, - "learning_rate": 2.981286202287242e-05, - "loss": 0.133, + "epoch": 0.12474012474012475, + "grad_norm": 1.324707269668579, + "learning_rate": 2.9251559251559252e-05, + "loss": 0.0749, "step": 420 }, { - "epoch": 0.0319322738749443, - "grad_norm": 2.3944127559661865, - "learning_rate": 2.9808406356750335e-05, - "loss": 0.1329, + "epoch": 0.1277101277101277, + "grad_norm": 1.9675109386444092, + "learning_rate": 2.9233739233739234e-05, + "loss": 0.0827, "step": 430 }, { - "epoch": 0.03267488489529185, - "grad_norm": 2.402383804321289, - "learning_rate": 2.980395069062825e-05, - "loss": 0.1159, + "epoch": 0.13068013068013068, + "grad_norm": 1.659543752670288, + "learning_rate": 2.9215919215919216e-05, + "loss": 0.0938, "step": 440 }, { - "epoch": 0.03341749591563939, - "grad_norm": 8.119304656982422, - "learning_rate": 2.9799495024506165e-05, - "loss": 0.0956, + "epoch": 0.13365013365013365, + "grad_norm": 1.0365532636642456, + "learning_rate": 2.91980991980992e-05, + "loss": 0.0922, "step": 450 }, { - "epoch": 0.03416010693598693, - "grad_norm": 7.150251865386963, - "learning_rate": 2.9795039358384077e-05, - "loss": 0.1152, + "epoch": 0.1366201366201366, + "grad_norm": 1.2649776935577393, + "learning_rate": 2.9180279180279184e-05, + "loss": 0.0741, "step": 460 }, { - "epoch": 0.03490271795633447, - "grad_norm": 3.084035634994507, - "learning_rate": 2.9790583692261995e-05, - "loss": 0.0913, + "epoch": 0.13959013959013958, + "grad_norm": 0.6527109146118164, + "learning_rate": 2.9162459162459163e-05, + "loss": 0.0865, "step": 470 }, { - "epoch": 0.03564532897668202, - "grad_norm": 4.946183681488037, - "learning_rate": 2.9786128026139907e-05, - "loss": 0.1228, + "epoch": 0.14256014256014257, + "grad_norm": 1.706261396408081, + "learning_rate": 2.9144639144639145e-05, + "loss": 0.0805, "step": 480 }, { - "epoch": 0.03638793999702956, - "grad_norm": 1.6884194612503052, - "learning_rate": 2.9781672360017822e-05, - "loss": 0.1226, + "epoch": 0.14553014553014554, + "grad_norm": 1.399427890777588, + "learning_rate": 2.9126819126819127e-05, + "loss": 0.0821, "step": 490 }, { - "epoch": 0.0371305510173771, - "grad_norm": 3.5960988998413086, - "learning_rate": 2.977721669389574e-05, - "loss": 0.1086, + "epoch": 0.1485001485001485, + "grad_norm": 1.1516929864883423, + "learning_rate": 2.910899910899911e-05, + "loss": 0.0877, "step": 500 }, { - "epoch": 0.03787316203772464, - "grad_norm": 1.143905758857727, - "learning_rate": 2.9772761027773652e-05, - "loss": 0.1043, + "epoch": 0.15147015147015147, + "grad_norm": 0.6054642200469971, + "learning_rate": 2.909117909117909e-05, + "loss": 0.0732, "step": 510 }, { - "epoch": 0.038615773058072185, - "grad_norm": 5.153063774108887, - "learning_rate": 2.9768305361651567e-05, - "loss": 0.1263, + "epoch": 0.15444015444015444, + "grad_norm": 1.518789291381836, + "learning_rate": 2.9073359073359074e-05, + "loss": 0.0929, "step": 520 }, { - "epoch": 0.039358384078419725, - "grad_norm": 3.656158924102783, - "learning_rate": 2.9763849695529485e-05, - "loss": 0.1135, + "epoch": 0.1574101574101574, + "grad_norm": 0.8014304637908936, + "learning_rate": 2.905553905553906e-05, + "loss": 0.0948, "step": 530 }, { - "epoch": 0.040100995098767266, - "grad_norm": 3.6122283935546875, - "learning_rate": 2.9759394029407397e-05, - "loss": 0.1087, + "epoch": 0.16038016038016037, + "grad_norm": 1.0207316875457764, + "learning_rate": 2.9037719037719038e-05, + "loss": 0.083, "step": 540 }, { - "epoch": 0.040843606119114806, - "grad_norm": 2.1035525798797607, - "learning_rate": 2.9754938363285312e-05, - "loss": 0.1246, + "epoch": 0.16335016335016336, + "grad_norm": 0.8969537019729614, + "learning_rate": 2.901989901989902e-05, + "loss": 0.0819, "step": 550 }, { - "epoch": 0.041586217139462346, - "grad_norm": 4.143787384033203, - "learning_rate": 2.9750482697163227e-05, - "loss": 0.1212, + "epoch": 0.16632016632016633, + "grad_norm": 1.3450151681900024, + "learning_rate": 2.9002079002079002e-05, + "loss": 0.0886, "step": 560 }, { - "epoch": 0.04232882815980989, - "grad_norm": 3.2385432720184326, - "learning_rate": 2.9746027031041142e-05, - "loss": 0.1181, + "epoch": 0.1692901692901693, + "grad_norm": 1.7601408958435059, + "learning_rate": 2.8984258984258984e-05, + "loss": 0.0898, "step": 570 }, { - "epoch": 0.043071439180157434, - "grad_norm": 2.036144733428955, - "learning_rate": 2.9741571364919057e-05, - "loss": 0.1131, + "epoch": 0.17226017226017226, + "grad_norm": 1.2938804626464844, + "learning_rate": 2.8966438966438967e-05, + "loss": 0.0776, "step": 580 }, { - "epoch": 0.043814050200504974, - "grad_norm": 8.29155158996582, - "learning_rate": 2.973711569879697e-05, - "loss": 0.1433, + "epoch": 0.17523017523017523, + "grad_norm": 0.87087082862854, + "learning_rate": 2.894861894861895e-05, + "loss": 0.0869, "step": 590 }, { - "epoch": 0.044556661220852514, - "grad_norm": 4.493346691131592, - "learning_rate": 2.9732660032674887e-05, - "loss": 0.1083, + "epoch": 0.1782001782001782, + "grad_norm": 0.7267048358917236, + "learning_rate": 2.8930798930798934e-05, + "loss": 0.0877, "step": 600 }, { - "epoch": 0.04529927224120006, - "grad_norm": 2.184577465057373, - "learning_rate": 2.9728204366552802e-05, - "loss": 0.076, + "epoch": 0.18117018117018116, + "grad_norm": 0.9188293218612671, + "learning_rate": 2.8912978912978913e-05, + "loss": 0.0701, "step": 610 }, { - "epoch": 0.0460418832615476, - "grad_norm": 5.727611064910889, - "learning_rate": 2.9723748700430713e-05, - "loss": 0.1295, + "epoch": 0.18414018414018413, + "grad_norm": 1.1492931842803955, + "learning_rate": 2.8895158895158895e-05, + "loss": 0.0744, "step": 620 }, { - "epoch": 0.04678449428189514, - "grad_norm": 2.224817991256714, - "learning_rate": 2.971929303430863e-05, - "loss": 0.1438, + "epoch": 0.18711018711018712, + "grad_norm": 1.3128528594970703, + "learning_rate": 2.8877338877338877e-05, + "loss": 0.0807, "step": 630 }, { - "epoch": 0.04752710530224268, - "grad_norm": 2.196302652359009, - "learning_rate": 2.9714837368186547e-05, - "loss": 0.1176, + "epoch": 0.1900801900801901, + "grad_norm": 0.9953811168670654, + "learning_rate": 2.885951885951886e-05, + "loss": 0.0727, "step": 640 }, { - "epoch": 0.04826971632259023, - "grad_norm": 2.8282594680786133, - "learning_rate": 2.971038170206446e-05, - "loss": 0.1177, + "epoch": 0.19305019305019305, + "grad_norm": 0.6890339851379395, + "learning_rate": 2.8841698841698842e-05, + "loss": 0.0888, "step": 650 }, { - "epoch": 0.04901232734293777, - "grad_norm": 3.0465710163116455, - "learning_rate": 2.9705926035942373e-05, - "loss": 0.1386, + "epoch": 0.19602019602019602, + "grad_norm": 0.5367225408554077, + "learning_rate": 2.8823878823878824e-05, + "loss": 0.0912, "step": 660 }, { - "epoch": 0.04975493836328531, - "grad_norm": 2.1503002643585205, - "learning_rate": 2.9701470369820292e-05, - "loss": 0.1032, + "epoch": 0.19899019899019899, + "grad_norm": 0.6967876553535461, + "learning_rate": 2.880605880605881e-05, + "loss": 0.0831, "step": 670 }, { - "epoch": 0.05049754938363285, - "grad_norm": 3.2530624866485596, - "learning_rate": 2.9697014703698203e-05, - "loss": 0.1137, + "epoch": 0.20196020196020195, + "grad_norm": 1.0245673656463623, + "learning_rate": 2.878823878823879e-05, + "loss": 0.0906, "step": 680 }, { - "epoch": 0.0512401604039804, - "grad_norm": 4.812537670135498, - "learning_rate": 2.969255903757612e-05, - "loss": 0.1326, + "epoch": 0.20493020493020492, + "grad_norm": 0.6193961501121521, + "learning_rate": 2.877041877041877e-05, + "loss": 0.0798, "step": 690 }, { - "epoch": 0.05198277142432794, - "grad_norm": 3.6558496952056885, - "learning_rate": 2.9688103371454033e-05, - "loss": 0.1269, + "epoch": 0.2079002079002079, + "grad_norm": 1.8188307285308838, + "learning_rate": 2.8752598752598753e-05, + "loss": 0.0859, "step": 700 }, { - "epoch": 0.05272538244467548, - "grad_norm": 2.3057334423065186, - "learning_rate": 2.968364770533195e-05, - "loss": 0.1299, + "epoch": 0.21087021087021088, + "grad_norm": 1.4549452066421509, + "learning_rate": 2.8734778734778735e-05, + "loss": 0.0759, "step": 710 }, { - "epoch": 0.05346799346502302, - "grad_norm": 3.2395918369293213, - "learning_rate": 2.9679192039209863e-05, - "loss": 0.0952, + "epoch": 0.21384021384021384, + "grad_norm": 1.0474627017974854, + "learning_rate": 2.8716958716958717e-05, + "loss": 0.0809, "step": 720 }, { - "epoch": 0.054210604485370566, - "grad_norm": 2.045339584350586, - "learning_rate": 2.9674736373087775e-05, - "loss": 0.1311, + "epoch": 0.2168102168102168, + "grad_norm": 0.8879497051239014, + "learning_rate": 2.86991386991387e-05, + "loss": 0.0725, "step": 730 }, { - "epoch": 0.054953215505718106, - "grad_norm": 5.249059677124023, - "learning_rate": 2.9670280706965693e-05, - "loss": 0.1554, + "epoch": 0.21978021978021978, + "grad_norm": 0.6862474679946899, + "learning_rate": 2.8681318681318685e-05, + "loss": 0.08, "step": 740 }, { - "epoch": 0.055695826526065646, - "grad_norm": 2.609159469604492, - "learning_rate": 2.966582504084361e-05, - "loss": 0.094, + "epoch": 0.22275022275022274, + "grad_norm": 1.7506073713302612, + "learning_rate": 2.8663498663498664e-05, + "loss": 0.0992, "step": 750 }, { - "epoch": 0.05643843754641319, - "grad_norm": 6.062933444976807, - "learning_rate": 2.966136937472152e-05, - "loss": 0.1746, + "epoch": 0.2257202257202257, + "grad_norm": 0.9330048561096191, + "learning_rate": 2.8645678645678646e-05, + "loss": 0.0936, "step": 760 }, { - "epoch": 0.057181048566760734, - "grad_norm": 1.9558234214782715, - "learning_rate": 2.965691370859944e-05, - "loss": 0.1119, + "epoch": 0.2286902286902287, + "grad_norm": 1.0609358549118042, + "learning_rate": 2.8627858627858628e-05, + "loss": 0.0816, "step": 770 }, { - "epoch": 0.057923659587108274, - "grad_norm": 3.7589683532714844, - "learning_rate": 2.965245804247735e-05, - "loss": 0.1309, + "epoch": 0.23166023166023167, + "grad_norm": 0.6896756291389465, + "learning_rate": 2.861003861003861e-05, + "loss": 0.073, "step": 780 }, { - "epoch": 0.058666270607455814, - "grad_norm": 5.349353790283203, - "learning_rate": 2.9648002376355265e-05, - "loss": 0.1092, + "epoch": 0.23463023463023464, + "grad_norm": 1.0784275531768799, + "learning_rate": 2.8592218592218592e-05, + "loss": 0.0799, "step": 790 }, { - "epoch": 0.059408881627803355, - "grad_norm": 2.1258533000946045, - "learning_rate": 2.964354671023318e-05, - "loss": 0.0964, + "epoch": 0.2376002376002376, + "grad_norm": 1.0042153596878052, + "learning_rate": 2.8574398574398574e-05, + "loss": 0.079, "step": 800 }, { - "epoch": 0.0601514926481509, - "grad_norm": 2.0149474143981934, - "learning_rate": 2.9639091044111095e-05, - "loss": 0.1101, + "epoch": 0.24057024057024057, + "grad_norm": 1.3999987840652466, + "learning_rate": 2.855657855657856e-05, + "loss": 0.0841, "step": 810 }, { - "epoch": 0.06089410366849844, - "grad_norm": 4.236600875854492, - "learning_rate": 2.963463537798901e-05, - "loss": 0.091, + "epoch": 0.24354024354024353, + "grad_norm": 0.6107861995697021, + "learning_rate": 2.853875853875854e-05, + "loss": 0.067, "step": 820 }, { - "epoch": 0.06163671468884598, - "grad_norm": 1.9681212902069092, - "learning_rate": 2.9630179711866925e-05, - "loss": 0.1065, + "epoch": 0.2465102465102465, + "grad_norm": 0.8890926837921143, + "learning_rate": 2.852093852093852e-05, + "loss": 0.0639, "step": 830 }, { - "epoch": 0.06237932570919352, - "grad_norm": 3.0421035289764404, - "learning_rate": 2.962572404574484e-05, - "loss": 0.1165, + "epoch": 0.2494802494802495, + "grad_norm": 0.7977913618087769, + "learning_rate": 2.8503118503118503e-05, + "loss": 0.0745, "step": 840 }, { - "epoch": 0.06312193672954107, - "grad_norm": 4.69833517074585, - "learning_rate": 2.9621268379622755e-05, - "loss": 0.1158, + "epoch": 0.25245025245025243, + "grad_norm": 1.1050574779510498, + "learning_rate": 2.8485298485298485e-05, + "loss": 0.0828, "step": 850 }, { - "epoch": 0.0638645477498886, - "grad_norm": 1.8385733366012573, - "learning_rate": 2.961681271350067e-05, - "loss": 0.1073, + "epoch": 0.2554202554202554, + "grad_norm": 1.4865517616271973, + "learning_rate": 2.8467478467478467e-05, + "loss": 0.0971, "step": 860 }, { - "epoch": 0.06460715877023615, - "grad_norm": 4.566705703735352, - "learning_rate": 2.961235704737858e-05, - "loss": 0.0994, + "epoch": 0.25839025839025836, + "grad_norm": 0.8170812726020813, + "learning_rate": 2.844965844965845e-05, + "loss": 0.0736, "step": 870 }, { - "epoch": 0.0653497697905837, - "grad_norm": 5.810744285583496, - "learning_rate": 2.96079013812565e-05, - "loss": 0.101, + "epoch": 0.26136026136026136, + "grad_norm": 0.8887490630149841, + "learning_rate": 2.8431838431838435e-05, + "loss": 0.0843, "step": 880 }, { - "epoch": 0.06609238081093123, - "grad_norm": 2.1213254928588867, - "learning_rate": 2.960344571513441e-05, - "loss": 0.1196, + "epoch": 0.26433026433026435, + "grad_norm": 1.2814133167266846, + "learning_rate": 2.8414018414018414e-05, + "loss": 0.07, "step": 890 }, { - "epoch": 0.06683499183127878, - "grad_norm": 2.281233787536621, - "learning_rate": 2.9598990049012327e-05, - "loss": 0.0955, + "epoch": 0.2673002673002673, + "grad_norm": 1.0275837182998657, + "learning_rate": 2.8396198396198396e-05, + "loss": 0.1014, "step": 900 }, { - "epoch": 0.06757760285162631, - "grad_norm": 1.4217268228530884, - "learning_rate": 2.9594534382890245e-05, - "loss": 0.1013, + "epoch": 0.2702702702702703, + "grad_norm": 2.31121563911438, + "learning_rate": 2.8378378378378378e-05, + "loss": 0.0798, "step": 910 }, { - "epoch": 0.06832021387197386, - "grad_norm": 2.6054506301879883, - "learning_rate": 2.9590078716768157e-05, - "loss": 0.0866, + "epoch": 0.2732402732402732, + "grad_norm": 0.7157280445098877, + "learning_rate": 2.836055836055836e-05, + "loss": 0.0856, "step": 920 }, { - "epoch": 0.0690628248923214, - "grad_norm": 3.899290084838867, - "learning_rate": 2.958562305064607e-05, - "loss": 0.128, + "epoch": 0.2762102762102762, + "grad_norm": 1.51650071144104, + "learning_rate": 2.8342738342738343e-05, + "loss": 0.0886, "step": 930 }, { - "epoch": 0.06980543591266894, - "grad_norm": 1.7163687944412231, - "learning_rate": 2.958116738452399e-05, - "loss": 0.14, + "epoch": 0.27918027918027916, + "grad_norm": 1.0991870164871216, + "learning_rate": 2.8324918324918325e-05, + "loss": 0.0832, "step": 940 }, { - "epoch": 0.07054804693301649, - "grad_norm": 2.1250195503234863, - "learning_rate": 2.95767117184019e-05, - "loss": 0.1174, + "epoch": 0.28215028215028215, + "grad_norm": 1.0901234149932861, + "learning_rate": 2.830709830709831e-05, + "loss": 0.0763, "step": 950 }, { - "epoch": 0.07129065795336403, - "grad_norm": 3.302210569381714, - "learning_rate": 2.9572256052279816e-05, - "loss": 0.0919, + "epoch": 0.28512028512028514, + "grad_norm": 1.1311888694763184, + "learning_rate": 2.8289278289278293e-05, + "loss": 0.0766, "step": 960 }, { - "epoch": 0.07203326897371157, - "grad_norm": 2.169238328933716, - "learning_rate": 2.956780038615773e-05, - "loss": 0.1573, + "epoch": 0.2880902880902881, + "grad_norm": 0.6374753713607788, + "learning_rate": 2.827145827145827e-05, + "loss": 0.0736, "step": 970 }, { - "epoch": 0.07277587999405911, - "grad_norm": 5.784306049346924, - "learning_rate": 2.9563344720035646e-05, - "loss": 0.1171, + "epoch": 0.2910602910602911, + "grad_norm": 1.2088044881820679, + "learning_rate": 2.8253638253638253e-05, + "loss": 0.0681, "step": 980 }, { - "epoch": 0.07351849101440665, - "grad_norm": 1.108161449432373, - "learning_rate": 2.955888905391356e-05, - "loss": 0.1081, + "epoch": 0.294030294030294, + "grad_norm": 0.7970484495162964, + "learning_rate": 2.8235818235818236e-05, + "loss": 0.0803, "step": 990 }, { - "epoch": 0.0742611020347542, - "grad_norm": 2.4193813800811768, - "learning_rate": 2.9554433387791473e-05, - "loss": 0.1057, + "epoch": 0.297000297000297, + "grad_norm": 1.2192399501800537, + "learning_rate": 2.8217998217998218e-05, + "loss": 0.0896, "step": 1000 }, { - "epoch": 0.07500371305510174, - "grad_norm": 0.32207611203193665, - "learning_rate": 2.954997772166939e-05, - "loss": 0.1128, + "epoch": 0.29997029997029995, + "grad_norm": 1.2083388566970825, + "learning_rate": 2.82001782001782e-05, + "loss": 0.0795, "step": 1010 }, { - "epoch": 0.07574632407544928, - "grad_norm": 2.084397554397583, - "learning_rate": 2.9545522055547306e-05, - "loss": 0.0976, + "epoch": 0.30294030294030294, + "grad_norm": 1.0027815103530884, + "learning_rate": 2.8182358182358186e-05, + "loss": 0.0874, "step": 1020 }, { - "epoch": 0.07648893509579682, - "grad_norm": 4.024555683135986, - "learning_rate": 2.9541066389425218e-05, - "loss": 0.1344, + "epoch": 0.30591030591030594, + "grad_norm": 1.0552337169647217, + "learning_rate": 2.8164538164538168e-05, + "loss": 0.0718, "step": 1030 }, { - "epoch": 0.07723154611614437, - "grad_norm": 1.9376616477966309, - "learning_rate": 2.9536610723303133e-05, - "loss": 0.0779, + "epoch": 0.3088803088803089, + "grad_norm": 1.1285655498504639, + "learning_rate": 2.8146718146718146e-05, + "loss": 0.0723, "step": 1040 }, { - "epoch": 0.0779741571364919, - "grad_norm": 4.285502910614014, - "learning_rate": 2.953215505718105e-05, - "loss": 0.1116, + "epoch": 0.31185031185031187, + "grad_norm": 1.0405571460723877, + "learning_rate": 2.812889812889813e-05, + "loss": 0.0929, "step": 1050 }, { - "epoch": 0.07871676815683945, - "grad_norm": 2.065674066543579, - "learning_rate": 2.9527699391058963e-05, - "loss": 0.1212, + "epoch": 0.3148203148203148, + "grad_norm": 0.9266412258148193, + "learning_rate": 2.811107811107811e-05, + "loss": 0.0831, "step": 1060 }, { - "epoch": 0.07945937917718698, - "grad_norm": 3.7805116176605225, - "learning_rate": 2.9523243724936878e-05, - "loss": 0.1089, + "epoch": 0.3177903177903178, + "grad_norm": 1.0532838106155396, + "learning_rate": 2.8093258093258093e-05, + "loss": 0.0767, "step": 1070 }, { - "epoch": 0.08020199019753453, - "grad_norm": 2.0384578704833984, - "learning_rate": 2.9518788058814796e-05, - "loss": 0.0872, + "epoch": 0.32076032076032074, + "grad_norm": 0.8444746136665344, + "learning_rate": 2.8075438075438075e-05, + "loss": 0.0779, "step": 1080 }, { - "epoch": 0.08094460121788208, - "grad_norm": 1.7026249170303345, - "learning_rate": 2.9514332392692708e-05, - "loss": 0.0844, + "epoch": 0.32373032373032373, + "grad_norm": 0.7850976586341858, + "learning_rate": 2.805761805761806e-05, + "loss": 0.0803, "step": 1090 }, { - "epoch": 0.08168721223822961, - "grad_norm": 2.1008737087249756, - "learning_rate": 2.9509876726570623e-05, - "loss": 0.0968, + "epoch": 0.3267003267003267, + "grad_norm": 1.0749289989471436, + "learning_rate": 2.8039798039798043e-05, + "loss": 0.0891, "step": 1100 }, { - "epoch": 0.08242982325857716, - "grad_norm": 2.490581750869751, - "learning_rate": 2.9505421060448538e-05, - "loss": 0.0936, + "epoch": 0.32967032967032966, + "grad_norm": 0.9671429395675659, + "learning_rate": 2.802197802197802e-05, + "loss": 0.0734, "step": 1110 }, { - "epoch": 0.08317243427892469, - "grad_norm": 3.1089231967926025, - "learning_rate": 2.9500965394326453e-05, - "loss": 0.1072, + "epoch": 0.33264033264033266, + "grad_norm": 0.665507435798645, + "learning_rate": 2.8004158004158004e-05, + "loss": 0.0771, "step": 1120 }, { - "epoch": 0.08391504529927224, - "grad_norm": 5.446791172027588, - "learning_rate": 2.9496509728204368e-05, - "loss": 0.1201, + "epoch": 0.3356103356103356, + "grad_norm": 0.6605273485183716, + "learning_rate": 2.7986337986337986e-05, + "loss": 0.0698, "step": 1130 }, { - "epoch": 0.08465765631961979, - "grad_norm": 2.201861619949341, - "learning_rate": 2.949205406208228e-05, - "loss": 0.1447, + "epoch": 0.3385803385803386, + "grad_norm": 0.6776021122932434, + "learning_rate": 2.7968517968517968e-05, + "loss": 0.0694, "step": 1140 }, { - "epoch": 0.08540026733996732, - "grad_norm": 1.191215991973877, - "learning_rate": 2.9487598395960198e-05, - "loss": 0.1118, + "epoch": 0.34155034155034153, + "grad_norm": 0.8488766551017761, + "learning_rate": 2.795069795069795e-05, + "loss": 0.0664, "step": 1150 }, { - "epoch": 0.08614287836031487, - "grad_norm": 1.263275146484375, - "learning_rate": 2.9483142729838113e-05, - "loss": 0.0975, + "epoch": 0.3445203445203445, + "grad_norm": 1.0404934883117676, + "learning_rate": 2.7932877932877936e-05, + "loss": 0.0759, "step": 1160 }, { - "epoch": 0.08688548938066241, - "grad_norm": 4.553534507751465, - "learning_rate": 2.9478687063716025e-05, - "loss": 0.112, + "epoch": 0.3474903474903475, + "grad_norm": 1.0605899095535278, + "learning_rate": 2.7915057915057918e-05, + "loss": 0.0863, "step": 1170 }, { - "epoch": 0.08762810040100995, - "grad_norm": 2.4747018814086914, - "learning_rate": 2.9474231397593943e-05, - "loss": 0.1136, + "epoch": 0.35046035046035046, + "grad_norm": 1.034513235092163, + "learning_rate": 2.7897237897237897e-05, + "loss": 0.0815, "step": 1180 }, { - "epoch": 0.0883707114213575, - "grad_norm": 1.9060287475585938, - "learning_rate": 2.9469775731471858e-05, - "loss": 0.1184, + "epoch": 0.35343035343035345, + "grad_norm": 1.6698535680770874, + "learning_rate": 2.787941787941788e-05, + "loss": 0.0869, "step": 1190 }, { - "epoch": 0.08911332244170503, - "grad_norm": 2.2394371032714844, - "learning_rate": 2.946532006534977e-05, - "loss": 0.1119, + "epoch": 0.3564003564003564, + "grad_norm": 0.8041000366210938, + "learning_rate": 2.786159786159786e-05, + "loss": 0.0739, "step": 1200 }, { - "epoch": 0.08985593346205258, - "grad_norm": 4.413462162017822, - "learning_rate": 2.9460864399227685e-05, - "loss": 0.1005, + "epoch": 0.3593703593703594, + "grad_norm": 0.8568106889724731, + "learning_rate": 2.7843777843777843e-05, + "loss": 0.0697, "step": 1210 }, { - "epoch": 0.09059854448240012, - "grad_norm": 2.7055234909057617, - "learning_rate": 2.94564087331056e-05, - "loss": 0.1211, + "epoch": 0.3623403623403623, + "grad_norm": 1.3246437311172485, + "learning_rate": 2.7825957825957826e-05, + "loss": 0.0761, "step": 1220 }, { - "epoch": 0.09134115550274766, - "grad_norm": 4.200916290283203, - "learning_rate": 2.9451953066983515e-05, - "loss": 0.1064, + "epoch": 0.3653103653103653, + "grad_norm": 1.048767328262329, + "learning_rate": 2.780813780813781e-05, + "loss": 0.0806, "step": 1230 }, { - "epoch": 0.0920837665230952, - "grad_norm": 4.629003047943115, - "learning_rate": 2.944749740086143e-05, - "loss": 0.1465, + "epoch": 0.36828036828036825, + "grad_norm": 0.8982730507850647, + "learning_rate": 2.7790317790317793e-05, + "loss": 0.0751, "step": 1240 }, { - "epoch": 0.09282637754344275, - "grad_norm": 2.249943494796753, - "learning_rate": 2.9443041734739345e-05, - "loss": 0.1019, + "epoch": 0.37125037125037125, + "grad_norm": 0.6694151759147644, + "learning_rate": 2.7772497772497772e-05, + "loss": 0.0729, "step": 1250 }, { - "epoch": 0.09356898856379028, - "grad_norm": 2.5523922443389893, - "learning_rate": 2.943858606861726e-05, - "loss": 0.1458, + "epoch": 0.37422037422037424, + "grad_norm": 0.5974312424659729, + "learning_rate": 2.7754677754677754e-05, + "loss": 0.0839, "step": 1260 }, { - "epoch": 0.09431159958413783, - "grad_norm": 1.662513017654419, - "learning_rate": 2.9434130402495175e-05, - "loss": 0.1219, + "epoch": 0.3771903771903772, + "grad_norm": 1.6992155313491821, + "learning_rate": 2.7736857736857736e-05, + "loss": 0.0683, "step": 1270 }, { - "epoch": 0.09505421060448536, - "grad_norm": 1.973795771598816, - "learning_rate": 2.9429674736373086e-05, - "loss": 0.1208, + "epoch": 0.3801603801603802, + "grad_norm": 1.6791558265686035, + "learning_rate": 2.771903771903772e-05, + "loss": 0.0809, "step": 1280 }, { - "epoch": 0.09579682162483291, - "grad_norm": 2.5172524452209473, - "learning_rate": 2.9425219070251005e-05, - "loss": 0.0764, + "epoch": 0.3831303831303831, + "grad_norm": 1.4574382305145264, + "learning_rate": 2.77012177012177e-05, + "loss": 0.0723, "step": 1290 }, { - "epoch": 0.09653943264518046, - "grad_norm": 2.8811981678009033, - "learning_rate": 2.9420763404128916e-05, - "loss": 0.1142, + "epoch": 0.3861003861003861, + "grad_norm": 0.6830180287361145, + "learning_rate": 2.7683397683397686e-05, + "loss": 0.0619, "step": 1300 }, { - "epoch": 0.09728204366552799, - "grad_norm": 2.752640962600708, - "learning_rate": 2.941630773800683e-05, - "loss": 0.0713, + "epoch": 0.38907038907038904, + "grad_norm": 0.7997535467147827, + "learning_rate": 2.766557766557767e-05, + "loss": 0.0797, "step": 1310 }, { - "epoch": 0.09802465468587554, - "grad_norm": 3.1258955001831055, - "learning_rate": 2.941185207188475e-05, - "loss": 0.1004, + "epoch": 0.39204039204039204, + "grad_norm": 0.8323992490768433, + "learning_rate": 2.7647757647757647e-05, + "loss": 0.0717, "step": 1320 }, { - "epoch": 0.09876726570622309, - "grad_norm": 2.652444839477539, - "learning_rate": 2.940739640576266e-05, - "loss": 0.1075, + "epoch": 0.39501039501039503, + "grad_norm": 0.5575417876243591, + "learning_rate": 2.762993762993763e-05, + "loss": 0.072, "step": 1330 }, { - "epoch": 0.09950987672657062, - "grad_norm": 1.6719880104064941, - "learning_rate": 2.9402940739640576e-05, - "loss": 0.1315, + "epoch": 0.39798039798039797, + "grad_norm": 0.9314611554145813, + "learning_rate": 2.761211761211761e-05, + "loss": 0.0594, "step": 1340 }, { - "epoch": 0.10025248774691817, - "grad_norm": 1.6493836641311646, - "learning_rate": 2.9398485073518494e-05, - "loss": 0.1261, + "epoch": 0.40095040095040096, + "grad_norm": 1.7011877298355103, + "learning_rate": 2.7594297594297594e-05, + "loss": 0.0702, "step": 1350 }, { - "epoch": 0.1009950987672657, - "grad_norm": 3.3327760696411133, - "learning_rate": 2.9394029407396406e-05, - "loss": 0.1226, + "epoch": 0.4039204039204039, + "grad_norm": 0.8633953332901001, + "learning_rate": 2.7576477576477576e-05, + "loss": 0.0769, "step": 1360 }, { - "epoch": 0.10173770978761325, - "grad_norm": 1.6937384605407715, - "learning_rate": 2.938957374127432e-05, - "loss": 0.1263, + "epoch": 0.4068904068904069, + "grad_norm": 0.6947808265686035, + "learning_rate": 2.755865755865756e-05, + "loss": 0.069, "step": 1370 }, { - "epoch": 0.1024803208079608, - "grad_norm": 1.859004259109497, - "learning_rate": 2.9385118075152236e-05, - "loss": 0.0915, + "epoch": 0.40986040986040984, + "grad_norm": 0.5774619579315186, + "learning_rate": 2.7540837540837544e-05, + "loss": 0.0642, "step": 1380 }, { - "epoch": 0.10322293182830833, - "grad_norm": 2.384235382080078, - "learning_rate": 2.938066240903015e-05, - "loss": 0.1396, + "epoch": 0.41283041283041283, + "grad_norm": 1.0650389194488525, + "learning_rate": 2.7523017523017522e-05, + "loss": 0.0603, "step": 1390 }, { - "epoch": 0.10396554284865588, - "grad_norm": 1.8067560195922852, - "learning_rate": 2.9376206742908066e-05, - "loss": 0.0948, + "epoch": 0.4158004158004158, + "grad_norm": 0.9271636605262756, + "learning_rate": 2.7505197505197505e-05, + "loss": 0.0695, "step": 1400 }, { - "epoch": 0.10470815386900341, - "grad_norm": 0.9995975494384766, - "learning_rate": 2.9371751076785978e-05, - "loss": 0.0838, + "epoch": 0.41877041877041876, + "grad_norm": 1.1117199659347534, + "learning_rate": 2.7487377487377487e-05, + "loss": 0.0691, "step": 1410 }, { - "epoch": 0.10545076488935096, - "grad_norm": 1.3114655017852783, - "learning_rate": 2.9367295410663896e-05, - "loss": 0.1145, + "epoch": 0.42174042174042176, + "grad_norm": 0.7966795563697815, + "learning_rate": 2.746955746955747e-05, + "loss": 0.068, "step": 1420 }, { - "epoch": 0.1061933759096985, - "grad_norm": 2.0225460529327393, - "learning_rate": 2.936283974454181e-05, - "loss": 0.1128, + "epoch": 0.4247104247104247, + "grad_norm": 1.0240360498428345, + "learning_rate": 2.745173745173745e-05, + "loss": 0.0656, "step": 1430 }, { - "epoch": 0.10693598693004604, - "grad_norm": 2.146571159362793, - "learning_rate": 2.9358384078419723e-05, - "loss": 0.0799, + "epoch": 0.4276804276804277, + "grad_norm": 0.9488998651504517, + "learning_rate": 2.7433917433917437e-05, + "loss": 0.0666, "step": 1440 }, { - "epoch": 0.10767859795039358, - "grad_norm": 4.288600921630859, - "learning_rate": 2.9353928412297638e-05, - "loss": 0.1249, + "epoch": 0.4306504306504306, + "grad_norm": 1.2475918531417847, + "learning_rate": 2.741609741609742e-05, + "loss": 0.0791, "step": 1450 }, { - "epoch": 0.10842120897074113, - "grad_norm": 1.9718669652938843, - "learning_rate": 2.9349472746175556e-05, - "loss": 0.1119, + "epoch": 0.4336204336204336, + "grad_norm": 1.125158667564392, + "learning_rate": 2.7398277398277398e-05, + "loss": 0.0768, "step": 1460 }, { - "epoch": 0.10916381999108866, - "grad_norm": 2.543238639831543, - "learning_rate": 2.9345017080053468e-05, - "loss": 0.113, + "epoch": 0.4365904365904366, + "grad_norm": 0.8701415657997131, + "learning_rate": 2.738045738045738e-05, + "loss": 0.0869, "step": 1470 }, { - "epoch": 0.10990643101143621, - "grad_norm": 1.8163429498672485, - "learning_rate": 2.9340561413931383e-05, - "loss": 0.0969, + "epoch": 0.43956043956043955, + "grad_norm": 1.3756756782531738, + "learning_rate": 2.7362637362637362e-05, + "loss": 0.0814, "step": 1480 }, { - "epoch": 0.11064904203178375, - "grad_norm": 1.0760383605957031, - "learning_rate": 2.93361057478093e-05, - "loss": 0.0848, + "epoch": 0.44253044253044255, + "grad_norm": 1.0403056144714355, + "learning_rate": 2.7344817344817344e-05, + "loss": 0.0678, "step": 1490 }, { - "epoch": 0.11139165305213129, - "grad_norm": 0.9687877297401428, - "learning_rate": 2.9331650081687213e-05, - "loss": 0.0766, + "epoch": 0.4455004455004455, + "grad_norm": 0.45081251859664917, + "learning_rate": 2.7326997326997326e-05, + "loss": 0.0598, "step": 1500 }, { - "epoch": 0.11213426407247884, - "grad_norm": 3.9466569423675537, - "learning_rate": 2.9327194415565128e-05, - "loss": 0.1187, + "epoch": 0.4484704484704485, + "grad_norm": 0.4850378930568695, + "learning_rate": 2.7309177309177312e-05, + "loss": 0.0733, "step": 1510 }, { - "epoch": 0.11287687509282637, - "grad_norm": 4.158041477203369, - "learning_rate": 2.9322738749443043e-05, - "loss": 0.1327, + "epoch": 0.4514404514404514, + "grad_norm": 1.3777422904968262, + "learning_rate": 2.7291357291357294e-05, + "loss": 0.068, "step": 1520 }, { - "epoch": 0.11361948611317392, - "grad_norm": 4.5801591873168945, - "learning_rate": 2.9318283083320958e-05, - "loss": 0.0884, + "epoch": 0.4544104544104544, + "grad_norm": 1.1350257396697998, + "learning_rate": 2.7273537273537276e-05, + "loss": 0.0789, "step": 1530 }, { - "epoch": 0.11436209713352147, - "grad_norm": 4.984243392944336, - "learning_rate": 2.9313827417198873e-05, - "loss": 0.1085, + "epoch": 0.4573804573804574, + "grad_norm": 0.4942154288291931, + "learning_rate": 2.7255717255717255e-05, + "loss": 0.0721, "step": 1540 }, { - "epoch": 0.115104708153869, - "grad_norm": 3.010652780532837, - "learning_rate": 2.9309371751076784e-05, - "loss": 0.0696, + "epoch": 0.46035046035046034, + "grad_norm": 0.8224741816520691, + "learning_rate": 2.7237897237897237e-05, + "loss": 0.0683, "step": 1550 }, { - "epoch": 0.11584731917421655, - "grad_norm": 2.6272575855255127, - "learning_rate": 2.9304916084954703e-05, - "loss": 0.0979, + "epoch": 0.46332046332046334, + "grad_norm": 0.7868162393569946, + "learning_rate": 2.722007722007722e-05, + "loss": 0.0795, "step": 1560 }, { - "epoch": 0.11658993019456408, - "grad_norm": 5.034517765045166, - "learning_rate": 2.9300460418832618e-05, - "loss": 0.0946, + "epoch": 0.4662904662904663, + "grad_norm": 0.9320521354675293, + "learning_rate": 2.72022572022572e-05, + "loss": 0.0809, "step": 1570 }, { - "epoch": 0.11733254121491163, - "grad_norm": 2.4360742568969727, - "learning_rate": 2.929600475271053e-05, - "loss": 0.1243, + "epoch": 0.46926046926046927, + "grad_norm": 1.2369945049285889, + "learning_rate": 2.7184437184437187e-05, + "loss": 0.0606, "step": 1580 }, { - "epoch": 0.11807515223525918, - "grad_norm": 1.762876033782959, - "learning_rate": 2.9291549086588448e-05, - "loss": 0.0878, + "epoch": 0.4722304722304722, + "grad_norm": 0.934948742389679, + "learning_rate": 2.716661716661717e-05, + "loss": 0.0659, "step": 1590 }, { - "epoch": 0.11881776325560671, - "grad_norm": 4.341997146606445, - "learning_rate": 2.9287093420466363e-05, - "loss": 0.143, + "epoch": 0.4752004752004752, + "grad_norm": 0.8786292672157288, + "learning_rate": 2.714879714879715e-05, + "loss": 0.0979, "step": 1600 }, { - "epoch": 0.11956037427595426, - "grad_norm": 2.6205196380615234, - "learning_rate": 2.9282637754344274e-05, - "loss": 0.1234, + "epoch": 0.4781704781704782, + "grad_norm": 0.5932102799415588, + "learning_rate": 2.713097713097713e-05, + "loss": 0.0868, "step": 1610 }, { - "epoch": 0.1203029852963018, - "grad_norm": 1.5635881423950195, - "learning_rate": 2.927818208822219e-05, - "loss": 0.0918, + "epoch": 0.48114048114048114, + "grad_norm": 0.9076425433158875, + "learning_rate": 2.7113157113157112e-05, + "loss": 0.0734, "step": 1620 }, { - "epoch": 0.12104559631664934, - "grad_norm": 4.154393672943115, - "learning_rate": 2.9273726422100104e-05, - "loss": 0.1227, + "epoch": 0.48411048411048413, + "grad_norm": 0.8655311465263367, + "learning_rate": 2.7095337095337095e-05, + "loss": 0.0582, "step": 1630 }, { - "epoch": 0.12178820733699688, - "grad_norm": 3.161184549331665, - "learning_rate": 2.926927075597802e-05, - "loss": 0.1084, + "epoch": 0.48708048708048707, + "grad_norm": 0.6988229155540466, + "learning_rate": 2.7077517077517077e-05, + "loss": 0.0815, "step": 1640 }, { - "epoch": 0.12253081835734442, - "grad_norm": 1.4087167978286743, - "learning_rate": 2.9264815089855934e-05, - "loss": 0.0725, + "epoch": 0.49005049005049006, + "grad_norm": 0.9337241649627686, + "learning_rate": 2.7059697059697062e-05, + "loss": 0.0675, "step": 1650 }, { - "epoch": 0.12327342937769196, - "grad_norm": 2.6927695274353027, - "learning_rate": 2.926035942373385e-05, - "loss": 0.0761, + "epoch": 0.493020493020493, + "grad_norm": 1.3023432493209839, + "learning_rate": 2.7041877041877044e-05, + "loss": 0.0777, "step": 1660 }, { - "epoch": 0.12401604039803951, - "grad_norm": 2.0278165340423584, - "learning_rate": 2.9255903757611764e-05, - "loss": 0.0915, + "epoch": 0.495990495990496, + "grad_norm": 1.0824613571166992, + "learning_rate": 2.7024057024057027e-05, + "loss": 0.0707, "step": 1670 }, { - "epoch": 0.12475865141838705, - "grad_norm": 4.448098659515381, - "learning_rate": 2.925144809148968e-05, - "loss": 0.1183, + "epoch": 0.498960498960499, + "grad_norm": 0.7362037897109985, + "learning_rate": 2.7006237006237005e-05, + "loss": 0.0653, "step": 1680 }, { - "epoch": 0.12550126243873458, - "grad_norm": 1.2473807334899902, - "learning_rate": 2.9246992425367594e-05, - "loss": 0.0999, + "epoch": 0.5019305019305019, + "grad_norm": 0.8872225284576416, + "learning_rate": 2.6988416988416988e-05, + "loss": 0.0737, "step": 1690 }, { - "epoch": 0.12624387345908214, - "grad_norm": 1.8847259283065796, - "learning_rate": 2.924253675924551e-05, - "loss": 0.0896, + "epoch": 0.5049005049005049, + "grad_norm": 1.3285760879516602, + "learning_rate": 2.697059697059697e-05, + "loss": 0.074, "step": 1700 }, { - "epoch": 0.12698648447942967, - "grad_norm": 2.6261157989501953, - "learning_rate": 2.923808109312342e-05, - "loss": 0.1043, + "epoch": 0.5078705078705079, + "grad_norm": 0.8795962929725647, + "learning_rate": 2.6952776952776952e-05, + "loss": 0.0825, "step": 1710 }, { - "epoch": 0.1277290954997772, - "grad_norm": 4.396406650543213, - "learning_rate": 2.9233625427001336e-05, - "loss": 0.1166, + "epoch": 0.5108405108405109, + "grad_norm": 1.0784797668457031, + "learning_rate": 2.6934956934956937e-05, + "loss": 0.0792, "step": 1720 }, { - "epoch": 0.12847170652012477, - "grad_norm": 1.8150869607925415, - "learning_rate": 2.9229169760879254e-05, - "loss": 0.0932, + "epoch": 0.5138105138105138, + "grad_norm": 1.0349229574203491, + "learning_rate": 2.691713691713692e-05, + "loss": 0.0775, "step": 1730 }, { - "epoch": 0.1292143175404723, - "grad_norm": 1.0094959735870361, - "learning_rate": 2.9224714094757166e-05, - "loss": 0.107, + "epoch": 0.5167805167805167, + "grad_norm": 0.4279988706111908, + "learning_rate": 2.6899316899316902e-05, + "loss": 0.0746, "step": 1740 }, { - "epoch": 0.12995692856081983, - "grad_norm": 1.1417855024337769, - "learning_rate": 2.922025842863508e-05, - "loss": 0.1165, + "epoch": 0.5197505197505198, + "grad_norm": 0.7496609091758728, + "learning_rate": 2.688149688149688e-05, + "loss": 0.0501, "step": 1750 }, { - "epoch": 0.1306995395811674, - "grad_norm": 2.269012689590454, - "learning_rate": 2.9215802762513e-05, - "loss": 0.124, + "epoch": 0.5227205227205227, + "grad_norm": 0.5489044785499573, + "learning_rate": 2.6863676863676863e-05, + "loss": 0.0464, "step": 1760 }, { - "epoch": 0.13144215060151493, - "grad_norm": 2.0236096382141113, - "learning_rate": 2.921134709639091e-05, - "loss": 0.0817, + "epoch": 0.5256905256905257, + "grad_norm": 0.6475281119346619, + "learning_rate": 2.6845856845856845e-05, + "loss": 0.0732, "step": 1770 }, { - "epoch": 0.13218476162186246, - "grad_norm": 3.055938482284546, - "learning_rate": 2.9206891430268826e-05, - "loss": 0.1123, + "epoch": 0.5286605286605287, + "grad_norm": 0.9776890873908997, + "learning_rate": 2.6828036828036827e-05, + "loss": 0.0755, "step": 1780 }, { - "epoch": 0.13292737264221002, - "grad_norm": 3.183199882507324, - "learning_rate": 2.920243576414674e-05, - "loss": 0.1264, + "epoch": 0.5316305316305316, + "grad_norm": 0.8436118364334106, + "learning_rate": 2.6810216810216813e-05, + "loss": 0.0743, "step": 1790 }, { - "epoch": 0.13366998366255756, - "grad_norm": 1.401501178741455, - "learning_rate": 2.9197980098024656e-05, - "loss": 0.1142, + "epoch": 0.5346005346005346, + "grad_norm": 1.0720716714859009, + "learning_rate": 2.6792396792396795e-05, + "loss": 0.0821, "step": 1800 }, { - "epoch": 0.1344125946829051, - "grad_norm": 0.9348109364509583, - "learning_rate": 2.919352443190257e-05, - "loss": 0.0885, + "epoch": 0.5375705375705375, + "grad_norm": 1.699569582939148, + "learning_rate": 2.6774576774576777e-05, + "loss": 0.0664, "step": 1810 }, { - "epoch": 0.13515520570325262, - "grad_norm": 2.85656476020813, - "learning_rate": 2.9189068765780482e-05, - "loss": 0.1249, + "epoch": 0.5405405405405406, + "grad_norm": 0.975058376789093, + "learning_rate": 2.6756756756756756e-05, + "loss": 0.0699, "step": 1820 }, { - "epoch": 0.13589781672360018, - "grad_norm": 2.1008095741271973, - "learning_rate": 2.91846130996584e-05, - "loss": 0.092, + "epoch": 0.5435105435105435, + "grad_norm": 0.9048154950141907, + "learning_rate": 2.6738936738936738e-05, + "loss": 0.0862, "step": 1830 }, { - "epoch": 0.13664042774394772, - "grad_norm": 3.1172657012939453, - "learning_rate": 2.9180157433536316e-05, - "loss": 0.0867, + "epoch": 0.5464805464805464, + "grad_norm": 1.2828189134597778, + "learning_rate": 2.672111672111672e-05, + "loss": 0.0683, "step": 1840 }, { - "epoch": 0.13738303876429525, - "grad_norm": 1.8529694080352783, - "learning_rate": 2.9175701767414227e-05, - "loss": 0.0946, + "epoch": 0.5494505494505495, + "grad_norm": 1.5076489448547363, + "learning_rate": 2.6703296703296702e-05, + "loss": 0.0827, "step": 1850 }, { - "epoch": 0.1381256497846428, - "grad_norm": 2.7626330852508545, - "learning_rate": 2.9171246101292142e-05, - "loss": 0.0952, + "epoch": 0.5524205524205524, + "grad_norm": 0.8987582325935364, + "learning_rate": 2.6685476685476688e-05, + "loss": 0.0637, "step": 1860 }, { - "epoch": 0.13886826080499035, - "grad_norm": 7.8472089767456055, - "learning_rate": 2.916679043517006e-05, - "loss": 0.1124, + "epoch": 0.5553905553905554, + "grad_norm": 0.7532503008842468, + "learning_rate": 2.666765666765667e-05, + "loss": 0.0847, "step": 1870 }, { - "epoch": 0.13961087182533788, - "grad_norm": 2.6485297679901123, - "learning_rate": 2.9162334769047972e-05, - "loss": 0.1109, + "epoch": 0.5583605583605583, + "grad_norm": 0.8142328858375549, + "learning_rate": 2.6649836649836652e-05, + "loss": 0.0732, "step": 1880 }, { - "epoch": 0.14035348284568544, - "grad_norm": 4.575742721557617, - "learning_rate": 2.9157879102925887e-05, - "loss": 0.0989, + "epoch": 0.5613305613305614, + "grad_norm": 0.6130629181861877, + "learning_rate": 2.663201663201663e-05, + "loss": 0.0858, "step": 1890 }, { - "epoch": 0.14109609386603297, - "grad_norm": 4.842647552490234, - "learning_rate": 2.9153423436803806e-05, - "loss": 0.1074, + "epoch": 0.5643005643005643, + "grad_norm": 1.0010277032852173, + "learning_rate": 2.6614196614196613e-05, + "loss": 0.0773, "step": 1900 }, { - "epoch": 0.1418387048863805, - "grad_norm": 3.04119873046875, - "learning_rate": 2.9148967770681717e-05, - "loss": 0.0972, + "epoch": 0.5672705672705672, + "grad_norm": 0.8659054040908813, + "learning_rate": 2.6596376596376595e-05, + "loss": 0.0778, "step": 1910 }, { - "epoch": 0.14258131590672807, - "grad_norm": 2.0396547317504883, - "learning_rate": 2.9144512104559632e-05, - "loss": 0.1181, + "epoch": 0.5702405702405703, + "grad_norm": 0.7538540959358215, + "learning_rate": 2.6578556578556577e-05, + "loss": 0.071, "step": 1920 }, { - "epoch": 0.1433239269270756, - "grad_norm": 2.2266111373901367, - "learning_rate": 2.9140056438437547e-05, - "loss": 0.0901, + "epoch": 0.5732105732105732, + "grad_norm": 0.9332623481750488, + "learning_rate": 2.6560736560736563e-05, + "loss": 0.0795, "step": 1930 }, { - "epoch": 0.14406653794742313, - "grad_norm": 3.9434754848480225, - "learning_rate": 2.9135600772315462e-05, - "loss": 0.1078, + "epoch": 0.5761805761805762, + "grad_norm": 1.3811964988708496, + "learning_rate": 2.6542916542916545e-05, + "loss": 0.0782, "step": 1940 }, { - "epoch": 0.1448091489677707, - "grad_norm": 1.5583536624908447, - "learning_rate": 2.9131145106193377e-05, - "loss": 0.0767, + "epoch": 0.5791505791505791, + "grad_norm": 0.8431365489959717, + "learning_rate": 2.6525096525096527e-05, + "loss": 0.0727, "step": 1950 }, { - "epoch": 0.14555175998811823, - "grad_norm": 2.2595632076263428, - "learning_rate": 2.912668944007129e-05, - "loss": 0.0906, + "epoch": 0.5821205821205822, + "grad_norm": 0.6309846639633179, + "learning_rate": 2.6507276507276506e-05, + "loss": 0.074, "step": 1960 }, { - "epoch": 0.14629437100846576, - "grad_norm": 1.268849492073059, - "learning_rate": 2.9122233773949207e-05, - "loss": 0.0847, + "epoch": 0.5850905850905851, + "grad_norm": 1.0400676727294922, + "learning_rate": 2.648945648945649e-05, + "loss": 0.0683, "step": 1970 }, { - "epoch": 0.1470369820288133, - "grad_norm": 2.6412172317504883, - "learning_rate": 2.9117778107827122e-05, - "loss": 0.1297, + "epoch": 0.588060588060588, + "grad_norm": 1.1572082042694092, + "learning_rate": 2.647163647163647e-05, + "loss": 0.0718, "step": 1980 }, { - "epoch": 0.14777959304916086, - "grad_norm": 3.151843547821045, - "learning_rate": 2.9113322441705034e-05, - "loss": 0.0975, + "epoch": 0.5910305910305911, + "grad_norm": 0.6798633337020874, + "learning_rate": 2.6453816453816453e-05, + "loss": 0.0695, "step": 1990 }, { - "epoch": 0.1485222040695084, - "grad_norm": 2.6987667083740234, - "learning_rate": 2.9108866775582952e-05, - "loss": 0.0992, + "epoch": 0.594000594000594, + "grad_norm": 0.49746111035346985, + "learning_rate": 2.6435996435996438e-05, + "loss": 0.0726, "step": 2000 }, { - "epoch": 0.14926481508985592, - "grad_norm": 3.049734115600586, - "learning_rate": 2.9104411109460867e-05, - "loss": 0.1189, + "epoch": 0.596970596970597, + "grad_norm": 0.7728568315505981, + "learning_rate": 2.641817641817642e-05, + "loss": 0.0681, "step": 2010 }, { - "epoch": 0.15000742611020348, - "grad_norm": 2.4125332832336426, - "learning_rate": 2.909995544333878e-05, - "loss": 0.059, + "epoch": 0.5999405999405999, + "grad_norm": 1.0050711631774902, + "learning_rate": 2.6400356400356403e-05, + "loss": 0.0783, "step": 2020 }, { - "epoch": 0.15075003713055102, - "grad_norm": 2.5139408111572266, - "learning_rate": 2.9095499777216694e-05, - "loss": 0.1085, + "epoch": 0.6029106029106029, + "grad_norm": 0.6406471729278564, + "learning_rate": 2.638253638253638e-05, + "loss": 0.0743, "step": 2030 }, { - "epoch": 0.15149264815089855, - "grad_norm": 2.7138638496398926, - "learning_rate": 2.909104411109461e-05, - "loss": 0.1312, + "epoch": 0.6058806058806059, + "grad_norm": 0.5787233710289001, + "learning_rate": 2.6364716364716364e-05, + "loss": 0.0692, "step": 2040 }, { - "epoch": 0.1522352591712461, - "grad_norm": 2.654601812362671, - "learning_rate": 2.9086588444972524e-05, - "loss": 0.1116, + "epoch": 0.6088506088506088, + "grad_norm": 1.646081566810608, + "learning_rate": 2.6346896346896346e-05, + "loss": 0.0712, "step": 2050 }, { - "epoch": 0.15297787019159365, - "grad_norm": 2.8384549617767334, - "learning_rate": 2.908213277885044e-05, - "loss": 0.0842, + "epoch": 0.6118206118206119, + "grad_norm": 1.0009373426437378, + "learning_rate": 2.6329076329076328e-05, + "loss": 0.0775, "step": 2060 }, { - "epoch": 0.15372048121194118, - "grad_norm": 2.3352036476135254, - "learning_rate": 2.9077677112728354e-05, - "loss": 0.1261, + "epoch": 0.6147906147906148, + "grad_norm": 0.7711787819862366, + "learning_rate": 2.6311256311256313e-05, + "loss": 0.0649, "step": 2070 }, { - "epoch": 0.15446309223228874, - "grad_norm": 4.299140453338623, - "learning_rate": 2.907322144660627e-05, - "loss": 0.1066, + "epoch": 0.6177606177606177, + "grad_norm": 0.6170060038566589, + "learning_rate": 2.6293436293436296e-05, + "loss": 0.0722, "step": 2080 }, { - "epoch": 0.15520570325263627, - "grad_norm": 3.3088362216949463, - "learning_rate": 2.9068765780484184e-05, - "loss": 0.0954, + "epoch": 0.6207306207306207, + "grad_norm": 0.5135714411735535, + "learning_rate": 2.6275616275616278e-05, + "loss": 0.0676, "step": 2090 }, { - "epoch": 0.1559483142729838, - "grad_norm": 1.6698198318481445, - "learning_rate": 2.90643101143621e-05, - "loss": 0.1086, + "epoch": 0.6237006237006237, + "grad_norm": 0.6718676090240479, + "learning_rate": 2.625779625779626e-05, + "loss": 0.0748, "step": 2100 }, { - "epoch": 0.15669092529333134, - "grad_norm": 2.066899061203003, - "learning_rate": 2.9059854448240014e-05, - "loss": 0.1169, + "epoch": 0.6266706266706267, + "grad_norm": 0.47525379061698914, + "learning_rate": 2.623997623997624e-05, + "loss": 0.0685, "step": 2110 }, { - "epoch": 0.1574335363136789, - "grad_norm": 1.2918481826782227, - "learning_rate": 2.905539878211793e-05, - "loss": 0.1248, + "epoch": 0.6296406296406296, + "grad_norm": 1.864790916442871, + "learning_rate": 2.622215622215622e-05, + "loss": 0.0934, "step": 2120 }, { - "epoch": 0.15817614733402643, - "grad_norm": 3.492408037185669, - "learning_rate": 2.905094311599584e-05, - "loss": 0.0907, + "epoch": 0.6326106326106327, + "grad_norm": 1.857503890991211, + "learning_rate": 2.6204336204336203e-05, + "loss": 0.0736, "step": 2130 }, { - "epoch": 0.15891875835437397, - "grad_norm": 4.03883695602417, - "learning_rate": 2.904648744987376e-05, - "loss": 0.1044, + "epoch": 0.6355806355806356, + "grad_norm": 0.8403912782669067, + "learning_rate": 2.618651618651619e-05, + "loss": 0.0686, "step": 2140 }, { - "epoch": 0.15966136937472153, - "grad_norm": 2.541898250579834, - "learning_rate": 2.904203178375167e-05, - "loss": 0.1244, + "epoch": 0.6385506385506385, + "grad_norm": 1.2553832530975342, + "learning_rate": 2.616869616869617e-05, + "loss": 0.075, "step": 2150 }, { - "epoch": 0.16040398039506906, - "grad_norm": 1.1779425144195557, - "learning_rate": 2.9037576117629585e-05, - "loss": 0.096, + "epoch": 0.6415206415206415, + "grad_norm": 0.75575190782547, + "learning_rate": 2.6150876150876153e-05, + "loss": 0.072, "step": 2160 }, { - "epoch": 0.1611465914154166, - "grad_norm": 2.521737575531006, - "learning_rate": 2.9033120451507504e-05, - "loss": 0.0854, + "epoch": 0.6444906444906445, + "grad_norm": 0.7265666723251343, + "learning_rate": 2.6133056133056135e-05, + "loss": 0.081, "step": 2170 }, { - "epoch": 0.16188920243576416, - "grad_norm": 2.2708542346954346, - "learning_rate": 2.9028664785385415e-05, - "loss": 0.1053, + "epoch": 0.6474606474606475, + "grad_norm": 0.4849323332309723, + "learning_rate": 2.6115236115236114e-05, + "loss": 0.0801, "step": 2180 }, { - "epoch": 0.1626318134561117, - "grad_norm": 1.611698865890503, - "learning_rate": 2.902420911926333e-05, - "loss": 0.1094, + "epoch": 0.6504306504306504, + "grad_norm": 1.0659674406051636, + "learning_rate": 2.6097416097416096e-05, + "loss": 0.07, "step": 2190 }, { - "epoch": 0.16337442447645922, - "grad_norm": 2.962660074234009, - "learning_rate": 2.9019753453141245e-05, - "loss": 0.1022, + "epoch": 0.6534006534006535, + "grad_norm": 1.1422368288040161, + "learning_rate": 2.6079596079596078e-05, + "loss": 0.0689, "step": 2200 }, { - "epoch": 0.16411703549680678, - "grad_norm": 1.720831036567688, - "learning_rate": 2.901529778701916e-05, - "loss": 0.1207, + "epoch": 0.6563706563706564, + "grad_norm": 0.6629343032836914, + "learning_rate": 2.6061776061776064e-05, + "loss": 0.0647, "step": 2210 }, { - "epoch": 0.16485964651715432, - "grad_norm": 2.2500967979431152, - "learning_rate": 2.9010842120897075e-05, - "loss": 0.1023, + "epoch": 0.6593406593406593, + "grad_norm": 1.0721797943115234, + "learning_rate": 2.6043956043956046e-05, + "loss": 0.065, "step": 2220 }, { - "epoch": 0.16560225753750185, - "grad_norm": 2.8786466121673584, - "learning_rate": 2.9006386454774987e-05, - "loss": 0.133, + "epoch": 0.6623106623106623, + "grad_norm": 1.3788233995437622, + "learning_rate": 2.6026136026136028e-05, + "loss": 0.0743, "step": 2230 }, { - "epoch": 0.16634486855784938, - "grad_norm": 2.339738607406616, - "learning_rate": 2.9001930788652905e-05, - "loss": 0.1436, + "epoch": 0.6652806652806653, + "grad_norm": 1.3036271333694458, + "learning_rate": 2.600831600831601e-05, + "loss": 0.0883, "step": 2240 }, { - "epoch": 0.16708747957819695, - "grad_norm": 2.527097702026367, - "learning_rate": 2.899747512253082e-05, - "loss": 0.0817, + "epoch": 0.6682506682506683, + "grad_norm": 0.8231455683708191, + "learning_rate": 2.599049599049599e-05, + "loss": 0.0684, "step": 2250 }, { - "epoch": 0.16783009059854448, - "grad_norm": 2.750969171524048, - "learning_rate": 2.8993019456408732e-05, - "loss": 0.1492, + "epoch": 0.6712206712206712, + "grad_norm": 1.1133084297180176, + "learning_rate": 2.597267597267597e-05, + "loss": 0.0658, "step": 2260 }, { - "epoch": 0.168572701618892, - "grad_norm": 2.195770740509033, - "learning_rate": 2.8988563790286647e-05, - "loss": 0.115, + "epoch": 0.6741906741906742, + "grad_norm": 1.1839704513549805, + "learning_rate": 2.5954855954855953e-05, + "loss": 0.0613, "step": 2270 }, { - "epoch": 0.16931531263923957, - "grad_norm": 3.0774083137512207, - "learning_rate": 2.8984108124164565e-05, - "loss": 0.1062, + "epoch": 0.6771606771606772, + "grad_norm": 0.9162219762802124, + "learning_rate": 2.593703593703594e-05, + "loss": 0.0611, "step": 2280 }, { - "epoch": 0.1700579236595871, - "grad_norm": 2.673882484436035, - "learning_rate": 2.8979652458042477e-05, - "loss": 0.1063, + "epoch": 0.6801306801306801, + "grad_norm": 0.9693069458007812, + "learning_rate": 2.591921591921592e-05, + "loss": 0.0738, "step": 2290 }, { - "epoch": 0.17080053467993464, - "grad_norm": 3.152207612991333, - "learning_rate": 2.8975196791920392e-05, - "loss": 0.1098, + "epoch": 0.6831006831006831, + "grad_norm": 1.2180196046829224, + "learning_rate": 2.5901395901395903e-05, + "loss": 0.0706, "step": 2300 }, { - "epoch": 0.1715431457002822, - "grad_norm": 4.860641956329346, - "learning_rate": 2.897074112579831e-05, - "loss": 0.1073, + "epoch": 0.6860706860706861, + "grad_norm": 1.0316444635391235, + "learning_rate": 2.5883575883575886e-05, + "loss": 0.0678, "step": 2310 }, { - "epoch": 0.17228575672062973, - "grad_norm": 2.261838436126709, - "learning_rate": 2.8966285459676222e-05, - "loss": 0.1035, + "epoch": 0.689040689040689, + "grad_norm": 0.9395328164100647, + "learning_rate": 2.5865755865755864e-05, + "loss": 0.0745, "step": 2320 }, { - "epoch": 0.17302836774097727, - "grad_norm": 1.3627759218215942, - "learning_rate": 2.8961829793554137e-05, - "loss": 0.0873, + "epoch": 0.692010692010692, + "grad_norm": 0.9199443459510803, + "learning_rate": 2.5847935847935846e-05, + "loss": 0.0736, "step": 2330 }, { - "epoch": 0.17377097876132483, - "grad_norm": 2.009950637817383, - "learning_rate": 2.8957374127432052e-05, - "loss": 0.1033, + "epoch": 0.694980694980695, + "grad_norm": 0.6675525307655334, + "learning_rate": 2.583011583011583e-05, + "loss": 0.07, "step": 2340 }, { - "epoch": 0.17451358978167236, - "grad_norm": 1.0061966180801392, - "learning_rate": 2.8952918461309967e-05, - "loss": 0.0995, + "epoch": 0.697950697950698, + "grad_norm": 1.0296847820281982, + "learning_rate": 2.5812295812295814e-05, + "loss": 0.0667, "step": 2350 }, { - "epoch": 0.1752562008020199, - "grad_norm": 4.665594100952148, - "learning_rate": 2.8948462795187882e-05, - "loss": 0.1148, + "epoch": 0.7009207009207009, + "grad_norm": 0.7614896893501282, + "learning_rate": 2.5794475794475796e-05, + "loss": 0.0598, "step": 2360 }, { - "epoch": 0.17599881182236746, - "grad_norm": 2.1051509380340576, - "learning_rate": 2.8944007129065793e-05, - "loss": 0.0832, + "epoch": 0.7038907038907039, + "grad_norm": 0.3479407727718353, + "learning_rate": 2.577665577665578e-05, + "loss": 0.073, "step": 2370 }, { - "epoch": 0.176741422842715, - "grad_norm": 4.589431285858154, - "learning_rate": 2.8939551462943712e-05, - "loss": 0.072, + "epoch": 0.7068607068607069, + "grad_norm": 0.906836986541748, + "learning_rate": 2.575883575883576e-05, + "loss": 0.066, "step": 2380 }, { - "epoch": 0.17748403386306252, - "grad_norm": 5.031434059143066, - "learning_rate": 2.8935095796821627e-05, - "loss": 0.1275, + "epoch": 0.7098307098307098, + "grad_norm": 1.1523551940917969, + "learning_rate": 2.574101574101574e-05, + "loss": 0.0752, "step": 2390 }, { - "epoch": 0.17822664488341006, - "grad_norm": 1.6660507917404175, - "learning_rate": 2.893064013069954e-05, - "loss": 0.1189, + "epoch": 0.7128007128007128, + "grad_norm": 1.5044126510620117, + "learning_rate": 2.572319572319572e-05, + "loss": 0.0629, "step": 2400 }, { - "epoch": 0.17896925590375762, - "grad_norm": 2.1114559173583984, - "learning_rate": 2.8926184464577457e-05, - "loss": 0.09, + "epoch": 0.7157707157707157, + "grad_norm": 0.7415446639060974, + "learning_rate": 2.5705375705375707e-05, + "loss": 0.0851, "step": 2410 }, { - "epoch": 0.17971186692410515, - "grad_norm": 1.1121262311935425, - "learning_rate": 2.8921728798455372e-05, - "loss": 0.0799, + "epoch": 0.7187407187407188, + "grad_norm": 0.9454631805419922, + "learning_rate": 2.568755568755569e-05, + "loss": 0.0818, "step": 2420 }, { - "epoch": 0.18045447794445268, - "grad_norm": 2.8174080848693848, - "learning_rate": 2.8917273132333283e-05, - "loss": 0.0926, + "epoch": 0.7217107217107217, + "grad_norm": 1.3333286046981812, + "learning_rate": 2.566973566973567e-05, + "loss": 0.0781, "step": 2430 }, { - "epoch": 0.18119708896480025, - "grad_norm": 3.2218480110168457, - "learning_rate": 2.89128174662112e-05, - "loss": 0.0758, + "epoch": 0.7246807246807246, + "grad_norm": 1.030158519744873, + "learning_rate": 2.5651915651915654e-05, + "loss": 0.0752, "step": 2440 }, { - "epoch": 0.18193969998514778, - "grad_norm": 1.7610548734664917, - "learning_rate": 2.8908361800089113e-05, - "loss": 0.1052, + "epoch": 0.7276507276507277, + "grad_norm": 0.9422637820243835, + "learning_rate": 2.5634095634095636e-05, + "loss": 0.0779, "step": 2450 }, { - "epoch": 0.1826823110054953, - "grad_norm": 2.7015151977539062, - "learning_rate": 2.890390613396703e-05, - "loss": 0.0857, + "epoch": 0.7306207306207306, + "grad_norm": 0.8229959011077881, + "learning_rate": 2.5616275616275615e-05, + "loss": 0.0644, "step": 2460 }, { - "epoch": 0.18342492202584287, - "grad_norm": 1.8576743602752686, - "learning_rate": 2.8899450467844943e-05, - "loss": 0.0653, + "epoch": 0.7335907335907336, + "grad_norm": 0.7680384516716003, + "learning_rate": 2.5598455598455597e-05, + "loss": 0.0717, "step": 2470 }, { - "epoch": 0.1841675330461904, - "grad_norm": 5.928577423095703, - "learning_rate": 2.889499480172286e-05, - "loss": 0.1243, + "epoch": 0.7365607365607365, + "grad_norm": 1.118505835533142, + "learning_rate": 2.5580635580635582e-05, + "loss": 0.0777, "step": 2480 }, { - "epoch": 0.18491014406653794, - "grad_norm": 2.597346544265747, - "learning_rate": 2.8890539135600773e-05, - "loss": 0.1309, + "epoch": 0.7395307395307396, + "grad_norm": 0.7873930335044861, + "learning_rate": 2.5562815562815565e-05, + "loss": 0.0843, "step": 2490 }, { - "epoch": 0.1856527550868855, - "grad_norm": 3.324141263961792, - "learning_rate": 2.888608346947869e-05, - "loss": 0.0892, + "epoch": 0.7425007425007425, + "grad_norm": 0.688714861869812, + "learning_rate": 2.5544995544995547e-05, + "loss": 0.0664, "step": 2500 }, { - "epoch": 0.18639536610723303, - "grad_norm": 2.4857001304626465, - "learning_rate": 2.8881627803356603e-05, - "loss": 0.0984, + "epoch": 0.7454707454707454, + "grad_norm": 0.6850858330726624, + "learning_rate": 2.552717552717553e-05, + "loss": 0.0715, "step": 2510 }, { - "epoch": 0.18713797712758057, - "grad_norm": 2.5961930751800537, - "learning_rate": 2.887717213723452e-05, - "loss": 0.091, + "epoch": 0.7484407484407485, + "grad_norm": 0.71670001745224, + "learning_rate": 2.550935550935551e-05, + "loss": 0.0579, "step": 2520 }, { - "epoch": 0.1878805881479281, - "grad_norm": 0.8424578309059143, - "learning_rate": 2.8872716471112433e-05, - "loss": 0.1048, + "epoch": 0.7514107514107514, + "grad_norm": 0.8807306885719299, + "learning_rate": 2.549153549153549e-05, + "loss": 0.0568, "step": 2530 }, { - "epoch": 0.18862319916827566, - "grad_norm": 1.7092845439910889, - "learning_rate": 2.8868260804990345e-05, - "loss": 0.0916, + "epoch": 0.7543807543807544, + "grad_norm": 1.108122706413269, + "learning_rate": 2.5473715473715472e-05, + "loss": 0.0726, "step": 2540 }, { - "epoch": 0.1893658101886232, - "grad_norm": 1.8642319440841675, - "learning_rate": 2.8863805138868263e-05, - "loss": 0.0953, + "epoch": 0.7573507573507573, + "grad_norm": 0.91880863904953, + "learning_rate": 2.5455895455895458e-05, + "loss": 0.0697, "step": 2550 }, { - "epoch": 0.19010842120897073, - "grad_norm": 3.4981400966644287, - "learning_rate": 2.8859349472746175e-05, - "loss": 0.088, + "epoch": 0.7603207603207603, + "grad_norm": 1.2890892028808594, + "learning_rate": 2.543807543807544e-05, + "loss": 0.0742, "step": 2560 }, { - "epoch": 0.1908510322293183, - "grad_norm": 4.905360221862793, - "learning_rate": 2.885489380662409e-05, - "loss": 0.102, + "epoch": 0.7632907632907633, + "grad_norm": 1.3207858800888062, + "learning_rate": 2.5420255420255422e-05, + "loss": 0.0602, "step": 2570 }, { - "epoch": 0.19159364324966582, - "grad_norm": 3.5098886489868164, - "learning_rate": 2.8850438140502008e-05, - "loss": 0.116, + "epoch": 0.7662607662607662, + "grad_norm": 1.0641006231307983, + "learning_rate": 2.5402435402435404e-05, + "loss": 0.0783, "step": 2580 }, { - "epoch": 0.19233625427001336, - "grad_norm": 2.462068557739258, - "learning_rate": 2.884598247437992e-05, - "loss": 0.0878, + "epoch": 0.7692307692307693, + "grad_norm": 0.7104412317276001, + "learning_rate": 2.5384615384615386e-05, + "loss": 0.0622, "step": 2590 }, { - "epoch": 0.19307886529036092, - "grad_norm": 1.3594739437103271, - "learning_rate": 2.8841526808257835e-05, - "loss": 0.0978, + "epoch": 0.7722007722007722, + "grad_norm": 1.2291561365127563, + "learning_rate": 2.5366795366795365e-05, + "loss": 0.0789, "step": 2600 }, { - "epoch": 0.19382147631070845, - "grad_norm": 0.8977119326591492, - "learning_rate": 2.883707114213575e-05, - "loss": 0.1115, + "epoch": 0.7751707751707752, + "grad_norm": 0.9593638181686401, + "learning_rate": 2.5348975348975347e-05, + "loss": 0.0604, "step": 2610 }, { - "epoch": 0.19456408733105598, - "grad_norm": 4.278836727142334, - "learning_rate": 2.8832615476013665e-05, - "loss": 0.0873, + "epoch": 0.7781407781407781, + "grad_norm": 0.7314882278442383, + "learning_rate": 2.5331155331155333e-05, + "loss": 0.0679, "step": 2620 }, { - "epoch": 0.19530669835140355, - "grad_norm": 2.4040420055389404, - "learning_rate": 2.882815980989158e-05, - "loss": 0.1154, + "epoch": 0.7811107811107811, + "grad_norm": 1.0600396394729614, + "learning_rate": 2.5313335313335315e-05, + "loss": 0.0723, "step": 2630 }, { - "epoch": 0.19604930937175108, - "grad_norm": 3.2387709617614746, - "learning_rate": 2.8823704143769495e-05, - "loss": 0.1202, + "epoch": 0.7840807840807841, + "grad_norm": 0.8725117444992065, + "learning_rate": 2.5295515295515297e-05, + "loss": 0.0627, "step": 2640 }, { - "epoch": 0.1967919203920986, - "grad_norm": 1.2771217823028564, - "learning_rate": 2.881924847764741e-05, - "loss": 0.1064, + "epoch": 0.787050787050787, + "grad_norm": 0.7169741988182068, + "learning_rate": 2.527769527769528e-05, + "loss": 0.0674, "step": 2650 }, { - "epoch": 0.19753453141244617, - "grad_norm": 4.477030277252197, - "learning_rate": 2.8814792811525325e-05, - "loss": 0.0813, + "epoch": 0.7900207900207901, + "grad_norm": 0.8746957182884216, + "learning_rate": 2.525987525987526e-05, + "loss": 0.0729, "step": 2660 }, { - "epoch": 0.1982771424327937, - "grad_norm": 2.6116533279418945, - "learning_rate": 2.8810337145403236e-05, - "loss": 0.1133, + "epoch": 0.792990792990793, + "grad_norm": 0.5545559525489807, + "learning_rate": 2.524205524205524e-05, + "loss": 0.0829, "step": 2670 }, { - "epoch": 0.19901975345314124, - "grad_norm": 2.1124253273010254, - "learning_rate": 2.880588147928115e-05, - "loss": 0.1134, + "epoch": 0.7959607959607959, + "grad_norm": 1.095009207725525, + "learning_rate": 2.5224235224235222e-05, + "loss": 0.0705, "step": 2680 }, { - "epoch": 0.19976236447348877, - "grad_norm": 2.3649754524230957, - "learning_rate": 2.880142581315907e-05, - "loss": 0.1062, + "epoch": 0.7989307989307989, + "grad_norm": 1.138685941696167, + "learning_rate": 2.5206415206415208e-05, + "loss": 0.0683, "step": 2690 }, { - "epoch": 0.20050497549383633, - "grad_norm": 2.6647801399230957, - "learning_rate": 2.879697014703698e-05, - "loss": 0.1092, + "epoch": 0.8019008019008019, + "grad_norm": 0.8513908386230469, + "learning_rate": 2.518859518859519e-05, + "loss": 0.0721, "step": 2700 }, { - "epoch": 0.20124758651418387, - "grad_norm": 3.3392791748046875, - "learning_rate": 2.8792514480914896e-05, - "loss": 0.1107, + "epoch": 0.8048708048708049, + "grad_norm": 0.6944445967674255, + "learning_rate": 2.5170775170775172e-05, + "loss": 0.0666, "step": 2710 }, { - "epoch": 0.2019901975345314, - "grad_norm": 2.0530688762664795, - "learning_rate": 2.8788058814792815e-05, - "loss": 0.1148, + "epoch": 0.8078408078408078, + "grad_norm": 0.7438215613365173, + "learning_rate": 2.5152955152955155e-05, + "loss": 0.0587, "step": 2720 }, { - "epoch": 0.20273280855487896, - "grad_norm": 2.3883824348449707, - "learning_rate": 2.8783603148670726e-05, - "loss": 0.0924, + "epoch": 0.8108108108108109, + "grad_norm": 1.1805411577224731, + "learning_rate": 2.5135135135135137e-05, + "loss": 0.0701, "step": 2730 }, { - "epoch": 0.2034754195752265, - "grad_norm": 1.486218810081482, - "learning_rate": 2.877914748254864e-05, - "loss": 0.0943, + "epoch": 0.8137808137808138, + "grad_norm": 0.8039736151695251, + "learning_rate": 2.511731511731512e-05, + "loss": 0.0611, "step": 2740 }, { - "epoch": 0.20421803059557403, - "grad_norm": 2.0853097438812256, - "learning_rate": 2.8774691816426556e-05, - "loss": 0.1223, + "epoch": 0.8167508167508167, + "grad_norm": 0.9000430107116699, + "learning_rate": 2.5099495099495098e-05, + "loss": 0.076, "step": 2750 }, { - "epoch": 0.2049606416159216, - "grad_norm": 3.1080849170684814, - "learning_rate": 2.877023615030447e-05, - "loss": 0.0905, + "epoch": 0.8197208197208197, + "grad_norm": 1.0777641534805298, + "learning_rate": 2.5081675081675083e-05, + "loss": 0.0683, "step": 2760 }, { - "epoch": 0.20570325263626912, - "grad_norm": 1.9018908739089966, - "learning_rate": 2.8765780484182386e-05, - "loss": 0.0928, + "epoch": 0.8226908226908227, + "grad_norm": 1.0703097581863403, + "learning_rate": 2.5063855063855065e-05, + "loss": 0.0684, "step": 2770 }, { - "epoch": 0.20644586365661666, - "grad_norm": 2.179426908493042, - "learning_rate": 2.8761324818060298e-05, - "loss": 0.1011, + "epoch": 0.8256608256608257, + "grad_norm": 1.3522082567214966, + "learning_rate": 2.5046035046035048e-05, + "loss": 0.0687, "step": 2780 }, { - "epoch": 0.20718847467696422, - "grad_norm": 1.9516263008117676, - "learning_rate": 2.8756869151938216e-05, - "loss": 0.1033, + "epoch": 0.8286308286308286, + "grad_norm": 0.916243314743042, + "learning_rate": 2.502821502821503e-05, + "loss": 0.071, "step": 2790 }, { - "epoch": 0.20793108569731175, - "grad_norm": 2.347296953201294, - "learning_rate": 2.875241348581613e-05, - "loss": 0.104, + "epoch": 0.8316008316008316, + "grad_norm": 0.844256579875946, + "learning_rate": 2.5010395010395012e-05, + "loss": 0.0819, "step": 2800 }, { - "epoch": 0.20867369671765928, - "grad_norm": 2.022731304168701, - "learning_rate": 2.8747957819694043e-05, - "loss": 0.0947, + "epoch": 0.8345708345708346, + "grad_norm": 0.5879322290420532, + "learning_rate": 2.4992574992574994e-05, + "loss": 0.0723, "step": 2810 }, { - "epoch": 0.20941630773800682, - "grad_norm": 1.8994909524917603, - "learning_rate": 2.874350215357196e-05, - "loss": 0.0827, + "epoch": 0.8375408375408375, + "grad_norm": 1.0017356872558594, + "learning_rate": 2.4974754974754973e-05, + "loss": 0.0723, "step": 2820 }, { - "epoch": 0.21015891875835438, - "grad_norm": 1.9812676906585693, - "learning_rate": 2.8739046487449876e-05, - "loss": 0.0721, + "epoch": 0.8405108405108405, + "grad_norm": 1.0885043144226074, + "learning_rate": 2.495693495693496e-05, + "loss": 0.0607, "step": 2830 }, { - "epoch": 0.2109015297787019, - "grad_norm": 0.4040673077106476, - "learning_rate": 2.8734590821327788e-05, - "loss": 0.0885, + "epoch": 0.8434808434808435, + "grad_norm": 0.7798848748207092, + "learning_rate": 2.493911493911494e-05, + "loss": 0.057, "step": 2840 }, { - "epoch": 0.21164414079904945, - "grad_norm": 3.147190570831299, - "learning_rate": 2.8730135155205703e-05, - "loss": 0.1106, + "epoch": 0.8464508464508465, + "grad_norm": 1.2266145944595337, + "learning_rate": 2.4921294921294923e-05, + "loss": 0.0785, "step": 2850 }, { - "epoch": 0.212386751819397, - "grad_norm": 2.5040011405944824, - "learning_rate": 2.8725679489083618e-05, - "loss": 0.1102, + "epoch": 0.8494208494208494, + "grad_norm": 1.3794151544570923, + "learning_rate": 2.4903474903474905e-05, + "loss": 0.0711, "step": 2860 }, { - "epoch": 0.21312936283974454, - "grad_norm": 2.1592671871185303, - "learning_rate": 2.8721223822961533e-05, - "loss": 0.0697, + "epoch": 0.8523908523908524, + "grad_norm": 1.2152888774871826, + "learning_rate": 2.4885654885654887e-05, + "loss": 0.0783, "step": 2870 }, { - "epoch": 0.21387197386009207, - "grad_norm": 2.1262803077697754, - "learning_rate": 2.8716768156839448e-05, - "loss": 0.1251, + "epoch": 0.8553608553608554, + "grad_norm": 0.5685151219367981, + "learning_rate": 2.486783486783487e-05, + "loss": 0.0818, "step": 2880 }, { - "epoch": 0.21461458488043963, - "grad_norm": 6.860218524932861, - "learning_rate": 2.8712312490717363e-05, - "loss": 0.0852, + "epoch": 0.8583308583308583, + "grad_norm": 1.0047805309295654, + "learning_rate": 2.4850014850014848e-05, + "loss": 0.0768, "step": 2890 }, { - "epoch": 0.21535719590078717, - "grad_norm": 3.187988758087158, - "learning_rate": 2.8707856824595278e-05, - "loss": 0.112, + "epoch": 0.8613008613008613, + "grad_norm": 0.4588945806026459, + "learning_rate": 2.4832194832194834e-05, + "loss": 0.0616, "step": 2900 }, { - "epoch": 0.2160998069211347, - "grad_norm": 2.9651613235473633, - "learning_rate": 2.8703401158473193e-05, - "loss": 0.0847, + "epoch": 0.8642708642708643, + "grad_norm": 0.815482497215271, + "learning_rate": 2.4814374814374816e-05, + "loss": 0.0734, "step": 2910 }, { - "epoch": 0.21684241794148226, - "grad_norm": 1.7240506410598755, - "learning_rate": 2.8698945492351108e-05, - "loss": 0.0983, + "epoch": 0.8672408672408672, + "grad_norm": 1.1462020874023438, + "learning_rate": 2.4796554796554798e-05, + "loss": 0.0716, "step": 2920 }, { - "epoch": 0.2175850289618298, - "grad_norm": 3.074819803237915, - "learning_rate": 2.8694489826229023e-05, - "loss": 0.0944, + "epoch": 0.8702108702108702, + "grad_norm": 1.8708317279815674, + "learning_rate": 2.477873477873478e-05, + "loss": 0.0653, "step": 2930 }, { - "epoch": 0.21832763998217733, - "grad_norm": 4.255871772766113, - "learning_rate": 2.8690034160106938e-05, - "loss": 0.1251, + "epoch": 0.8731808731808732, + "grad_norm": 0.5544032454490662, + "learning_rate": 2.4760914760914762e-05, + "loss": 0.07, "step": 2940 }, { - "epoch": 0.2190702510025249, - "grad_norm": 2.6262733936309814, - "learning_rate": 2.868557849398485e-05, - "loss": 0.0804, + "epoch": 0.8761508761508762, + "grad_norm": 0.7917482256889343, + "learning_rate": 2.4743094743094744e-05, + "loss": 0.0656, "step": 2950 }, { - "epoch": 0.21981286202287242, - "grad_norm": 1.9793500900268555, - "learning_rate": 2.8681122827862768e-05, - "loss": 0.1058, + "epoch": 0.8791208791208791, + "grad_norm": 1.3552825450897217, + "learning_rate": 2.4725274725274723e-05, + "loss": 0.064, "step": 2960 }, { - "epoch": 0.22055547304321996, - "grad_norm": 1.1691769361495972, - "learning_rate": 2.867666716174068e-05, - "loss": 0.0838, + "epoch": 0.882090882090882, + "grad_norm": 0.7009034752845764, + "learning_rate": 2.470745470745471e-05, + "loss": 0.0707, "step": 2970 }, { - "epoch": 0.2212980840635675, - "grad_norm": 2.1811420917510986, - "learning_rate": 2.8672211495618594e-05, - "loss": 0.1095, + "epoch": 0.8850608850608851, + "grad_norm": 0.835366427898407, + "learning_rate": 2.468963468963469e-05, + "loss": 0.0724, "step": 2980 }, { - "epoch": 0.22204069508391505, - "grad_norm": 2.311396360397339, - "learning_rate": 2.8667755829496513e-05, - "loss": 0.1164, + "epoch": 0.888030888030888, + "grad_norm": 0.8953189253807068, + "learning_rate": 2.4671814671814673e-05, + "loss": 0.0692, "step": 2990 }, { - "epoch": 0.22278330610426259, - "grad_norm": 5.444539546966553, - "learning_rate": 2.8663300163374424e-05, - "loss": 0.1294, + "epoch": 0.891000891000891, + "grad_norm": 1.1693567037582397, + "learning_rate": 2.4653994653994655e-05, + "loss": 0.0737, "step": 3000 }, { - "epoch": 0.22352591712461012, - "grad_norm": 1.1934783458709717, - "learning_rate": 2.865884449725234e-05, - "loss": 0.1179, + "epoch": 0.893970893970894, + "grad_norm": 0.7871369123458862, + "learning_rate": 2.4636174636174637e-05, + "loss": 0.0812, "step": 3010 }, { - "epoch": 0.22426852814495768, - "grad_norm": 1.7925602197647095, - "learning_rate": 2.8654388831130254e-05, - "loss": 0.144, + "epoch": 0.896940896940897, + "grad_norm": 0.7140387296676636, + "learning_rate": 2.461835461835462e-05, + "loss": 0.0703, "step": 3020 }, { - "epoch": 0.2250111391653052, - "grad_norm": 4.332716941833496, - "learning_rate": 2.864993316500817e-05, - "loss": 0.111, + "epoch": 0.8999108999108999, + "grad_norm": 0.7188717126846313, + "learning_rate": 2.46005346005346e-05, + "loss": 0.062, "step": 3030 }, { - "epoch": 0.22575375018565275, - "grad_norm": 3.0859615802764893, - "learning_rate": 2.8645477498886084e-05, - "loss": 0.0978, + "epoch": 0.9028809028809028, + "grad_norm": 0.5643990635871887, + "learning_rate": 2.4582714582714584e-05, + "loss": 0.066, "step": 3040 }, { - "epoch": 0.2264963612060003, - "grad_norm": 2.46098256111145, - "learning_rate": 2.8641021832764e-05, - "loss": 0.0836, + "epoch": 0.9058509058509059, + "grad_norm": 0.8118051886558533, + "learning_rate": 2.4564894564894566e-05, + "loss": 0.0777, "step": 3050 }, { - "epoch": 0.22723897222634784, - "grad_norm": 1.820902705192566, - "learning_rate": 2.8636566166641914e-05, - "loss": 0.0985, + "epoch": 0.9088209088209088, + "grad_norm": 0.5903987884521484, + "learning_rate": 2.454707454707455e-05, + "loss": 0.0691, "step": 3060 }, { - "epoch": 0.22798158324669537, - "grad_norm": 2.86248517036438, - "learning_rate": 2.863211050051983e-05, - "loss": 0.1093, + "epoch": 0.9117909117909118, + "grad_norm": 0.4959113299846649, + "learning_rate": 2.452925452925453e-05, + "loss": 0.0706, "step": 3070 }, { - "epoch": 0.22872419426704294, - "grad_norm": 2.933708429336548, - "learning_rate": 2.862765483439774e-05, - "loss": 0.0901, + "epoch": 0.9147609147609148, + "grad_norm": 0.6881820559501648, + "learning_rate": 2.4511434511434513e-05, + "loss": 0.0793, "step": 3080 }, { - "epoch": 0.22946680528739047, - "grad_norm": 2.0867459774017334, - "learning_rate": 2.862319916827566e-05, - "loss": 0.0686, + "epoch": 0.9177309177309178, + "grad_norm": 1.0045626163482666, + "learning_rate": 2.4493614493614495e-05, + "loss": 0.0669, "step": 3090 }, { - "epoch": 0.230209416307738, - "grad_norm": 2.3671841621398926, - "learning_rate": 2.8618743502153574e-05, - "loss": 0.118, + "epoch": 0.9207009207009207, + "grad_norm": 0.6501134037971497, + "learning_rate": 2.4475794475794474e-05, + "loss": 0.0618, "step": 3100 }, { - "epoch": 0.23095202732808554, - "grad_norm": 1.118376612663269, - "learning_rate": 2.8614287836031486e-05, - "loss": 0.0853, + "epoch": 0.9236709236709236, + "grad_norm": 0.7777647972106934, + "learning_rate": 2.445797445797446e-05, + "loss": 0.0802, "step": 3110 }, { - "epoch": 0.2316946383484331, - "grad_norm": 3.297832727432251, - "learning_rate": 2.86098321699094e-05, - "loss": 0.0996, + "epoch": 0.9266409266409267, + "grad_norm": 0.9002485275268555, + "learning_rate": 2.444015444015444e-05, + "loss": 0.0686, "step": 3120 }, { - "epoch": 0.23243724936878063, - "grad_norm": 2.1501147747039795, - "learning_rate": 2.860537650378732e-05, - "loss": 0.1381, + "epoch": 0.9296109296109296, + "grad_norm": 0.6177936792373657, + "learning_rate": 2.4422334422334424e-05, + "loss": 0.0688, "step": 3130 }, { - "epoch": 0.23317986038912816, - "grad_norm": 0.9489710927009583, - "learning_rate": 2.860092083766523e-05, - "loss": 0.0692, + "epoch": 0.9325809325809326, + "grad_norm": 0.6512916088104248, + "learning_rate": 2.4404514404514406e-05, + "loss": 0.068, "step": 3140 }, { - "epoch": 0.23392247140947572, - "grad_norm": 2.0320940017700195, - "learning_rate": 2.8596465171543146e-05, - "loss": 0.0855, + "epoch": 0.9355509355509356, + "grad_norm": 0.670874297618866, + "learning_rate": 2.4386694386694388e-05, + "loss": 0.0804, "step": 3150 }, { - "epoch": 0.23466508242982326, - "grad_norm": 2.169110059738159, - "learning_rate": 2.859200950542106e-05, - "loss": 0.0809, + "epoch": 0.9385209385209385, + "grad_norm": 0.9932950139045715, + "learning_rate": 2.436887436887437e-05, + "loss": 0.0639, "step": 3160 }, { - "epoch": 0.2354076934501708, - "grad_norm": 3.284989595413208, - "learning_rate": 2.8587553839298976e-05, - "loss": 0.1028, + "epoch": 0.9414909414909415, + "grad_norm": 0.8478681445121765, + "learning_rate": 2.435105435105435e-05, + "loss": 0.0617, "step": 3170 }, { - "epoch": 0.23615030447051835, - "grad_norm": 2.6544220447540283, - "learning_rate": 2.858309817317689e-05, - "loss": 0.115, + "epoch": 0.9444609444609444, + "grad_norm": 0.6210806369781494, + "learning_rate": 2.4333234333234334e-05, + "loss": 0.0651, "step": 3180 }, { - "epoch": 0.23689291549086589, - "grad_norm": 1.7478609085083008, - "learning_rate": 2.8578642507054803e-05, - "loss": 0.083, + "epoch": 0.9474309474309475, + "grad_norm": 0.8721588850021362, + "learning_rate": 2.4315414315414317e-05, + "loss": 0.0849, "step": 3190 }, { - "epoch": 0.23763552651121342, - "grad_norm": 2.0759472846984863, - "learning_rate": 2.857418684093272e-05, - "loss": 0.1224, + "epoch": 0.9504009504009504, + "grad_norm": 0.5963863730430603, + "learning_rate": 2.42975942975943e-05, + "loss": 0.063, "step": 3200 }, { - "epoch": 0.23837813753156098, - "grad_norm": 2.7815895080566406, - "learning_rate": 2.8569731174810636e-05, - "loss": 0.1166, + "epoch": 0.9533709533709533, + "grad_norm": 1.1241910457611084, + "learning_rate": 2.427977427977428e-05, + "loss": 0.0669, "step": 3210 }, { - "epoch": 0.2391207485519085, - "grad_norm": 3.542616367340088, - "learning_rate": 2.8565275508688548e-05, - "loss": 0.1009, + "epoch": 0.9563409563409564, + "grad_norm": 1.101955771446228, + "learning_rate": 2.4261954261954263e-05, + "loss": 0.067, "step": 3220 }, { - "epoch": 0.23986335957225605, - "grad_norm": 1.8111937046051025, - "learning_rate": 2.8560819842566466e-05, - "loss": 0.099, + "epoch": 0.9593109593109593, + "grad_norm": 1.1508077383041382, + "learning_rate": 2.4244134244134245e-05, + "loss": 0.0666, "step": 3230 }, { - "epoch": 0.2406059705926036, - "grad_norm": 1.9494497776031494, - "learning_rate": 2.855636417644438e-05, - "loss": 0.0787, + "epoch": 0.9622809622809623, + "grad_norm": 0.8571543097496033, + "learning_rate": 2.4226314226314224e-05, + "loss": 0.0811, "step": 3240 }, { - "epoch": 0.24134858161295114, - "grad_norm": 1.57643461227417, - "learning_rate": 2.8551908510322293e-05, - "loss": 0.0731, + "epoch": 0.9652509652509652, + "grad_norm": 0.7995705008506775, + "learning_rate": 2.420849420849421e-05, + "loss": 0.0602, "step": 3250 }, { - "epoch": 0.24209119263329867, - "grad_norm": 1.140007495880127, - "learning_rate": 2.8547452844200208e-05, - "loss": 0.0824, + "epoch": 0.9682209682209683, + "grad_norm": 0.6367846727371216, + "learning_rate": 2.4190674190674192e-05, + "loss": 0.0635, "step": 3260 }, { - "epoch": 0.2428338036536462, - "grad_norm": 4.138311386108398, - "learning_rate": 2.8542997178078123e-05, - "loss": 0.1063, + "epoch": 0.9711909711909712, + "grad_norm": 0.6590407490730286, + "learning_rate": 2.4172854172854174e-05, + "loss": 0.0599, "step": 3270 }, { - "epoch": 0.24357641467399377, - "grad_norm": 3.1349868774414062, - "learning_rate": 2.8538541511956038e-05, - "loss": 0.1078, + "epoch": 0.9741609741609741, + "grad_norm": 1.3518911600112915, + "learning_rate": 2.4155034155034156e-05, + "loss": 0.0816, "step": 3280 }, { - "epoch": 0.2443190256943413, - "grad_norm": 1.922900676727295, - "learning_rate": 2.8534085845833953e-05, - "loss": 0.0801, + "epoch": 0.9771309771309772, + "grad_norm": 0.7235366106033325, + "learning_rate": 2.4137214137214138e-05, + "loss": 0.0585, "step": 3290 }, { - "epoch": 0.24506163671468884, - "grad_norm": 3.57891583442688, - "learning_rate": 2.8529630179711868e-05, - "loss": 0.0956, + "epoch": 0.9801009801009801, + "grad_norm": 0.7149487733840942, + "learning_rate": 2.411939411939412e-05, + "loss": 0.0611, "step": 3300 }, { - "epoch": 0.2458042477350364, - "grad_norm": 1.5893707275390625, - "learning_rate": 2.8525174513589783e-05, - "loss": 0.0762, + "epoch": 0.9830709830709831, + "grad_norm": 0.5758341550827026, + "learning_rate": 2.4101574101574103e-05, + "loss": 0.0575, "step": 3310 }, { - "epoch": 0.24654685875538393, - "grad_norm": 4.745431423187256, - "learning_rate": 2.8520718847467698e-05, - "loss": 0.0812, + "epoch": 0.986040986040986, + "grad_norm": 0.6312692761421204, + "learning_rate": 2.4083754083754085e-05, + "loss": 0.0696, "step": 3320 }, { - "epoch": 0.24728946977573146, - "grad_norm": 1.915309190750122, - "learning_rate": 2.8516263181345613e-05, - "loss": 0.0879, + "epoch": 0.989010989010989, + "grad_norm": 0.6071924567222595, + "learning_rate": 2.4065934065934067e-05, + "loss": 0.0736, "step": 3330 }, { - "epoch": 0.24803208079607902, - "grad_norm": 2.0146467685699463, - "learning_rate": 2.8511807515223528e-05, - "loss": 0.1033, + "epoch": 0.991980991980992, + "grad_norm": 1.0844005346298218, + "learning_rate": 2.404811404811405e-05, + "loss": 0.0608, "step": 3340 }, { - "epoch": 0.24877469181642656, - "grad_norm": 1.623887300491333, - "learning_rate": 2.8507351849101443e-05, - "loss": 0.0813, + "epoch": 0.9949509949509949, + "grad_norm": 0.7785583734512329, + "learning_rate": 2.403029403029403e-05, + "loss": 0.0737, "step": 3350 }, { - "epoch": 0.2495173028367741, - "grad_norm": 0.7771584987640381, - "learning_rate": 2.8502896182979354e-05, - "loss": 0.0881, + "epoch": 0.997920997920998, + "grad_norm": 0.7778565287590027, + "learning_rate": 2.4012474012474013e-05, + "loss": 0.0584, "step": 3360 }, { - "epoch": 0.25025991385712165, - "grad_norm": 2.778308868408203, - "learning_rate": 2.8498440516857272e-05, - "loss": 0.0786, + "epoch": 1.0, + "eval_f1": 0.33031292965957215, + "eval_loss": 0.05296385660767555, + "eval_runtime": 760.7693, + "eval_samples_per_second": 49.974, + "eval_steps_per_second": 0.782, + "step": 3367 + }, + { + "epoch": 1.0008910008910008, + "grad_norm": 0.5195960402488708, + "learning_rate": 2.3994653994653996e-05, + "loss": 0.0643, "step": 3370 }, { - "epoch": 0.25100252487746916, - "grad_norm": 1.1731817722320557, - "learning_rate": 2.8493984850735184e-05, - "loss": 0.1235, + "epoch": 1.0038610038610039, + "grad_norm": 1.0091626644134521, + "learning_rate": 2.3976833976833978e-05, + "loss": 0.0659, "step": 3380 }, { - "epoch": 0.2517451358978167, - "grad_norm": 2.099097967147827, - "learning_rate": 2.84895291846131e-05, - "loss": 0.11, + "epoch": 1.006831006831007, + "grad_norm": 0.6962494254112244, + "learning_rate": 2.395901395901396e-05, + "loss": 0.0741, "step": 3390 }, { - "epoch": 0.2524877469181643, - "grad_norm": 1.712109923362732, - "learning_rate": 2.8485073518491017e-05, - "loss": 0.0873, + "epoch": 1.0098010098010097, + "grad_norm": 0.5195145010948181, + "learning_rate": 2.3941193941193942e-05, + "loss": 0.0661, "step": 3400 }, { - "epoch": 0.2532303579385118, - "grad_norm": 1.978943943977356, - "learning_rate": 2.848061785236893e-05, - "loss": 0.0906, + "epoch": 1.0127710127710128, + "grad_norm": 0.40490421652793884, + "learning_rate": 2.3923373923373924e-05, + "loss": 0.0667, "step": 3410 }, { - "epoch": 0.25397296895885935, - "grad_norm": 2.0468902587890625, - "learning_rate": 2.8476162186246844e-05, - "loss": 0.1062, + "epoch": 1.0157410157410158, + "grad_norm": 0.8673891425132751, + "learning_rate": 2.3905553905553906e-05, + "loss": 0.0648, "step": 3420 }, { - "epoch": 0.2547155799792069, - "grad_norm": 1.208884358406067, - "learning_rate": 2.847170652012476e-05, - "loss": 0.1014, + "epoch": 1.0187110187110187, + "grad_norm": 0.6482574343681335, + "learning_rate": 2.388773388773389e-05, + "loss": 0.0683, "step": 3430 }, { - "epoch": 0.2554581909995544, - "grad_norm": 2.65171217918396, - "learning_rate": 2.8467250854002674e-05, - "loss": 0.1386, + "epoch": 1.0216810216810217, + "grad_norm": 1.0671459436416626, + "learning_rate": 2.386991386991387e-05, + "loss": 0.0731, "step": 3440 }, { - "epoch": 0.256200802019902, - "grad_norm": 1.2456876039505005, - "learning_rate": 2.846279518788059e-05, - "loss": 0.0859, + "epoch": 1.0246510246510248, + "grad_norm": 1.1997050046920776, + "learning_rate": 2.3852093852093853e-05, + "loss": 0.0824, "step": 3450 }, { - "epoch": 0.25694341304024954, - "grad_norm": 1.7401740550994873, - "learning_rate": 2.8458339521758504e-05, - "loss": 0.1033, + "epoch": 1.0276210276210276, + "grad_norm": 1.1399210691452026, + "learning_rate": 2.3834273834273835e-05, + "loss": 0.0631, "step": 3460 }, { - "epoch": 0.25768602406059704, - "grad_norm": 5.769093990325928, - "learning_rate": 2.845388385563642e-05, - "loss": 0.1029, + "epoch": 1.0305910305910306, + "grad_norm": 1.4414496421813965, + "learning_rate": 2.3816453816453817e-05, + "loss": 0.0644, "step": 3470 }, { - "epoch": 0.2584286350809446, - "grad_norm": 2.1862595081329346, - "learning_rate": 2.8449428189514334e-05, - "loss": 0.082, + "epoch": 1.0335610335610335, + "grad_norm": 0.7612940669059753, + "learning_rate": 2.37986337986338e-05, + "loss": 0.0608, "step": 3480 }, { - "epoch": 0.25917124610129216, - "grad_norm": 4.852025985717773, - "learning_rate": 2.8444972523392246e-05, - "loss": 0.0956, + "epoch": 1.0365310365310365, + "grad_norm": 1.8347103595733643, + "learning_rate": 2.378081378081378e-05, + "loss": 0.0853, "step": 3490 }, { - "epoch": 0.25991385712163967, - "grad_norm": 2.4434781074523926, - "learning_rate": 2.8440516857270164e-05, - "loss": 0.108, + "epoch": 1.0395010395010396, + "grad_norm": 0.7422579526901245, + "learning_rate": 2.3762993762993764e-05, + "loss": 0.065, "step": 3500 }, { - "epoch": 0.26065646814198723, - "grad_norm": 2.209559679031372, - "learning_rate": 2.843606119114808e-05, - "loss": 0.1083, + "epoch": 1.0424710424710424, + "grad_norm": 0.4676852524280548, + "learning_rate": 2.3745173745173746e-05, + "loss": 0.0686, "step": 3510 }, { - "epoch": 0.2613990791623348, - "grad_norm": 3.44124698638916, - "learning_rate": 2.843160552502599e-05, - "loss": 0.0981, + "epoch": 1.0454410454410454, + "grad_norm": 0.7909456491470337, + "learning_rate": 2.3727353727353728e-05, + "loss": 0.0808, "step": 3520 }, { - "epoch": 0.2621416901826823, - "grad_norm": 3.689404249191284, - "learning_rate": 2.8427149858903906e-05, - "loss": 0.0863, + "epoch": 1.0484110484110485, + "grad_norm": 1.0909233093261719, + "learning_rate": 2.370953370953371e-05, + "loss": 0.0786, "step": 3530 }, { - "epoch": 0.26288430120302986, - "grad_norm": 1.4514044523239136, - "learning_rate": 2.8422694192781824e-05, - "loss": 0.0854, + "epoch": 1.0513810513810513, + "grad_norm": 0.9881356358528137, + "learning_rate": 2.3691713691713692e-05, + "loss": 0.0745, "step": 3540 }, { - "epoch": 0.2636269122233774, - "grad_norm": 1.8752799034118652, - "learning_rate": 2.8418238526659736e-05, - "loss": 0.0775, + "epoch": 1.0543510543510544, + "grad_norm": 0.6421000361442566, + "learning_rate": 2.3673893673893675e-05, + "loss": 0.0681, "step": 3550 }, { - "epoch": 0.2643695232437249, - "grad_norm": 2.1504430770874023, - "learning_rate": 2.841378286053765e-05, - "loss": 0.0839, + "epoch": 1.0573210573210574, + "grad_norm": 1.2213356494903564, + "learning_rate": 2.3656073656073657e-05, + "loss": 0.0691, "step": 3560 }, { - "epoch": 0.2651121342640725, - "grad_norm": 3.2270238399505615, - "learning_rate": 2.840932719441557e-05, - "loss": 0.1367, + "epoch": 1.0602910602910602, + "grad_norm": 1.1838406324386597, + "learning_rate": 2.363825363825364e-05, + "loss": 0.0727, "step": 3570 }, { - "epoch": 0.26585474528442005, - "grad_norm": 2.0077528953552246, - "learning_rate": 2.840487152829348e-05, - "loss": 0.0887, + "epoch": 1.0632610632610633, + "grad_norm": 0.8688098192214966, + "learning_rate": 2.362043362043362e-05, + "loss": 0.059, "step": 3580 }, { - "epoch": 0.26659735630476755, - "grad_norm": 1.6168723106384277, - "learning_rate": 2.8400415862171396e-05, - "loss": 0.1324, + "epoch": 1.0662310662310661, + "grad_norm": 0.891708493232727, + "learning_rate": 2.3602613602613603e-05, + "loss": 0.0652, "step": 3590 }, { - "epoch": 0.2673399673251151, - "grad_norm": 1.800391674041748, - "learning_rate": 2.8395960196049307e-05, - "loss": 0.1256, + "epoch": 1.0692010692010692, + "grad_norm": 0.8974260091781616, + "learning_rate": 2.3584793584793586e-05, + "loss": 0.0627, "step": 3600 }, { - "epoch": 0.2680825783454627, - "grad_norm": 1.1540509462356567, - "learning_rate": 2.8391504529927226e-05, - "loss": 0.0888, + "epoch": 1.0721710721710722, + "grad_norm": 0.9412424564361572, + "learning_rate": 2.3566973566973568e-05, + "loss": 0.0773, "step": 3610 }, { - "epoch": 0.2688251893658102, - "grad_norm": 1.3013066053390503, - "learning_rate": 2.838704886380514e-05, - "loss": 0.096, + "epoch": 1.075141075141075, + "grad_norm": 0.8957776427268982, + "learning_rate": 2.354915354915355e-05, + "loss": 0.0771, "step": 3620 }, { - "epoch": 0.26956780038615774, - "grad_norm": 1.9634844064712524, - "learning_rate": 2.8382593197683052e-05, - "loss": 0.0817, + "epoch": 1.078111078111078, + "grad_norm": 0.9389250874519348, + "learning_rate": 2.3531333531333532e-05, + "loss": 0.0631, "step": 3630 }, { - "epoch": 0.27031041140650525, - "grad_norm": 2.515450954437256, - "learning_rate": 2.837813753156097e-05, - "loss": 0.0909, + "epoch": 1.0810810810810811, + "grad_norm": 0.716650664806366, + "learning_rate": 2.3513513513513514e-05, + "loss": 0.0525, "step": 3640 }, { - "epoch": 0.2710530224268528, - "grad_norm": 2.612504482269287, - "learning_rate": 2.8373681865438886e-05, - "loss": 0.1121, + "epoch": 1.084051084051084, + "grad_norm": 0.779859721660614, + "learning_rate": 2.3495693495693496e-05, + "loss": 0.0824, "step": 3650 }, { - "epoch": 0.27179563344720037, - "grad_norm": 1.4512356519699097, - "learning_rate": 2.8369226199316797e-05, - "loss": 0.0939, + "epoch": 1.087021087021087, + "grad_norm": 0.5671702027320862, + "learning_rate": 2.347787347787348e-05, + "loss": 0.0783, "step": 3660 }, { - "epoch": 0.2725382444675479, - "grad_norm": 2.2824881076812744, - "learning_rate": 2.8364770533194712e-05, - "loss": 0.1103, + "epoch": 1.08999108999109, + "grad_norm": 0.8227300643920898, + "learning_rate": 2.346005346005346e-05, + "loss": 0.0463, "step": 3670 }, { - "epoch": 0.27328085548789544, - "grad_norm": 1.5197831392288208, - "learning_rate": 2.8360314867072627e-05, - "loss": 0.1198, + "epoch": 1.092961092961093, + "grad_norm": 0.6845151782035828, + "learning_rate": 2.3442233442233443e-05, + "loss": 0.0614, "step": 3680 }, { - "epoch": 0.274023466508243, - "grad_norm": 1.559735894203186, - "learning_rate": 2.8355859200950542e-05, - "loss": 0.1381, + "epoch": 1.095931095931096, + "grad_norm": 1.5416568517684937, + "learning_rate": 2.3424413424413425e-05, + "loss": 0.0752, "step": 3690 }, { - "epoch": 0.2747660775285905, - "grad_norm": 1.8886692523956299, - "learning_rate": 2.8351403534828457e-05, - "loss": 0.0942, + "epoch": 1.098901098901099, + "grad_norm": 0.8240406513214111, + "learning_rate": 2.3406593406593407e-05, + "loss": 0.0693, "step": 3700 }, { - "epoch": 0.27550868854893806, - "grad_norm": 1.677405834197998, - "learning_rate": 2.8346947868706372e-05, - "loss": 0.0857, + "epoch": 1.1018711018711018, + "grad_norm": 0.713246762752533, + "learning_rate": 2.338877338877339e-05, + "loss": 0.051, "step": 3710 }, { - "epoch": 0.2762512995692856, - "grad_norm": 1.0931998491287231, - "learning_rate": 2.8342492202584287e-05, - "loss": 0.1002, + "epoch": 1.1048411048411049, + "grad_norm": 0.9789752960205078, + "learning_rate": 2.337095337095337e-05, + "loss": 0.0681, "step": 3720 }, { - "epoch": 0.27699391058963313, - "grad_norm": 2.140795946121216, - "learning_rate": 2.8338036536462202e-05, - "loss": 0.1144, + "epoch": 1.107811107811108, + "grad_norm": 1.0538547039031982, + "learning_rate": 2.3353133353133354e-05, + "loss": 0.0622, "step": 3730 }, { - "epoch": 0.2777365216099807, - "grad_norm": 1.8325400352478027, - "learning_rate": 2.8333580870340117e-05, - "loss": 0.0829, + "epoch": 1.1107811107811107, + "grad_norm": 0.9167300462722778, + "learning_rate": 2.3335313335313336e-05, + "loss": 0.0695, "step": 3740 }, { - "epoch": 0.27847913263032825, - "grad_norm": 2.1785285472869873, - "learning_rate": 2.8329125204218032e-05, - "loss": 0.0643, + "epoch": 1.1137511137511138, + "grad_norm": 0.740674614906311, + "learning_rate": 2.3317493317493318e-05, + "loss": 0.0666, "step": 3750 }, { - "epoch": 0.27922174365067576, - "grad_norm": 2.3438045978546143, - "learning_rate": 2.8324669538095947e-05, - "loss": 0.0871, + "epoch": 1.1167211167211166, + "grad_norm": 0.6684421300888062, + "learning_rate": 2.32996732996733e-05, + "loss": 0.0711, "step": 3760 }, { - "epoch": 0.2799643546710233, - "grad_norm": 2.866464853286743, - "learning_rate": 2.832021387197386e-05, - "loss": 0.0718, + "epoch": 1.1196911196911197, + "grad_norm": 0.6935126781463623, + "learning_rate": 2.3281853281853282e-05, + "loss": 0.0653, "step": 3770 }, { - "epoch": 0.2807069656913709, - "grad_norm": 1.4197877645492554, - "learning_rate": 2.8315758205851777e-05, - "loss": 0.1264, + "epoch": 1.1226611226611227, + "grad_norm": 1.0922201871871948, + "learning_rate": 2.3264033264033265e-05, + "loss": 0.0653, "step": 3780 }, { - "epoch": 0.2814495767117184, - "grad_norm": 4.769101142883301, - "learning_rate": 2.831130253972969e-05, - "loss": 0.0845, + "epoch": 1.1256311256311256, + "grad_norm": 0.747627854347229, + "learning_rate": 2.3246213246213247e-05, + "loss": 0.0672, "step": 3790 }, { - "epoch": 0.28219218773206595, - "grad_norm": 1.278130292892456, - "learning_rate": 2.8306846873607604e-05, - "loss": 0.0915, + "epoch": 1.1286011286011286, + "grad_norm": 1.3024978637695312, + "learning_rate": 2.322839322839323e-05, + "loss": 0.0725, "step": 3800 }, { - "epoch": 0.2829347987524135, - "grad_norm": 2.7825405597686768, - "learning_rate": 2.8302391207485522e-05, - "loss": 0.1035, + "epoch": 1.1315711315711316, + "grad_norm": 0.9191585779190063, + "learning_rate": 2.321057321057321e-05, + "loss": 0.0662, "step": 3810 }, { - "epoch": 0.283677409772761, - "grad_norm": 3.6590402126312256, - "learning_rate": 2.8297935541363434e-05, - "loss": 0.0821, + "epoch": 1.1345411345411345, + "grad_norm": 1.0653009414672852, + "learning_rate": 2.3192753192753193e-05, + "loss": 0.0685, "step": 3820 }, { - "epoch": 0.2844200207931086, - "grad_norm": 3.2565736770629883, - "learning_rate": 2.829347987524135e-05, - "loss": 0.097, + "epoch": 1.1375111375111375, + "grad_norm": 1.1122196912765503, + "learning_rate": 2.3174933174933175e-05, + "loss": 0.073, "step": 3830 }, { - "epoch": 0.28516263181345614, - "grad_norm": 1.7409720420837402, - "learning_rate": 2.8289024209119264e-05, - "loss": 0.0929, + "epoch": 1.1404811404811406, + "grad_norm": 0.8093435764312744, + "learning_rate": 2.3157113157113158e-05, + "loss": 0.0709, "step": 3840 }, { - "epoch": 0.28590524283380364, - "grad_norm": 2.9615607261657715, - "learning_rate": 2.828456854299718e-05, - "loss": 0.0771, + "epoch": 1.1434511434511434, + "grad_norm": 0.4847230613231659, + "learning_rate": 2.313929313929314e-05, + "loss": 0.0625, "step": 3850 }, { - "epoch": 0.2866478538541512, - "grad_norm": 2.6329636573791504, - "learning_rate": 2.8280112876875094e-05, - "loss": 0.1215, + "epoch": 1.1464211464211465, + "grad_norm": 0.8691696524620056, + "learning_rate": 2.3121473121473122e-05, + "loss": 0.0558, "step": 3860 }, { - "epoch": 0.28739046487449876, - "grad_norm": 1.5111801624298096, - "learning_rate": 2.827565721075301e-05, - "loss": 0.0735, + "epoch": 1.1493911493911493, + "grad_norm": 1.140037178993225, + "learning_rate": 2.3103653103653104e-05, + "loss": 0.0704, "step": 3870 }, { - "epoch": 0.28813307589484627, - "grad_norm": 2.780776262283325, - "learning_rate": 2.8271201544630924e-05, - "loss": 0.0969, + "epoch": 1.1523611523611523, + "grad_norm": 0.8088730573654175, + "learning_rate": 2.3085833085833086e-05, + "loss": 0.0666, "step": 3880 }, { - "epoch": 0.28887568691519383, - "grad_norm": 1.8121346235275269, - "learning_rate": 2.826674587850884e-05, - "loss": 0.1015, + "epoch": 1.1553311553311554, + "grad_norm": 0.6530190110206604, + "learning_rate": 2.306801306801307e-05, + "loss": 0.0658, "step": 3890 }, { - "epoch": 0.2896182979355414, - "grad_norm": 1.4083514213562012, - "learning_rate": 2.826229021238675e-05, - "loss": 0.066, + "epoch": 1.1583011583011582, + "grad_norm": 0.7550728917121887, + "learning_rate": 2.305019305019305e-05, + "loss": 0.0715, "step": 3900 }, { - "epoch": 0.2903609089558889, - "grad_norm": 2.5285115242004395, - "learning_rate": 2.825783454626467e-05, - "loss": 0.0923, + "epoch": 1.1612711612711613, + "grad_norm": 0.6546505689620972, + "learning_rate": 2.3032373032373033e-05, + "loss": 0.0682, "step": 3910 }, { - "epoch": 0.29110351997623646, - "grad_norm": 0.7836059927940369, - "learning_rate": 2.8253378880142584e-05, - "loss": 0.0506, + "epoch": 1.1642411642411643, + "grad_norm": 1.3673691749572754, + "learning_rate": 2.3014553014553015e-05, + "loss": 0.0628, "step": 3920 }, { - "epoch": 0.29184613099658396, - "grad_norm": 1.5895808935165405, - "learning_rate": 2.8248923214020495e-05, - "loss": 0.0886, + "epoch": 1.1672111672111671, + "grad_norm": 1.3295186758041382, + "learning_rate": 2.2996732996732997e-05, + "loss": 0.0632, "step": 3930 }, { - "epoch": 0.2925887420169315, - "grad_norm": 1.657165288925171, - "learning_rate": 2.824446754789841e-05, - "loss": 0.0845, + "epoch": 1.1701811701811702, + "grad_norm": 0.8255389332771301, + "learning_rate": 2.297891297891298e-05, + "loss": 0.0598, "step": 3940 }, { - "epoch": 0.2933313530372791, - "grad_norm": 1.5813052654266357, - "learning_rate": 2.824001188177633e-05, - "loss": 0.1015, + "epoch": 1.1731511731511732, + "grad_norm": 0.7321466207504272, + "learning_rate": 2.2961092961092965e-05, + "loss": 0.0521, "step": 3950 }, { - "epoch": 0.2940739640576266, - "grad_norm": 2.2893810272216797, - "learning_rate": 2.823555621565424e-05, - "loss": 0.1201, + "epoch": 1.176121176121176, + "grad_norm": 1.0865906476974487, + "learning_rate": 2.2943272943272944e-05, + "loss": 0.0705, "step": 3960 }, { - "epoch": 0.29481657507797415, - "grad_norm": 3.8998055458068848, - "learning_rate": 2.8231100549532155e-05, - "loss": 0.1317, + "epoch": 1.179091179091179, + "grad_norm": 0.691181480884552, + "learning_rate": 2.2925452925452926e-05, + "loss": 0.0805, "step": 3970 }, { - "epoch": 0.2955591860983217, - "grad_norm": 1.5163902044296265, - "learning_rate": 2.8226644883410074e-05, - "loss": 0.1226, + "epoch": 1.1820611820611822, + "grad_norm": 0.6251980662345886, + "learning_rate": 2.2907632907632908e-05, + "loss": 0.061, "step": 3980 }, { - "epoch": 0.2963017971186692, - "grad_norm": 2.5356316566467285, - "learning_rate": 2.8222189217287985e-05, - "loss": 0.1257, + "epoch": 1.185031185031185, + "grad_norm": 0.7137186527252197, + "learning_rate": 2.288981288981289e-05, + "loss": 0.0657, "step": 3990 }, { - "epoch": 0.2970444081390168, - "grad_norm": 0.5978565216064453, - "learning_rate": 2.82177335511659e-05, - "loss": 0.0741, + "epoch": 1.188001188001188, + "grad_norm": 0.5930067896842957, + "learning_rate": 2.2871992871992872e-05, + "loss": 0.065, "step": 4000 }, { - "epoch": 0.29778701915936434, - "grad_norm": 3.2044990062713623, - "learning_rate": 2.8213277885043812e-05, - "loss": 0.0927, + "epoch": 1.190971190971191, + "grad_norm": 0.8055261373519897, + "learning_rate": 2.2854172854172855e-05, + "loss": 0.0605, "step": 4010 }, { - "epoch": 0.29852963017971185, - "grad_norm": 2.5349199771881104, - "learning_rate": 2.820882221892173e-05, - "loss": 0.1082, + "epoch": 1.193941193941194, + "grad_norm": 0.8491489887237549, + "learning_rate": 2.283635283635284e-05, + "loss": 0.0768, "step": 4020 }, { - "epoch": 0.2992722412000594, - "grad_norm": 2.297657012939453, - "learning_rate": 2.8204366552799645e-05, - "loss": 0.1034, + "epoch": 1.196911196911197, + "grad_norm": 0.5159242749214172, + "learning_rate": 2.281853281853282e-05, + "loss": 0.0577, "step": 4030 }, { - "epoch": 0.30001485222040697, - "grad_norm": 2.956207036972046, - "learning_rate": 2.8199910886677557e-05, - "loss": 0.0724, + "epoch": 1.1998811998811998, + "grad_norm": 1.0682775974273682, + "learning_rate": 2.28007128007128e-05, + "loss": 0.0669, "step": 4040 }, { - "epoch": 0.3007574632407545, - "grad_norm": 2.382066488265991, - "learning_rate": 2.8195455220555475e-05, - "loss": 0.0734, + "epoch": 1.2028512028512028, + "grad_norm": 0.7418850660324097, + "learning_rate": 2.2782892782892783e-05, + "loss": 0.064, "step": 4050 }, { - "epoch": 0.30150007426110204, - "grad_norm": 2.7788658142089844, - "learning_rate": 2.819099955443339e-05, - "loss": 0.1129, + "epoch": 1.2058212058212059, + "grad_norm": 1.3865784406661987, + "learning_rate": 2.2765072765072765e-05, + "loss": 0.056, "step": 4060 }, { - "epoch": 0.3022426852814496, - "grad_norm": 1.6891690492630005, - "learning_rate": 2.8186543888311302e-05, - "loss": 0.1168, + "epoch": 1.2087912087912087, + "grad_norm": 1.0742945671081543, + "learning_rate": 2.2747252747252748e-05, + "loss": 0.0495, "step": 4070 }, { - "epoch": 0.3029852963017971, - "grad_norm": 2.3250083923339844, - "learning_rate": 2.8182088222189217e-05, - "loss": 0.0998, + "epoch": 1.2117612117612118, + "grad_norm": 1.8992480039596558, + "learning_rate": 2.272943272943273e-05, + "loss": 0.0603, "step": 4080 }, { - "epoch": 0.30372790732214466, - "grad_norm": 2.700108766555786, - "learning_rate": 2.8177632556067135e-05, - "loss": 0.1129, + "epoch": 1.2147312147312148, + "grad_norm": 0.701547384262085, + "learning_rate": 2.2711612711612715e-05, + "loss": 0.0733, "step": 4090 }, { - "epoch": 0.3044705183424922, - "grad_norm": 2.239126443862915, - "learning_rate": 2.8173176889945047e-05, - "loss": 0.1041, + "epoch": 1.2177012177012176, + "grad_norm": 0.7614421844482422, + "learning_rate": 2.2693792693792694e-05, + "loss": 0.0634, "step": 4100 }, { - "epoch": 0.30521312936283973, - "grad_norm": 2.046869993209839, - "learning_rate": 2.8168721223822962e-05, - "loss": 0.1011, + "epoch": 1.2206712206712207, + "grad_norm": 0.6779104471206665, + "learning_rate": 2.2675972675972676e-05, + "loss": 0.0631, "step": 4110 }, { - "epoch": 0.3059557403831873, - "grad_norm": 2.6533050537109375, - "learning_rate": 2.8164265557700877e-05, - "loss": 0.0722, + "epoch": 1.2236412236412235, + "grad_norm": 0.7199757695198059, + "learning_rate": 2.265815265815266e-05, + "loss": 0.0628, "step": 4120 }, { - "epoch": 0.30669835140353485, - "grad_norm": 1.3280346393585205, - "learning_rate": 2.8159809891578792e-05, - "loss": 0.0925, + "epoch": 1.2266112266112266, + "grad_norm": 0.8466992378234863, + "learning_rate": 2.264033264033264e-05, + "loss": 0.0784, "step": 4130 }, { - "epoch": 0.30744096242388236, - "grad_norm": 1.894659161567688, - "learning_rate": 2.8155354225456707e-05, - "loss": 0.0688, + "epoch": 1.2295812295812296, + "grad_norm": 0.5136345028877258, + "learning_rate": 2.2622512622512623e-05, + "loss": 0.0866, "step": 4140 }, { - "epoch": 0.3081835734442299, - "grad_norm": 1.138370394706726, - "learning_rate": 2.8150898559334622e-05, - "loss": 0.085, + "epoch": 1.2325512325512324, + "grad_norm": 0.7699841260910034, + "learning_rate": 2.2604692604692605e-05, + "loss": 0.0607, "step": 4150 }, { - "epoch": 0.3089261844645775, - "grad_norm": 2.353771686553955, - "learning_rate": 2.8146442893212537e-05, - "loss": 0.1051, + "epoch": 1.2355212355212355, + "grad_norm": 0.61440509557724, + "learning_rate": 2.258687258687259e-05, + "loss": 0.0721, "step": 4160 }, { - "epoch": 0.309668795484925, - "grad_norm": 1.1877645254135132, - "learning_rate": 2.8141987227090452e-05, - "loss": 0.1023, + "epoch": 1.2384912384912385, + "grad_norm": 0.5147203803062439, + "learning_rate": 2.256905256905257e-05, + "loss": 0.0615, "step": 4170 }, { - "epoch": 0.31041140650527255, - "grad_norm": 1.905053973197937, - "learning_rate": 2.8137531560968363e-05, - "loss": 0.1233, + "epoch": 1.2414612414612414, + "grad_norm": 1.0179754495620728, + "learning_rate": 2.255123255123255e-05, + "loss": 0.0669, "step": 4180 }, { - "epoch": 0.3111540175256201, - "grad_norm": 2.760115385055542, - "learning_rate": 2.813307589484628e-05, - "loss": 0.1237, + "epoch": 1.2444312444312444, + "grad_norm": 1.2487351894378662, + "learning_rate": 2.2533412533412534e-05, + "loss": 0.0705, "step": 4190 }, { - "epoch": 0.3118966285459676, - "grad_norm": 2.511549711227417, - "learning_rate": 2.8128620228724193e-05, - "loss": 0.0922, + "epoch": 1.2474012474012475, + "grad_norm": 0.8838121294975281, + "learning_rate": 2.2515592515592516e-05, + "loss": 0.0582, "step": 4200 }, { - "epoch": 0.3126392395663152, - "grad_norm": 1.948473572731018, - "learning_rate": 2.8124164562602108e-05, - "loss": 0.0985, + "epoch": 1.2503712503712503, + "grad_norm": 0.5740695595741272, + "learning_rate": 2.2497772497772498e-05, + "loss": 0.0704, "step": 4210 }, { - "epoch": 0.3133818505866627, - "grad_norm": 3.190645456314087, - "learning_rate": 2.8119708896480027e-05, - "loss": 0.106, + "epoch": 1.2533412533412533, + "grad_norm": 1.0215144157409668, + "learning_rate": 2.247995247995248e-05, + "loss": 0.0672, "step": 4220 }, { - "epoch": 0.31412446160701024, - "grad_norm": 2.2379205226898193, - "learning_rate": 2.8115253230357938e-05, - "loss": 0.0896, + "epoch": 1.2563112563112564, + "grad_norm": 0.5340459942817688, + "learning_rate": 2.2462132462132466e-05, + "loss": 0.0637, "step": 4230 }, { - "epoch": 0.3148670726273578, - "grad_norm": 1.1914069652557373, - "learning_rate": 2.8110797564235853e-05, - "loss": 0.0659, + "epoch": 1.2592812592812592, + "grad_norm": 1.070460557937622, + "learning_rate": 2.2444312444312444e-05, + "loss": 0.0545, "step": 4240 }, { - "epoch": 0.3156096836477053, - "grad_norm": 1.5502461194992065, - "learning_rate": 2.8106341898113768e-05, - "loss": 0.0995, + "epoch": 1.2622512622512623, + "grad_norm": 0.8050958514213562, + "learning_rate": 2.2426492426492427e-05, + "loss": 0.0732, "step": 4250 }, { - "epoch": 0.31635229466805287, - "grad_norm": 2.563169240951538, - "learning_rate": 2.8101886231991683e-05, - "loss": 0.0967, + "epoch": 1.2652212652212653, + "grad_norm": 1.1751881837844849, + "learning_rate": 2.240867240867241e-05, + "loss": 0.0643, "step": 4260 }, { - "epoch": 0.31709490568840043, - "grad_norm": 4.562102317810059, - "learning_rate": 2.8097430565869598e-05, - "loss": 0.107, + "epoch": 1.2681912681912682, + "grad_norm": 0.7636763453483582, + "learning_rate": 2.239085239085239e-05, + "loss": 0.0653, "step": 4270 }, { - "epoch": 0.31783751670874794, - "grad_norm": 0.7943652868270874, - "learning_rate": 2.8092974899747513e-05, - "loss": 0.0811, + "epoch": 1.2711612711612712, + "grad_norm": 0.9791406393051147, + "learning_rate": 2.2373032373032373e-05, + "loss": 0.0584, "step": 4280 }, { - "epoch": 0.3185801277290955, - "grad_norm": 2.5280022621154785, - "learning_rate": 2.8088519233625428e-05, - "loss": 0.0628, + "epoch": 1.2741312741312742, + "grad_norm": 0.7061544060707092, + "learning_rate": 2.2355212355212355e-05, + "loss": 0.0702, "step": 4290 }, { - "epoch": 0.31932273874944306, - "grad_norm": 1.1994893550872803, - "learning_rate": 2.8084063567503343e-05, - "loss": 0.0747, + "epoch": 1.277101277101277, + "grad_norm": 1.0918234586715698, + "learning_rate": 2.233739233739234e-05, + "loss": 0.0597, "step": 4300 }, { - "epoch": 0.32006534976979056, - "grad_norm": 2.5964338779449463, - "learning_rate": 2.8079607901381255e-05, - "loss": 0.0978, + "epoch": 1.2800712800712801, + "grad_norm": 1.0463823080062866, + "learning_rate": 2.231957231957232e-05, + "loss": 0.0571, "step": 4310 }, { - "epoch": 0.3208079607901381, - "grad_norm": 1.0539716482162476, - "learning_rate": 2.8075152235259173e-05, - "loss": 0.1243, + "epoch": 1.2830412830412832, + "grad_norm": 0.8148181438446045, + "learning_rate": 2.2301752301752302e-05, + "loss": 0.0825, "step": 4320 }, { - "epoch": 0.3215505718104857, - "grad_norm": 3.5578460693359375, - "learning_rate": 2.8070696569137088e-05, - "loss": 0.1073, + "epoch": 1.286011286011286, + "grad_norm": 0.9247533082962036, + "learning_rate": 2.2283932283932284e-05, + "loss": 0.0643, "step": 4330 }, { - "epoch": 0.3222931828308332, - "grad_norm": 3.5634069442749023, - "learning_rate": 2.8066240903015e-05, - "loss": 0.1102, + "epoch": 1.288981288981289, + "grad_norm": 0.8927013278007507, + "learning_rate": 2.2266112266112266e-05, + "loss": 0.0695, "step": 4340 }, { - "epoch": 0.32303579385118075, - "grad_norm": 1.1170202493667603, - "learning_rate": 2.8061785236892915e-05, - "loss": 0.0718, + "epoch": 1.2919512919512919, + "grad_norm": 1.1298048496246338, + "learning_rate": 2.2248292248292248e-05, + "loss": 0.0601, "step": 4350 }, { - "epoch": 0.3237784048715283, - "grad_norm": 2.6861186027526855, - "learning_rate": 2.8057329570770833e-05, - "loss": 0.0986, + "epoch": 1.294921294921295, + "grad_norm": 0.40580281615257263, + "learning_rate": 2.223047223047223e-05, + "loss": 0.0742, "step": 4360 }, { - "epoch": 0.3245210158918758, - "grad_norm": 2.0378482341766357, - "learning_rate": 2.8052873904648745e-05, - "loss": 0.1087, + "epoch": 1.2978912978912978, + "grad_norm": 0.471123069524765, + "learning_rate": 2.2212652212652216e-05, + "loss": 0.0638, "step": 4370 }, { - "epoch": 0.3252636269122234, - "grad_norm": 2.456540822982788, - "learning_rate": 2.804841823852666e-05, - "loss": 0.0763, + "epoch": 1.3008613008613008, + "grad_norm": 1.53095543384552, + "learning_rate": 2.2194832194832195e-05, + "loss": 0.0695, "step": 4380 }, { - "epoch": 0.32600623793257094, - "grad_norm": 1.6984671354293823, - "learning_rate": 2.8043962572404578e-05, - "loss": 0.1253, + "epoch": 1.3038313038313039, + "grad_norm": 0.8293925523757935, + "learning_rate": 2.2177012177012177e-05, + "loss": 0.0526, "step": 4390 }, { - "epoch": 0.32674884895291845, - "grad_norm": 3.025683641433716, - "learning_rate": 2.803950690628249e-05, - "loss": 0.1072, + "epoch": 1.3068013068013067, + "grad_norm": 0.66612708568573, + "learning_rate": 2.215919215919216e-05, + "loss": 0.0776, "step": 4400 }, { - "epoch": 0.327491459973266, - "grad_norm": 2.3869524002075195, - "learning_rate": 2.8035051240160405e-05, - "loss": 0.0733, + "epoch": 1.3097713097713097, + "grad_norm": 0.9548154473304749, + "learning_rate": 2.214137214137214e-05, + "loss": 0.0729, "step": 4410 }, { - "epoch": 0.32823407099361357, - "grad_norm": 1.5265862941741943, - "learning_rate": 2.8030595574038316e-05, - "loss": 0.0541, + "epoch": 1.3127413127413128, + "grad_norm": 0.5580644011497498, + "learning_rate": 2.2123552123552123e-05, + "loss": 0.0831, "step": 4420 }, { - "epoch": 0.3289766820139611, - "grad_norm": 2.0215351581573486, - "learning_rate": 2.8026139907916235e-05, - "loss": 0.0865, + "epoch": 1.3157113157113156, + "grad_norm": 0.5851168036460876, + "learning_rate": 2.2105732105732106e-05, + "loss": 0.0569, "step": 4430 }, { - "epoch": 0.32971929303430864, - "grad_norm": 1.381551742553711, - "learning_rate": 2.802168424179415e-05, - "loss": 0.1084, + "epoch": 1.3186813186813187, + "grad_norm": 0.4882819652557373, + "learning_rate": 2.208791208791209e-05, + "loss": 0.0692, "step": 4440 }, { - "epoch": 0.3304619040546562, - "grad_norm": 1.7766149044036865, - "learning_rate": 2.801722857567206e-05, - "loss": 0.0987, + "epoch": 1.3216513216513217, + "grad_norm": 0.44905969500541687, + "learning_rate": 2.207009207009207e-05, + "loss": 0.0577, "step": 4450 }, { - "epoch": 0.3312045150750037, - "grad_norm": 2.317441701889038, - "learning_rate": 2.801277290954998e-05, - "loss": 0.0809, + "epoch": 1.3246213246213245, + "grad_norm": 0.8258479237556458, + "learning_rate": 2.2052272052272052e-05, + "loss": 0.0516, "step": 4460 }, { - "epoch": 0.33194712609535126, - "grad_norm": 2.322162389755249, - "learning_rate": 2.8008317243427895e-05, - "loss": 0.114, + "epoch": 1.3275913275913276, + "grad_norm": 0.8680855631828308, + "learning_rate": 2.2034452034452034e-05, + "loss": 0.0445, "step": 4470 }, { - "epoch": 0.33268973711569877, - "grad_norm": 2.353233575820923, - "learning_rate": 2.8003861577305806e-05, - "loss": 0.098, + "epoch": 1.3305613305613306, + "grad_norm": 0.6597207188606262, + "learning_rate": 2.2016632016632017e-05, + "loss": 0.0558, "step": 4480 }, { - "epoch": 0.33343234813604633, - "grad_norm": 0.9074286818504333, - "learning_rate": 2.7999405911183725e-05, - "loss": 0.1093, + "epoch": 1.3335313335313335, + "grad_norm": 1.0117040872573853, + "learning_rate": 2.1998811998812e-05, + "loss": 0.0676, "step": 4490 }, { - "epoch": 0.3341749591563939, - "grad_norm": 4.220743656158447, - "learning_rate": 2.799495024506164e-05, - "loss": 0.0983, + "epoch": 1.3365013365013365, + "grad_norm": 0.6703979969024658, + "learning_rate": 2.198099198099198e-05, + "loss": 0.0517, "step": 4500 }, { - "epoch": 0.3349175701767414, - "grad_norm": 2.652031898498535, - "learning_rate": 2.799049457893955e-05, - "loss": 0.0873, + "epoch": 1.3394713394713396, + "grad_norm": 1.2186622619628906, + "learning_rate": 2.1963171963171966e-05, + "loss": 0.0653, "step": 4510 }, { - "epoch": 0.33566018119708896, - "grad_norm": 1.0324969291687012, - "learning_rate": 2.7986038912817466e-05, - "loss": 0.0755, + "epoch": 1.3424413424413424, + "grad_norm": 0.6504166722297668, + "learning_rate": 2.1945351945351945e-05, + "loss": 0.0737, "step": 4520 }, { - "epoch": 0.3364027922174365, - "grad_norm": 0.8681501746177673, - "learning_rate": 2.798158324669538e-05, - "loss": 0.1043, + "epoch": 1.3454113454113454, + "grad_norm": 0.8014166355133057, + "learning_rate": 2.1927531927531927e-05, + "loss": 0.0667, "step": 4530 }, { - "epoch": 0.337145403237784, - "grad_norm": 1.413583755493164, - "learning_rate": 2.7977127580573296e-05, - "loss": 0.0682, + "epoch": 1.3483813483813485, + "grad_norm": 0.6877809166908264, + "learning_rate": 2.190971190971191e-05, + "loss": 0.0683, "step": 4540 }, { - "epoch": 0.3378880142581316, - "grad_norm": 1.2596721649169922, - "learning_rate": 2.797267191445121e-05, - "loss": 0.1021, + "epoch": 1.3513513513513513, + "grad_norm": 1.1891028881072998, + "learning_rate": 2.1891891891891892e-05, + "loss": 0.0659, "step": 4550 }, { - "epoch": 0.33863062527847915, - "grad_norm": 2.051772117614746, - "learning_rate": 2.7968216248329126e-05, - "loss": 0.0646, + "epoch": 1.3543213543213544, + "grad_norm": 1.254156470298767, + "learning_rate": 2.1874071874071874e-05, + "loss": 0.065, "step": 4560 }, { - "epoch": 0.33937323629882665, - "grad_norm": 2.39245343208313, - "learning_rate": 2.796376058220704e-05, - "loss": 0.0913, + "epoch": 1.3572913572913574, + "grad_norm": 1.4208029508590698, + "learning_rate": 2.1856251856251856e-05, + "loss": 0.0733, "step": 4570 }, { - "epoch": 0.3401158473191742, - "grad_norm": 1.1950043439865112, - "learning_rate": 2.7959304916084956e-05, - "loss": 0.0772, + "epoch": 1.3602613602613602, + "grad_norm": 1.2454571723937988, + "learning_rate": 2.183843183843184e-05, + "loss": 0.0639, "step": 4580 }, { - "epoch": 0.3408584583395218, - "grad_norm": 1.7713611125946045, - "learning_rate": 2.7954849249962868e-05, - "loss": 0.1027, + "epoch": 1.3632313632313633, + "grad_norm": 0.7917842864990234, + "learning_rate": 2.1820611820611824e-05, + "loss": 0.0719, "step": 4590 }, { - "epoch": 0.3416010693598693, - "grad_norm": 1.3670064210891724, - "learning_rate": 2.7950393583840786e-05, - "loss": 0.0623, + "epoch": 1.3662013662013661, + "grad_norm": 0.7804074287414551, + "learning_rate": 2.1802791802791803e-05, + "loss": 0.0667, "step": 4600 }, { - "epoch": 0.34234368038021684, - "grad_norm": 1.9665565490722656, - "learning_rate": 2.7945937917718698e-05, - "loss": 0.0808, + "epoch": 1.3691713691713692, + "grad_norm": 0.9394906163215637, + "learning_rate": 2.1784971784971785e-05, + "loss": 0.0772, "step": 4610 }, { - "epoch": 0.3430862914005644, - "grad_norm": 3.5627613067626953, - "learning_rate": 2.7941482251596613e-05, - "loss": 0.0861, + "epoch": 1.3721413721413722, + "grad_norm": 0.9124456644058228, + "learning_rate": 2.1767151767151767e-05, + "loss": 0.0641, "step": 4620 }, { - "epoch": 0.3438289024209119, - "grad_norm": 1.8066272735595703, - "learning_rate": 2.793702658547453e-05, - "loss": 0.0831, + "epoch": 1.375111375111375, + "grad_norm": 0.8035851716995239, + "learning_rate": 2.174933174933175e-05, + "loss": 0.0537, "step": 4630 }, { - "epoch": 0.34457151344125947, - "grad_norm": 2.1542608737945557, - "learning_rate": 2.7932570919352443e-05, - "loss": 0.1111, + "epoch": 1.378081378081378, + "grad_norm": 0.603728711605072, + "learning_rate": 2.173151173151173e-05, + "loss": 0.0671, "step": 4640 }, { - "epoch": 0.34531412446160703, - "grad_norm": 2.243263006210327, - "learning_rate": 2.7928115253230358e-05, - "loss": 0.0756, + "epoch": 1.381051381051381, + "grad_norm": 0.5484776496887207, + "learning_rate": 2.1713691713691717e-05, + "loss": 0.0641, "step": 4650 }, { - "epoch": 0.34605673548195454, - "grad_norm": 1.6739652156829834, - "learning_rate": 2.7923659587108273e-05, - "loss": 0.1328, + "epoch": 1.384021384021384, + "grad_norm": 1.0918580293655396, + "learning_rate": 2.16958716958717e-05, + "loss": 0.0701, "step": 4660 }, { - "epoch": 0.3467993465023021, - "grad_norm": 2.321486711502075, - "learning_rate": 2.7919203920986188e-05, - "loss": 0.0903, + "epoch": 1.386991386991387, + "grad_norm": 0.7597218751907349, + "learning_rate": 2.1678051678051678e-05, + "loss": 0.0612, "step": 4670 }, { - "epoch": 0.34754195752264966, - "grad_norm": 2.773947238922119, - "learning_rate": 2.7914748254864103e-05, - "loss": 0.0967, + "epoch": 1.3899613899613898, + "grad_norm": 1.020501971244812, + "learning_rate": 2.166023166023166e-05, + "loss": 0.0683, "step": 4680 }, { - "epoch": 0.34828456854299716, - "grad_norm": 1.9256445169448853, - "learning_rate": 2.7910292588742018e-05, - "loss": 0.1307, + "epoch": 1.392931392931393, + "grad_norm": 0.3588350713253021, + "learning_rate": 2.1642411642411642e-05, + "loss": 0.0578, "step": 4690 }, { - "epoch": 0.3490271795633447, - "grad_norm": 2.0387189388275146, - "learning_rate": 2.7905836922619933e-05, - "loss": 0.0758, + "epoch": 1.395901395901396, + "grad_norm": 0.6665700674057007, + "learning_rate": 2.1624591624591624e-05, + "loss": 0.0586, "step": 4700 }, { - "epoch": 0.3497697905836923, - "grad_norm": 0.6718337535858154, - "learning_rate": 2.7901381256497848e-05, - "loss": 0.0923, + "epoch": 1.3988713988713988, + "grad_norm": 0.5543581247329712, + "learning_rate": 2.1606771606771606e-05, + "loss": 0.0572, "step": 4710 }, { - "epoch": 0.3505124016040398, - "grad_norm": 2.4144012928009033, - "learning_rate": 2.789692559037576e-05, - "loss": 0.0815, + "epoch": 1.4018414018414018, + "grad_norm": 0.8856662511825562, + "learning_rate": 2.1588951588951592e-05, + "loss": 0.0647, "step": 4720 }, { - "epoch": 0.35125501262438735, - "grad_norm": 0.7492033839225769, - "learning_rate": 2.7892469924253678e-05, - "loss": 0.1039, + "epoch": 1.4048114048114049, + "grad_norm": 0.7327395081520081, + "learning_rate": 2.1571131571131574e-05, + "loss": 0.055, "step": 4730 }, { - "epoch": 0.3519976236447349, - "grad_norm": 3.2652149200439453, - "learning_rate": 2.7888014258131593e-05, - "loss": 0.1027, + "epoch": 1.4077814077814077, + "grad_norm": 1.0437580347061157, + "learning_rate": 2.1553311553311553e-05, + "loss": 0.052, "step": 4740 }, { - "epoch": 0.3527402346650824, - "grad_norm": 1.8765047788619995, - "learning_rate": 2.7883558592009504e-05, - "loss": 0.1082, + "epoch": 1.4107514107514108, + "grad_norm": 0.6243680119514465, + "learning_rate": 2.1535491535491535e-05, + "loss": 0.0606, "step": 4750 }, { - "epoch": 0.35348284568543, - "grad_norm": 2.7471463680267334, - "learning_rate": 2.787910292588742e-05, - "loss": 0.0829, + "epoch": 1.4137214137214138, + "grad_norm": 0.8211525678634644, + "learning_rate": 2.1517671517671517e-05, + "loss": 0.0586, "step": 4760 }, { - "epoch": 0.3542254567057775, - "grad_norm": 4.803821563720703, - "learning_rate": 2.7874647259765338e-05, - "loss": 0.0897, + "epoch": 1.4166914166914166, + "grad_norm": 0.8621878027915955, + "learning_rate": 2.14998514998515e-05, + "loss": 0.0732, "step": 4770 }, { - "epoch": 0.35496806772612505, - "grad_norm": 1.495339035987854, - "learning_rate": 2.787019159364325e-05, - "loss": 0.0898, + "epoch": 1.4196614196614197, + "grad_norm": 0.9054310321807861, + "learning_rate": 2.148203148203148e-05, + "loss": 0.0548, "step": 4780 }, { - "epoch": 0.3557106787464726, - "grad_norm": 2.4038844108581543, - "learning_rate": 2.7865735927521164e-05, - "loss": 0.0976, + "epoch": 1.4226314226314227, + "grad_norm": 1.122381567955017, + "learning_rate": 2.1464211464211467e-05, + "loss": 0.0588, "step": 4790 }, { - "epoch": 0.3564532897668201, - "grad_norm": 1.810927152633667, - "learning_rate": 2.7861280261399083e-05, - "loss": 0.0859, + "epoch": 1.4256014256014256, + "grad_norm": 1.5067578554153442, + "learning_rate": 2.144639144639145e-05, + "loss": 0.0592, "step": 4800 }, { - "epoch": 0.3571959007871677, - "grad_norm": 3.185044527053833, - "learning_rate": 2.7856824595276994e-05, - "loss": 0.0832, + "epoch": 1.4285714285714286, + "grad_norm": 0.9946634769439697, + "learning_rate": 2.1428571428571428e-05, + "loss": 0.0588, "step": 4810 }, { - "epoch": 0.35793851180751524, - "grad_norm": 4.21889066696167, - "learning_rate": 2.785236892915491e-05, - "loss": 0.0992, + "epoch": 1.4315414315414317, + "grad_norm": 0.7225719094276428, + "learning_rate": 2.141075141075141e-05, + "loss": 0.0669, "step": 4820 }, { - "epoch": 0.35868112282786274, - "grad_norm": 1.788333773612976, - "learning_rate": 2.784791326303282e-05, - "loss": 0.0538, + "epoch": 1.4345114345114345, + "grad_norm": 0.7336903810501099, + "learning_rate": 2.1392931392931392e-05, + "loss": 0.0707, "step": 4830 }, { - "epoch": 0.3594237338482103, - "grad_norm": 3.176811933517456, - "learning_rate": 2.784345759691074e-05, - "loss": 0.107, + "epoch": 1.4374814374814375, + "grad_norm": 1.2703900337219238, + "learning_rate": 2.1375111375111375e-05, + "loss": 0.0666, "step": 4840 }, { - "epoch": 0.36016634486855786, - "grad_norm": 3.0961802005767822, - "learning_rate": 2.7839001930788654e-05, - "loss": 0.0717, + "epoch": 1.4404514404514406, + "grad_norm": 1.0433528423309326, + "learning_rate": 2.1357291357291357e-05, + "loss": 0.0754, "step": 4850 }, { - "epoch": 0.36090895588890537, - "grad_norm": 1.6400991678237915, - "learning_rate": 2.7834546264666566e-05, - "loss": 0.0998, + "epoch": 1.4434214434214434, + "grad_norm": 0.6614531874656677, + "learning_rate": 2.1339471339471342e-05, + "loss": 0.0685, "step": 4860 }, { - "epoch": 0.36165156690925293, - "grad_norm": 1.0916283130645752, - "learning_rate": 2.7830090598544484e-05, - "loss": 0.0795, + "epoch": 1.4463914463914465, + "grad_norm": 0.7256604433059692, + "learning_rate": 2.1321651321651325e-05, + "loss": 0.0488, "step": 4870 }, { - "epoch": 0.3623941779296005, - "grad_norm": 0.8684899210929871, - "learning_rate": 2.78256349324224e-05, - "loss": 0.1083, + "epoch": 1.4493614493614493, + "grad_norm": 0.781690776348114, + "learning_rate": 2.1303831303831303e-05, + "loss": 0.0599, "step": 4880 }, { - "epoch": 0.363136788949948, - "grad_norm": 6.465219497680664, - "learning_rate": 2.782117926630031e-05, - "loss": 0.1498, + "epoch": 1.4523314523314523, + "grad_norm": 0.5681362748146057, + "learning_rate": 2.1286011286011286e-05, + "loss": 0.0654, "step": 4890 }, { - "epoch": 0.36387939997029556, - "grad_norm": 1.2663229703903198, - "learning_rate": 2.781672360017823e-05, - "loss": 0.0654, + "epoch": 1.4553014553014554, + "grad_norm": 0.6243995428085327, + "learning_rate": 2.1268191268191268e-05, + "loss": 0.0695, "step": 4900 }, { - "epoch": 0.3646220109906431, - "grad_norm": 3.739539861679077, - "learning_rate": 2.7812267934056144e-05, - "loss": 0.1353, + "epoch": 1.4582714582714582, + "grad_norm": 0.7592119574546814, + "learning_rate": 2.125037125037125e-05, + "loss": 0.0562, "step": 4910 }, { - "epoch": 0.3653646220109906, - "grad_norm": 3.384850025177002, - "learning_rate": 2.7807812267934056e-05, - "loss": 0.111, + "epoch": 1.4612414612414613, + "grad_norm": 0.740118682384491, + "learning_rate": 2.1232551232551232e-05, + "loss": 0.0617, "step": 4920 }, { - "epoch": 0.3661072330313382, - "grad_norm": 2.7936530113220215, - "learning_rate": 2.780335660181197e-05, - "loss": 0.1048, + "epoch": 1.464211464211464, + "grad_norm": 0.8962658643722534, + "learning_rate": 2.1214731214731218e-05, + "loss": 0.0657, "step": 4930 }, { - "epoch": 0.36684984405168575, - "grad_norm": 1.8607102632522583, - "learning_rate": 2.7798900935689886e-05, - "loss": 0.0717, + "epoch": 1.4671814671814671, + "grad_norm": 0.6510963439941406, + "learning_rate": 2.11969111969112e-05, + "loss": 0.0639, "step": 4940 }, { - "epoch": 0.36759245507203325, - "grad_norm": 2.1067261695861816, - "learning_rate": 2.77944452695678e-05, - "loss": 0.0881, + "epoch": 1.4701514701514702, + "grad_norm": 1.0592265129089355, + "learning_rate": 2.117909117909118e-05, + "loss": 0.0644, "step": 4950 }, { - "epoch": 0.3683350660923808, - "grad_norm": 1.7310969829559326, - "learning_rate": 2.7789989603445716e-05, - "loss": 0.0873, + "epoch": 1.473121473121473, + "grad_norm": 0.9842652678489685, + "learning_rate": 2.116127116127116e-05, + "loss": 0.0573, "step": 4960 }, { - "epoch": 0.3690776771127284, - "grad_norm": 1.5683966875076294, - "learning_rate": 2.778553393732363e-05, - "loss": 0.1008, + "epoch": 1.476091476091476, + "grad_norm": 0.7804544568061829, + "learning_rate": 2.1143451143451143e-05, + "loss": 0.0605, "step": 4970 }, { - "epoch": 0.3698202881330759, - "grad_norm": 3.5258140563964844, - "learning_rate": 2.7781078271201546e-05, - "loss": 0.0738, + "epoch": 1.4790614790614791, + "grad_norm": 0.7872318625450134, + "learning_rate": 2.1125631125631125e-05, + "loss": 0.0561, "step": 4980 }, { - "epoch": 0.37056289915342344, - "grad_norm": 1.4318699836730957, - "learning_rate": 2.777662260507946e-05, - "loss": 0.0773, + "epoch": 1.482031482031482, + "grad_norm": 0.9400390982627869, + "learning_rate": 2.1107811107811107e-05, + "loss": 0.0684, "step": 4990 }, { - "epoch": 0.371305510173771, - "grad_norm": 2.4203314781188965, - "learning_rate": 2.7772166938957373e-05, - "loss": 0.1097, + "epoch": 1.485001485001485, + "grad_norm": 1.317718505859375, + "learning_rate": 2.1089991089991093e-05, + "loss": 0.0586, "step": 5000 }, { - "epoch": 0.3720481211941185, - "grad_norm": 1.1299662590026855, - "learning_rate": 2.776771127283529e-05, - "loss": 0.0858, + "epoch": 1.487971487971488, + "grad_norm": 0.41417181491851807, + "learning_rate": 2.1072171072171075e-05, + "loss": 0.0671, "step": 5010 }, { - "epoch": 0.37279073221446607, - "grad_norm": 4.186913013458252, - "learning_rate": 2.7763255606713206e-05, - "loss": 0.1212, + "epoch": 1.4909414909414909, + "grad_norm": 0.775679886341095, + "learning_rate": 2.1054351054351054e-05, + "loss": 0.0753, "step": 5020 }, { - "epoch": 0.37353334323481363, - "grad_norm": 3.082172393798828, - "learning_rate": 2.7758799940591117e-05, - "loss": 0.1027, + "epoch": 1.493911493911494, + "grad_norm": 1.1018742322921753, + "learning_rate": 2.1036531036531036e-05, + "loss": 0.0684, "step": 5030 }, { - "epoch": 0.37427595425516114, - "grad_norm": 3.5075833797454834, - "learning_rate": 2.7754344274469036e-05, - "loss": 0.0974, + "epoch": 1.496881496881497, + "grad_norm": 0.877463161945343, + "learning_rate": 2.1018711018711018e-05, + "loss": 0.0622, "step": 5040 }, { - "epoch": 0.3750185652755087, - "grad_norm": 4.949690818786621, - "learning_rate": 2.7749888608346947e-05, - "loss": 0.0912, + "epoch": 1.4998514998514998, + "grad_norm": 0.44593024253845215, + "learning_rate": 2.1000891000891e-05, + "loss": 0.0829, "step": 5050 }, { - "epoch": 0.3757611762958562, - "grad_norm": 2.1641194820404053, - "learning_rate": 2.7745432942224862e-05, - "loss": 0.11, + "epoch": 1.5028215028215028, + "grad_norm": 0.7530653476715088, + "learning_rate": 2.0983070983070982e-05, + "loss": 0.0573, "step": 5060 }, { - "epoch": 0.37650378731620376, - "grad_norm": 2.0834977626800537, - "learning_rate": 2.7740977276102777e-05, - "loss": 0.055, + "epoch": 1.505791505791506, + "grad_norm": 0.6737465262413025, + "learning_rate": 2.0965250965250968e-05, + "loss": 0.0657, "step": 5070 }, { - "epoch": 0.3772463983365513, - "grad_norm": 1.6411371231079102, - "learning_rate": 2.7736521609980692e-05, - "loss": 0.0844, + "epoch": 1.5087615087615087, + "grad_norm": 1.2247141599655151, + "learning_rate": 2.094743094743095e-05, + "loss": 0.0715, "step": 5080 }, { - "epoch": 0.37798900935689883, - "grad_norm": 2.455378770828247, - "learning_rate": 2.7732065943858607e-05, - "loss": 0.086, + "epoch": 1.5117315117315118, + "grad_norm": 0.5894985795021057, + "learning_rate": 2.092961092961093e-05, + "loss": 0.0635, "step": 5090 }, { - "epoch": 0.3787316203772464, - "grad_norm": 0.9530849456787109, - "learning_rate": 2.7727610277736522e-05, - "loss": 0.0879, + "epoch": 1.5147015147015148, + "grad_norm": 0.6736055016517639, + "learning_rate": 2.091179091179091e-05, + "loss": 0.0699, "step": 5100 }, { - "epoch": 0.37947423139759395, - "grad_norm": 0.6833879947662354, - "learning_rate": 2.7723154611614437e-05, - "loss": 0.1207, + "epoch": 1.5176715176715176, + "grad_norm": 1.1154353618621826, + "learning_rate": 2.0893970893970893e-05, + "loss": 0.0662, "step": 5110 }, { - "epoch": 0.38021684241794146, - "grad_norm": 2.2070958614349365, - "learning_rate": 2.7718698945492352e-05, - "loss": 0.0749, + "epoch": 1.5206415206415207, + "grad_norm": 1.1602790355682373, + "learning_rate": 2.0876150876150875e-05, + "loss": 0.0611, "step": 5120 }, { - "epoch": 0.380959453438289, - "grad_norm": 4.673049449920654, - "learning_rate": 2.7714243279370264e-05, - "loss": 0.0855, + "epoch": 1.5236115236115237, + "grad_norm": 0.6141194701194763, + "learning_rate": 2.0858330858330858e-05, + "loss": 0.0655, "step": 5130 }, { - "epoch": 0.3817020644586366, - "grad_norm": 2.2408103942871094, - "learning_rate": 2.7709787613248182e-05, - "loss": 0.0863, + "epoch": 1.5265815265815266, + "grad_norm": 0.7376190423965454, + "learning_rate": 2.0840510840510843e-05, + "loss": 0.055, "step": 5140 }, { - "epoch": 0.3824446754789841, - "grad_norm": 1.7068830728530884, - "learning_rate": 2.7705331947126097e-05, - "loss": 0.0949, + "epoch": 1.5295515295515294, + "grad_norm": 1.0901671648025513, + "learning_rate": 2.0822690822690825e-05, + "loss": 0.0678, "step": 5150 }, { - "epoch": 0.38318728649933165, - "grad_norm": 1.8522627353668213, - "learning_rate": 2.770087628100401e-05, - "loss": 0.0875, + "epoch": 1.5325215325215327, + "grad_norm": 0.7825741171836853, + "learning_rate": 2.0804870804870808e-05, + "loss": 0.0722, "step": 5160 }, { - "epoch": 0.3839298975196792, - "grad_norm": 2.645232915878296, - "learning_rate": 2.7696420614881924e-05, - "loss": 0.0823, + "epoch": 1.5354915354915355, + "grad_norm": 1.3154313564300537, + "learning_rate": 2.0787050787050786e-05, + "loss": 0.0651, "step": 5170 }, { - "epoch": 0.3846725085400267, - "grad_norm": 3.677633047103882, - "learning_rate": 2.7691964948759842e-05, - "loss": 0.0726, + "epoch": 1.5384615384615383, + "grad_norm": 0.9979254603385925, + "learning_rate": 2.076923076923077e-05, + "loss": 0.0623, "step": 5180 }, { - "epoch": 0.3854151195603743, - "grad_norm": 2.5653793811798096, - "learning_rate": 2.7687509282637754e-05, - "loss": 0.087, + "epoch": 1.5414315414315416, + "grad_norm": 0.6638979911804199, + "learning_rate": 2.075141075141075e-05, + "loss": 0.0724, "step": 5190 }, { - "epoch": 0.38615773058072184, - "grad_norm": 3.1218738555908203, - "learning_rate": 2.768305361651567e-05, - "loss": 0.09, + "epoch": 1.5444015444015444, + "grad_norm": 0.9916415214538574, + "learning_rate": 2.0733590733590733e-05, + "loss": 0.0665, "step": 5200 }, { - "epoch": 0.38690034160106934, - "grad_norm": 1.5911304950714111, - "learning_rate": 2.7678597950393587e-05, - "loss": 0.0867, + "epoch": 1.5473715473715473, + "grad_norm": 1.4179552793502808, + "learning_rate": 2.071577071577072e-05, + "loss": 0.062, "step": 5210 }, { - "epoch": 0.3876429526214169, - "grad_norm": 1.051086187362671, - "learning_rate": 2.76741422842715e-05, - "loss": 0.1087, + "epoch": 1.5503415503415503, + "grad_norm": 0.7684439420700073, + "learning_rate": 2.06979506979507e-05, + "loss": 0.0575, "step": 5220 }, { - "epoch": 0.38838556364176446, - "grad_norm": 2.593616247177124, - "learning_rate": 2.7669686618149414e-05, - "loss": 0.0842, + "epoch": 1.5533115533115534, + "grad_norm": 0.7808251976966858, + "learning_rate": 2.0680130680130683e-05, + "loss": 0.065, "step": 5230 }, { - "epoch": 0.38912817466211197, - "grad_norm": 2.5163533687591553, - "learning_rate": 2.7665230952027326e-05, - "loss": 0.1173, + "epoch": 1.5562815562815562, + "grad_norm": 0.6247586011886597, + "learning_rate": 2.066231066231066e-05, + "loss": 0.0611, "step": 5240 }, { - "epoch": 0.38987078568245953, - "grad_norm": 4.386409759521484, - "learning_rate": 2.7660775285905244e-05, - "loss": 0.1035, + "epoch": 1.5592515592515592, + "grad_norm": 0.9080334305763245, + "learning_rate": 2.0644490644490644e-05, + "loss": 0.0591, "step": 5250 }, { - "epoch": 0.3906133967028071, - "grad_norm": 2.9560604095458984, - "learning_rate": 2.765631961978316e-05, - "loss": 0.0936, + "epoch": 1.5622215622215623, + "grad_norm": 1.0106734037399292, + "learning_rate": 2.0626670626670626e-05, + "loss": 0.055, "step": 5260 }, { - "epoch": 0.3913560077231546, - "grad_norm": 2.026900291442871, - "learning_rate": 2.765186395366107e-05, - "loss": 0.1084, + "epoch": 1.565191565191565, + "grad_norm": 0.8516258001327515, + "learning_rate": 2.0608850608850608e-05, + "loss": 0.0609, "step": 5270 }, { - "epoch": 0.39209861874350216, - "grad_norm": 2.574880361557007, - "learning_rate": 2.764740828753899e-05, - "loss": 0.0733, + "epoch": 1.5681615681615682, + "grad_norm": 0.7109993100166321, + "learning_rate": 2.0591030591030594e-05, + "loss": 0.0644, "step": 5280 }, { - "epoch": 0.3928412297638497, - "grad_norm": 1.350338339805603, - "learning_rate": 2.7642952621416904e-05, - "loss": 0.092, + "epoch": 1.5711315711315712, + "grad_norm": 1.2328643798828125, + "learning_rate": 2.0573210573210576e-05, + "loss": 0.0654, "step": 5290 }, { - "epoch": 0.3935838407841972, - "grad_norm": 1.7275868654251099, - "learning_rate": 2.7638496955294816e-05, - "loss": 0.0534, + "epoch": 1.574101574101574, + "grad_norm": 0.8040093183517456, + "learning_rate": 2.0555390555390558e-05, + "loss": 0.0558, "step": 5300 }, { - "epoch": 0.3943264518045448, - "grad_norm": 1.1320747137069702, - "learning_rate": 2.7634041289172734e-05, - "loss": 0.1091, + "epoch": 1.577071577071577, + "grad_norm": 0.4768785834312439, + "learning_rate": 2.0537570537570537e-05, + "loss": 0.0486, "step": 5310 }, { - "epoch": 0.39506906282489235, - "grad_norm": 1.5764305591583252, - "learning_rate": 2.762958562305065e-05, - "loss": 0.0697, + "epoch": 1.5800415800415801, + "grad_norm": 0.8780914545059204, + "learning_rate": 2.051975051975052e-05, + "loss": 0.0597, "step": 5320 }, { - "epoch": 0.39581167384523985, - "grad_norm": 1.4530662298202515, - "learning_rate": 2.762512995692856e-05, - "loss": 0.0823, + "epoch": 1.583011583011583, + "grad_norm": 0.5771424770355225, + "learning_rate": 2.05019305019305e-05, + "loss": 0.0696, "step": 5330 }, { - "epoch": 0.3965542848655874, - "grad_norm": 3.964816093444824, - "learning_rate": 2.7620674290806476e-05, - "loss": 0.0677, + "epoch": 1.585981585981586, + "grad_norm": 0.8761149644851685, + "learning_rate": 2.0484110484110483e-05, + "loss": 0.0617, "step": 5340 }, { - "epoch": 0.3972968958859349, - "grad_norm": 2.6048128604888916, - "learning_rate": 2.761621862468439e-05, - "loss": 0.0656, + "epoch": 1.588951588951589, + "grad_norm": 0.8866630792617798, + "learning_rate": 2.046629046629047e-05, + "loss": 0.0619, "step": 5350 }, { - "epoch": 0.3980395069062825, - "grad_norm": 1.2549293041229248, - "learning_rate": 2.7611762958562306e-05, - "loss": 0.0803, + "epoch": 1.5919215919215919, + "grad_norm": 0.8091713190078735, + "learning_rate": 2.044847044847045e-05, + "loss": 0.0705, "step": 5360 }, { - "epoch": 0.39878211792663004, - "grad_norm": 2.1924233436584473, - "learning_rate": 2.760730729244022e-05, - "loss": 0.0775, + "epoch": 1.594891594891595, + "grad_norm": 1.079195261001587, + "learning_rate": 2.0430650430650433e-05, + "loss": 0.0555, "step": 5370 }, { - "epoch": 0.39952472894697755, - "grad_norm": 1.1957290172576904, - "learning_rate": 2.7602851626318136e-05, - "loss": 0.0526, + "epoch": 1.597861597861598, + "grad_norm": 0.5570653676986694, + "learning_rate": 2.0412830412830412e-05, + "loss": 0.0759, "step": 5380 }, { - "epoch": 0.4002673399673251, - "grad_norm": 4.39811897277832, - "learning_rate": 2.759839596019605e-05, - "loss": 0.0843, + "epoch": 1.6008316008316008, + "grad_norm": 0.8151302933692932, + "learning_rate": 2.0395010395010394e-05, + "loss": 0.0644, "step": 5390 }, { - "epoch": 0.40100995098767267, - "grad_norm": 2.887032985687256, - "learning_rate": 2.7593940294073965e-05, - "loss": 0.105, + "epoch": 1.6038016038016036, + "grad_norm": 0.9399856925010681, + "learning_rate": 2.0377190377190376e-05, + "loss": 0.0507, "step": 5400 }, { - "epoch": 0.4017525620080202, - "grad_norm": 2.1287643909454346, - "learning_rate": 2.7589484627951877e-05, - "loss": 0.0919, + "epoch": 1.606771606771607, + "grad_norm": 0.7044445872306824, + "learning_rate": 2.035937035937036e-05, + "loss": 0.0638, "step": 5410 }, { - "epoch": 0.40249517302836774, - "grad_norm": 2.559832811355591, - "learning_rate": 2.7585028961829795e-05, - "loss": 0.0941, + "epoch": 1.6097416097416097, + "grad_norm": 0.9745432734489441, + "learning_rate": 2.0341550341550344e-05, + "loss": 0.0588, "step": 5420 }, { - "epoch": 0.4032377840487153, - "grad_norm": 3.4506430625915527, - "learning_rate": 2.758057329570771e-05, - "loss": 0.0959, + "epoch": 1.6127116127116126, + "grad_norm": 0.8628720045089722, + "learning_rate": 2.0323730323730326e-05, + "loss": 0.0627, "step": 5430 }, { - "epoch": 0.4039803950690628, - "grad_norm": 0.877765953540802, - "learning_rate": 2.7576117629585622e-05, - "loss": 0.1001, + "epoch": 1.6156816156816158, + "grad_norm": 0.8440065979957581, + "learning_rate": 2.0305910305910308e-05, + "loss": 0.056, "step": 5440 }, { - "epoch": 0.40472300608941036, - "grad_norm": 2.6283414363861084, - "learning_rate": 2.757166196346354e-05, - "loss": 0.0734, + "epoch": 1.6186516186516187, + "grad_norm": 0.7349764108657837, + "learning_rate": 2.0288090288090287e-05, + "loss": 0.0524, "step": 5450 }, { - "epoch": 0.4054656171097579, - "grad_norm": 2.917095899581909, - "learning_rate": 2.7567206297341452e-05, - "loss": 0.1153, + "epoch": 1.6216216216216215, + "grad_norm": 0.4457996189594269, + "learning_rate": 2.027027027027027e-05, + "loss": 0.0678, "step": 5460 }, { - "epoch": 0.40620822813010543, - "grad_norm": 1.10123872756958, - "learning_rate": 2.7562750631219367e-05, - "loss": 0.0765, + "epoch": 1.6245916245916245, + "grad_norm": 1.1030367612838745, + "learning_rate": 2.025245025245025e-05, + "loss": 0.075, "step": 5470 }, { - "epoch": 0.406950839150453, - "grad_norm": 4.8916096687316895, - "learning_rate": 2.7558294965097282e-05, - "loss": 0.0609, + "epoch": 1.6275616275616276, + "grad_norm": 0.6578485369682312, + "learning_rate": 2.0234630234630234e-05, + "loss": 0.0586, "step": 5480 }, { - "epoch": 0.40769345017080055, - "grad_norm": 1.0813095569610596, - "learning_rate": 2.7553839298975197e-05, - "loss": 0.0926, + "epoch": 1.6305316305316304, + "grad_norm": 0.8512071967124939, + "learning_rate": 2.021681021681022e-05, + "loss": 0.0619, "step": 5490 }, { - "epoch": 0.40843606119114806, - "grad_norm": 2.3865935802459717, - "learning_rate": 2.7549383632853112e-05, - "loss": 0.0769, + "epoch": 1.6335016335016335, + "grad_norm": 0.3718603551387787, + "learning_rate": 2.01989901989902e-05, + "loss": 0.077, "step": 5500 }, { - "epoch": 0.4091786722114956, - "grad_norm": 2.4773435592651367, - "learning_rate": 2.7544927966731027e-05, - "loss": 0.0767, + "epoch": 1.6364716364716365, + "grad_norm": 0.6400578618049622, + "learning_rate": 2.0181170181170183e-05, + "loss": 0.0443, "step": 5510 }, { - "epoch": 0.4099212832318432, - "grad_norm": 1.5695173740386963, - "learning_rate": 2.7540472300608942e-05, - "loss": 0.0639, + "epoch": 1.6394416394416393, + "grad_norm": 0.6334373354911804, + "learning_rate": 2.0163350163350162e-05, + "loss": 0.0749, "step": 5520 }, { - "epoch": 0.4106638942521907, - "grad_norm": 3.533438205718994, - "learning_rate": 2.7536016634486857e-05, - "loss": 0.0692, + "epoch": 1.6424116424116424, + "grad_norm": 1.0908116102218628, + "learning_rate": 2.0145530145530144e-05, + "loss": 0.0597, "step": 5530 }, { - "epoch": 0.41140650527253825, - "grad_norm": 1.1190873384475708, - "learning_rate": 2.7531560968364772e-05, - "loss": 0.0645, + "epoch": 1.6453816453816454, + "grad_norm": 0.8702746033668518, + "learning_rate": 2.0127710127710127e-05, + "loss": 0.0622, "step": 5540 }, { - "epoch": 0.4121491162928858, - "grad_norm": 2.1660842895507812, - "learning_rate": 2.7527105302242687e-05, - "loss": 0.0883, + "epoch": 1.6483516483516483, + "grad_norm": 0.816605269908905, + "learning_rate": 2.010989010989011e-05, + "loss": 0.0555, "step": 5550 }, { - "epoch": 0.4128917273132333, - "grad_norm": 1.7716519832611084, - "learning_rate": 2.7522649636120602e-05, - "loss": 0.0858, + "epoch": 1.6513216513216513, + "grad_norm": 1.7561945915222168, + "learning_rate": 2.0092070092070094e-05, + "loss": 0.0609, "step": 5560 }, { - "epoch": 0.4136343383335809, - "grad_norm": 1.537878155708313, - "learning_rate": 2.7518193969998514e-05, - "loss": 0.0814, + "epoch": 1.6542916542916544, + "grad_norm": 1.0770184993743896, + "learning_rate": 2.0074250074250076e-05, + "loss": 0.0737, "step": 5570 }, { - "epoch": 0.41437694935392844, - "grad_norm": 2.6977486610412598, - "learning_rate": 2.751373830387643e-05, - "loss": 0.0803, + "epoch": 1.6572616572616572, + "grad_norm": 0.8192417025566101, + "learning_rate": 2.005643005643006e-05, + "loss": 0.0607, "step": 5580 }, { - "epoch": 0.41511956037427594, - "grad_norm": 2.5686473846435547, - "learning_rate": 2.7509282637754347e-05, - "loss": 0.1299, + "epoch": 1.6602316602316602, + "grad_norm": 0.49099376797676086, + "learning_rate": 2.0038610038610037e-05, + "loss": 0.0621, "step": 5590 }, { - "epoch": 0.4158621713946235, - "grad_norm": 3.5624582767486572, - "learning_rate": 2.750482697163226e-05, - "loss": 0.0667, + "epoch": 1.6632016632016633, + "grad_norm": 0.8861784338951111, + "learning_rate": 2.002079002079002e-05, + "loss": 0.0548, "step": 5600 }, { - "epoch": 0.41660478241497106, - "grad_norm": 1.4908101558685303, - "learning_rate": 2.7500371305510174e-05, - "loss": 0.0869, + "epoch": 1.6661716661716661, + "grad_norm": 0.6367491483688354, + "learning_rate": 2.0002970002970002e-05, + "loss": 0.0685, "step": 5610 }, { - "epoch": 0.41734739343531857, - "grad_norm": 1.9675188064575195, - "learning_rate": 2.7495915639388092e-05, - "loss": 0.0645, + "epoch": 1.6691416691416692, + "grad_norm": 0.639358401298523, + "learning_rate": 1.9985149985149984e-05, + "loss": 0.0572, "step": 5620 }, { - "epoch": 0.41809000445566613, - "grad_norm": 3.775062322616577, - "learning_rate": 2.7491459973266004e-05, - "loss": 0.0985, + "epoch": 1.6721116721116722, + "grad_norm": 0.8413973450660706, + "learning_rate": 1.996732996732997e-05, + "loss": 0.063, "step": 5630 }, { - "epoch": 0.41883261547601364, - "grad_norm": 5.706444263458252, - "learning_rate": 2.748700430714392e-05, - "loss": 0.0996, + "epoch": 1.675081675081675, + "grad_norm": 1.1640899181365967, + "learning_rate": 1.994950994950995e-05, + "loss": 0.0683, "step": 5640 }, { - "epoch": 0.4195752264963612, - "grad_norm": 2.382413625717163, - "learning_rate": 2.748254864102183e-05, - "loss": 0.079, + "epoch": 1.678051678051678, + "grad_norm": 1.299309253692627, + "learning_rate": 1.9931689931689934e-05, + "loss": 0.0676, "step": 5650 }, { - "epoch": 0.42031783751670876, - "grad_norm": 2.5608088970184326, - "learning_rate": 2.747809297489975e-05, - "loss": 0.0893, + "epoch": 1.6810216810216811, + "grad_norm": 1.032827377319336, + "learning_rate": 1.9913869913869913e-05, + "loss": 0.0658, "step": 5660 }, { - "epoch": 0.42106044853705626, - "grad_norm": 2.507960796356201, - "learning_rate": 2.7473637308777664e-05, - "loss": 0.0689, + "epoch": 1.683991683991684, + "grad_norm": 0.36365097761154175, + "learning_rate": 1.9896049896049895e-05, + "loss": 0.0605, "step": 5670 }, { - "epoch": 0.4218030595574038, - "grad_norm": 2.9068281650543213, - "learning_rate": 2.7469181642655575e-05, - "loss": 0.0848, + "epoch": 1.6869616869616868, + "grad_norm": 0.8830529451370239, + "learning_rate": 1.9878229878229877e-05, + "loss": 0.0676, "step": 5680 }, { - "epoch": 0.4225456705777514, - "grad_norm": 3.1836397647857666, - "learning_rate": 2.7464725976533494e-05, - "loss": 0.0851, + "epoch": 1.68993168993169, + "grad_norm": 0.5479172468185425, + "learning_rate": 1.986040986040986e-05, + "loss": 0.0798, "step": 5690 }, { - "epoch": 0.4232882815980989, - "grad_norm": 3.9612765312194824, - "learning_rate": 2.746027031041141e-05, - "loss": 0.0785, + "epoch": 1.692901692901693, + "grad_norm": 1.1123652458190918, + "learning_rate": 1.9842589842589845e-05, + "loss": 0.0528, "step": 5700 }, { - "epoch": 0.42403089261844645, - "grad_norm": 5.2058210372924805, - "learning_rate": 2.745581464428932e-05, - "loss": 0.0883, + "epoch": 1.6958716958716957, + "grad_norm": 0.9681738018989563, + "learning_rate": 1.9824769824769827e-05, + "loss": 0.0641, "step": 5710 }, { - "epoch": 0.424773503638794, - "grad_norm": 2.7457072734832764, - "learning_rate": 2.745135897816724e-05, - "loss": 0.0654, + "epoch": 1.698841698841699, + "grad_norm": 0.7309338450431824, + "learning_rate": 1.980694980694981e-05, + "loss": 0.0764, "step": 5720 }, { - "epoch": 0.4255161146591415, - "grad_norm": 1.1056705713272095, - "learning_rate": 2.7446903312045154e-05, - "loss": 0.0996, + "epoch": 1.7018117018117018, + "grad_norm": 0.8220120668411255, + "learning_rate": 1.978912978912979e-05, + "loss": 0.0679, "step": 5730 }, { - "epoch": 0.4262587256794891, - "grad_norm": 2.1076269149780273, - "learning_rate": 2.7442447645923065e-05, - "loss": 0.057, + "epoch": 1.7047817047817047, + "grad_norm": 0.9567219018936157, + "learning_rate": 1.977130977130977e-05, + "loss": 0.0637, "step": 5740 }, { - "epoch": 0.42700133669983664, - "grad_norm": 2.5549466609954834, - "learning_rate": 2.743799197980098e-05, - "loss": 0.1045, + "epoch": 1.7077517077517077, + "grad_norm": 0.643904447555542, + "learning_rate": 1.9753489753489752e-05, + "loss": 0.0672, "step": 5750 }, { - "epoch": 0.42774394772018415, - "grad_norm": 1.2105517387390137, - "learning_rate": 2.7433536313678895e-05, - "loss": 0.0826, + "epoch": 1.7107217107217108, + "grad_norm": 1.4598885774612427, + "learning_rate": 1.9735669735669734e-05, + "loss": 0.067, "step": 5760 }, { - "epoch": 0.4284865587405317, - "grad_norm": 1.5219643115997314, - "learning_rate": 2.742908064755681e-05, - "loss": 0.0729, + "epoch": 1.7136917136917136, + "grad_norm": 0.8001982569694519, + "learning_rate": 1.971784971784972e-05, + "loss": 0.0579, "step": 5770 }, { - "epoch": 0.42922916976087927, - "grad_norm": 2.4484918117523193, - "learning_rate": 2.7424624981434725e-05, - "loss": 0.1096, + "epoch": 1.7166617166617166, + "grad_norm": 0.858055591583252, + "learning_rate": 1.9700029700029702e-05, + "loss": 0.0548, "step": 5780 }, { - "epoch": 0.4299717807812268, - "grad_norm": 2.4884450435638428, - "learning_rate": 2.742016931531264e-05, - "loss": 0.0927, + "epoch": 1.7196317196317197, + "grad_norm": 1.0781798362731934, + "learning_rate": 1.9682209682209684e-05, + "loss": 0.067, "step": 5790 }, { - "epoch": 0.43071439180157434, - "grad_norm": 2.647526502609253, - "learning_rate": 2.7415713649190555e-05, - "loss": 0.0918, + "epoch": 1.7226017226017225, + "grad_norm": 0.9981959462165833, + "learning_rate": 1.9664389664389666e-05, + "loss": 0.0689, "step": 5800 }, { - "epoch": 0.4314570028219219, - "grad_norm": 3.45865535736084, - "learning_rate": 2.741125798306847e-05, - "loss": 0.1012, + "epoch": 1.7255717255717256, + "grad_norm": 0.9247766137123108, + "learning_rate": 1.9646569646569645e-05, + "loss": 0.0595, "step": 5810 }, { - "epoch": 0.4321996138422694, - "grad_norm": 1.7236058712005615, - "learning_rate": 2.7406802316946382e-05, - "loss": 0.1037, + "epoch": 1.7285417285417286, + "grad_norm": 0.8209492564201355, + "learning_rate": 1.9628749628749627e-05, + "loss": 0.0627, "step": 5820 }, { - "epoch": 0.43294222486261696, - "grad_norm": 4.1282572746276855, - "learning_rate": 2.74023466508243e-05, - "loss": 0.0613, + "epoch": 1.7315117315117314, + "grad_norm": 1.1569684743881226, + "learning_rate": 1.961092961092961e-05, + "loss": 0.0727, "step": 5830 }, { - "epoch": 0.4336848358829645, - "grad_norm": 2.4355249404907227, - "learning_rate": 2.7397890984702215e-05, - "loss": 0.0899, + "epoch": 1.7344817344817345, + "grad_norm": 0.6315358281135559, + "learning_rate": 1.9593109593109595e-05, + "loss": 0.0673, "step": 5840 }, { - "epoch": 0.43442744690331203, - "grad_norm": 1.723847508430481, - "learning_rate": 2.7393435318580127e-05, - "loss": 0.101, + "epoch": 1.7374517374517375, + "grad_norm": 0.7351091504096985, + "learning_rate": 1.9575289575289577e-05, + "loss": 0.0673, "step": 5850 }, { - "epoch": 0.4351700579236596, - "grad_norm": 2.700627088546753, - "learning_rate": 2.7388979652458045e-05, - "loss": 0.0907, + "epoch": 1.7404217404217404, + "grad_norm": 0.8699774742126465, + "learning_rate": 1.955746955746956e-05, + "loss": 0.0732, "step": 5860 }, { - "epoch": 0.43591266894400715, - "grad_norm": 1.1109275817871094, - "learning_rate": 2.7384523986335957e-05, - "loss": 0.1118, + "epoch": 1.7433917433917434, + "grad_norm": 0.992885172367096, + "learning_rate": 1.953964953964954e-05, + "loss": 0.0674, "step": 5870 }, { - "epoch": 0.43665527996435466, - "grad_norm": 1.6550132036209106, - "learning_rate": 2.738006832021387e-05, - "loss": 0.1243, + "epoch": 1.7463617463617465, + "grad_norm": 0.8643231391906738, + "learning_rate": 1.952182952182952e-05, + "loss": 0.0755, "step": 5880 }, { - "epoch": 0.4373978909847022, - "grad_norm": 1.3361659049987793, - "learning_rate": 2.737561265409179e-05, - "loss": 0.1412, + "epoch": 1.7493317493317493, + "grad_norm": 0.8017232418060303, + "learning_rate": 1.9504009504009503e-05, + "loss": 0.0671, "step": 5890 }, { - "epoch": 0.4381405020050498, - "grad_norm": 2.4262852668762207, - "learning_rate": 2.73711569879697e-05, - "loss": 0.0814, + "epoch": 1.7523017523017523, + "grad_norm": 0.5877105593681335, + "learning_rate": 1.9486189486189485e-05, + "loss": 0.0658, "step": 5900 }, { - "epoch": 0.4388831130253973, - "grad_norm": 3.202860116958618, - "learning_rate": 2.7366701321847617e-05, - "loss": 0.0694, + "epoch": 1.7552717552717554, + "grad_norm": 1.359218716621399, + "learning_rate": 1.946836946836947e-05, + "loss": 0.0626, "step": 5910 }, { - "epoch": 0.43962572404574485, - "grad_norm": 1.6271086931228638, - "learning_rate": 2.736224565572553e-05, - "loss": 0.0871, + "epoch": 1.7582417582417582, + "grad_norm": 0.6869223117828369, + "learning_rate": 1.9450549450549452e-05, + "loss": 0.0775, "step": 5920 }, { - "epoch": 0.44036833506609235, - "grad_norm": 1.3334532976150513, - "learning_rate": 2.7357789989603447e-05, - "loss": 0.1081, + "epoch": 1.7612117612117613, + "grad_norm": 0.7375755906105042, + "learning_rate": 1.9432729432729435e-05, + "loss": 0.0599, "step": 5930 }, { - "epoch": 0.4411109460864399, - "grad_norm": 1.230716586112976, - "learning_rate": 2.735333432348136e-05, - "loss": 0.067, + "epoch": 1.7641817641817643, + "grad_norm": 0.9355472326278687, + "learning_rate": 1.9414909414909417e-05, + "loss": 0.0632, "step": 5940 }, { - "epoch": 0.4418535571067875, - "grad_norm": 1.8485809564590454, - "learning_rate": 2.7348878657359277e-05, - "loss": 0.069, + "epoch": 1.7671517671517671, + "grad_norm": 1.0422033071517944, + "learning_rate": 1.9397089397089396e-05, + "loss": 0.0644, "step": 5950 }, { - "epoch": 0.442596168127135, - "grad_norm": 1.9252151250839233, - "learning_rate": 2.734442299123719e-05, - "loss": 0.0703, + "epoch": 1.77012177012177, + "grad_norm": 0.7586382627487183, + "learning_rate": 1.9379269379269378e-05, + "loss": 0.0615, "step": 5960 }, { - "epoch": 0.44333877914748254, - "grad_norm": 0.5167465209960938, - "learning_rate": 2.7339967325115107e-05, - "loss": 0.0766, + "epoch": 1.7730917730917732, + "grad_norm": 1.0248056650161743, + "learning_rate": 1.936144936144936e-05, + "loss": 0.0708, "step": 5970 }, { - "epoch": 0.4440813901678301, - "grad_norm": 1.259781002998352, - "learning_rate": 2.7335511658993018e-05, - "loss": 0.0752, + "epoch": 1.776061776061776, + "grad_norm": 0.9860632419586182, + "learning_rate": 1.9343629343629345e-05, + "loss": 0.0735, "step": 5980 }, { - "epoch": 0.4448240011881776, - "grad_norm": 0.9502667188644409, - "learning_rate": 2.7331055992870933e-05, - "loss": 0.0699, + "epoch": 1.779031779031779, + "grad_norm": 0.8044900894165039, + "learning_rate": 1.9325809325809328e-05, + "loss": 0.051, "step": 5990 }, { - "epoch": 0.44556661220852517, - "grad_norm": 2.211690902709961, - "learning_rate": 2.732660032674885e-05, - "loss": 0.0806, + "epoch": 1.7820017820017822, + "grad_norm": 0.7025960087776184, + "learning_rate": 1.930798930798931e-05, + "loss": 0.0535, "step": 6000 }, { - "epoch": 0.44630922322887273, - "grad_norm": 4.159378528594971, - "learning_rate": 2.7322144660626763e-05, - "loss": 0.0766, + "epoch": 1.784971784971785, + "grad_norm": 0.9908897876739502, + "learning_rate": 1.9290169290169292e-05, + "loss": 0.0578, "step": 6010 }, { - "epoch": 0.44705183424922024, - "grad_norm": 2.38044810295105, - "learning_rate": 2.7317688994504678e-05, - "loss": 0.1117, + "epoch": 1.7879417879417878, + "grad_norm": 1.2506797313690186, + "learning_rate": 1.927234927234927e-05, + "loss": 0.065, "step": 6020 }, { - "epoch": 0.4477944452695678, - "grad_norm": 3.320197105407715, - "learning_rate": 2.7313233328382597e-05, - "loss": 0.0819, + "epoch": 1.7909117909117909, + "grad_norm": 1.023766040802002, + "learning_rate": 1.9254529254529253e-05, + "loss": 0.0631, "step": 6030 }, { - "epoch": 0.44853705628991536, - "grad_norm": 2.641312599182129, - "learning_rate": 2.7308777662260508e-05, - "loss": 0.0753, + "epoch": 1.793881793881794, + "grad_norm": 0.8149070739746094, + "learning_rate": 1.9236709236709235e-05, + "loss": 0.0621, "step": 6040 }, { - "epoch": 0.44927966731026286, - "grad_norm": 3.1988885402679443, - "learning_rate": 2.7304321996138423e-05, - "loss": 0.1066, + "epoch": 1.7968517968517967, + "grad_norm": 0.6476550698280334, + "learning_rate": 1.921888921888922e-05, + "loss": 0.066, "step": 6050 }, { - "epoch": 0.4500222783306104, - "grad_norm": 0.6954814195632935, - "learning_rate": 2.7299866330016335e-05, - "loss": 0.0885, + "epoch": 1.7998217998217998, + "grad_norm": 0.65140700340271, + "learning_rate": 1.9201069201069203e-05, + "loss": 0.0611, "step": 6060 }, { - "epoch": 0.450764889350958, - "grad_norm": 3.5615670680999756, - "learning_rate": 2.7295410663894253e-05, - "loss": 0.0942, + "epoch": 1.8027918027918028, + "grad_norm": 0.9502988457679749, + "learning_rate": 1.9183249183249185e-05, + "loss": 0.0615, "step": 6070 }, { - "epoch": 0.4515075003713055, - "grad_norm": 0.6206175088882446, - "learning_rate": 2.7290954997772168e-05, - "loss": 0.0688, + "epoch": 1.8057618057618057, + "grad_norm": 0.916491687297821, + "learning_rate": 1.9165429165429167e-05, + "loss": 0.0643, "step": 6080 }, { - "epoch": 0.45225011139165305, - "grad_norm": 1.3338674306869507, - "learning_rate": 2.728649933165008e-05, - "loss": 0.0969, + "epoch": 1.8087318087318087, + "grad_norm": 1.169666051864624, + "learning_rate": 1.9147609147609146e-05, + "loss": 0.0715, "step": 6090 }, { - "epoch": 0.4529927224120006, - "grad_norm": 2.5011191368103027, - "learning_rate": 2.7282043665527998e-05, - "loss": 0.0852, + "epoch": 1.8117018117018118, + "grad_norm": 1.0344257354736328, + "learning_rate": 1.9129789129789128e-05, + "loss": 0.0605, "step": 6100 }, { - "epoch": 0.4537353334323481, - "grad_norm": 4.930363655090332, - "learning_rate": 2.7277587999405913e-05, - "loss": 0.1074, + "epoch": 1.8146718146718146, + "grad_norm": 0.49185994267463684, + "learning_rate": 1.911196911196911e-05, + "loss": 0.0621, "step": 6110 }, { - "epoch": 0.4544779444526957, - "grad_norm": 2.0421066284179688, - "learning_rate": 2.7273132333283825e-05, - "loss": 0.0821, + "epoch": 1.8176418176418176, + "grad_norm": 0.9639203548431396, + "learning_rate": 1.9094149094149096e-05, + "loss": 0.0655, "step": 6120 }, { - "epoch": 0.45522055547304324, - "grad_norm": 1.314985752105713, - "learning_rate": 2.7268676667161743e-05, - "loss": 0.0847, + "epoch": 1.8206118206118207, + "grad_norm": 0.6532425284385681, + "learning_rate": 1.9076329076329078e-05, + "loss": 0.0542, "step": 6130 }, { - "epoch": 0.45596316649339075, - "grad_norm": 2.257136583328247, - "learning_rate": 2.7264221001039658e-05, - "loss": 0.0911, + "epoch": 1.8235818235818235, + "grad_norm": 0.455677330493927, + "learning_rate": 1.905850905850906e-05, + "loss": 0.0791, "step": 6140 }, { - "epoch": 0.4567057775137383, - "grad_norm": 2.229437828063965, - "learning_rate": 2.725976533491757e-05, - "loss": 0.081, + "epoch": 1.8265518265518266, + "grad_norm": 0.5976959466934204, + "learning_rate": 1.9040689040689042e-05, + "loss": 0.0586, "step": 6150 }, { - "epoch": 0.45744838853408587, - "grad_norm": 1.778793215751648, - "learning_rate": 2.7255309668795485e-05, - "loss": 0.1086, + "epoch": 1.8295218295218296, + "grad_norm": 0.6233227849006653, + "learning_rate": 1.902286902286902e-05, + "loss": 0.0765, "step": 6160 }, { - "epoch": 0.4581909995544334, - "grad_norm": 1.5188746452331543, - "learning_rate": 2.72508540026734e-05, - "loss": 0.0933, + "epoch": 1.8324918324918325, + "grad_norm": 1.4189893007278442, + "learning_rate": 1.9005049005049003e-05, + "loss": 0.0683, "step": 6170 }, { - "epoch": 0.45893361057478094, - "grad_norm": 1.7076901197433472, - "learning_rate": 2.7246398336551315e-05, - "loss": 0.0746, + "epoch": 1.8354618354618355, + "grad_norm": 0.5655115246772766, + "learning_rate": 1.8987228987228986e-05, + "loss": 0.067, "step": 6180 }, { - "epoch": 0.4596762215951285, - "grad_norm": 1.1018537282943726, - "learning_rate": 2.724194267042923e-05, - "loss": 0.0773, + "epoch": 1.8384318384318385, + "grad_norm": 0.7144678235054016, + "learning_rate": 1.896940896940897e-05, + "loss": 0.0537, "step": 6190 }, { - "epoch": 0.460418832615476, - "grad_norm": 1.8429148197174072, - "learning_rate": 2.7237487004307145e-05, - "loss": 0.0822, + "epoch": 1.8414018414018414, + "grad_norm": 1.3380944728851318, + "learning_rate": 1.8951588951588953e-05, + "loss": 0.0697, "step": 6200 }, { - "epoch": 0.46116144363582356, - "grad_norm": 4.57528829574585, - "learning_rate": 2.723303133818506e-05, - "loss": 0.1028, + "epoch": 1.8443718443718444, + "grad_norm": 0.6233878135681152, + "learning_rate": 1.8933768933768935e-05, + "loss": 0.0702, "step": 6210 }, { - "epoch": 0.46190405465617107, - "grad_norm": 2.2696962356567383, - "learning_rate": 2.7228575672062975e-05, - "loss": 0.0925, + "epoch": 1.8473418473418475, + "grad_norm": 0.7405056357383728, + "learning_rate": 1.8915948915948918e-05, + "loss": 0.0654, "step": 6220 }, { - "epoch": 0.46264666567651863, - "grad_norm": 1.2681903839111328, - "learning_rate": 2.7224120005940886e-05, - "loss": 0.1078, + "epoch": 1.8503118503118503, + "grad_norm": 1.0644950866699219, + "learning_rate": 1.8898128898128896e-05, + "loss": 0.0654, "step": 6230 }, { - "epoch": 0.4633892766968662, - "grad_norm": 0.9987069964408875, - "learning_rate": 2.7219664339818805e-05, - "loss": 0.0875, + "epoch": 1.8532818532818531, + "grad_norm": 1.2101881504058838, + "learning_rate": 1.888030888030888e-05, + "loss": 0.0612, "step": 6240 }, { - "epoch": 0.4641318877172137, - "grad_norm": 1.8749423027038574, - "learning_rate": 2.721520867369672e-05, - "loss": 0.1022, + "epoch": 1.8562518562518564, + "grad_norm": 0.7648733258247375, + "learning_rate": 1.886248886248886e-05, + "loss": 0.0492, "step": 6250 }, { - "epoch": 0.46487449873756126, - "grad_norm": 1.0351048707962036, - "learning_rate": 2.721075300757463e-05, - "loss": 0.0946, + "epoch": 1.8592218592218592, + "grad_norm": 0.764968991279602, + "learning_rate": 1.8844668844668846e-05, + "loss": 0.0582, "step": 6260 }, { - "epoch": 0.4656171097579088, - "grad_norm": 0.9065452814102173, - "learning_rate": 2.720629734145255e-05, - "loss": 0.0955, + "epoch": 1.862191862191862, + "grad_norm": 0.4919446110725403, + "learning_rate": 1.882684882684883e-05, + "loss": 0.0593, "step": 6270 }, { - "epoch": 0.4663597207782563, - "grad_norm": 0.9384631514549255, - "learning_rate": 2.720184167533046e-05, - "loss": 0.0789, + "epoch": 1.865161865161865, + "grad_norm": 0.784898579120636, + "learning_rate": 1.880902880902881e-05, + "loss": 0.0651, "step": 6280 }, { - "epoch": 0.4671023317986039, - "grad_norm": 0.5935912132263184, - "learning_rate": 2.7197386009208376e-05, - "loss": 0.0711, + "epoch": 1.8681318681318682, + "grad_norm": 0.4835127890110016, + "learning_rate": 1.8791208791208793e-05, + "loss": 0.0545, "step": 6290 }, { - "epoch": 0.46784494281895145, - "grad_norm": 2.0923197269439697, - "learning_rate": 2.7192930343086295e-05, - "loss": 0.0668, + "epoch": 1.871101871101871, + "grad_norm": 0.7325404286384583, + "learning_rate": 1.8773388773388775e-05, + "loss": 0.0622, "step": 6300 }, { - "epoch": 0.46858755383929895, - "grad_norm": 0.9946518540382385, - "learning_rate": 2.7188474676964206e-05, - "loss": 0.07, + "epoch": 1.874071874071874, + "grad_norm": 0.9660061001777649, + "learning_rate": 1.8755568755568754e-05, + "loss": 0.0767, "step": 6310 }, { - "epoch": 0.4693301648596465, - "grad_norm": 4.637099742889404, - "learning_rate": 2.718401901084212e-05, - "loss": 0.0938, + "epoch": 1.877041877041877, + "grad_norm": 1.0770442485809326, + "learning_rate": 1.8737748737748736e-05, + "loss": 0.0657, "step": 6320 }, { - "epoch": 0.4700727758799941, - "grad_norm": 3.6027259826660156, - "learning_rate": 2.7179563344720036e-05, - "loss": 0.0707, + "epoch": 1.88001188001188, + "grad_norm": 1.3359770774841309, + "learning_rate": 1.871992871992872e-05, + "loss": 0.0686, "step": 6330 }, { - "epoch": 0.4708153869003416, - "grad_norm": 2.179995059967041, - "learning_rate": 2.717510767859795e-05, - "loss": 0.0795, + "epoch": 1.882981882981883, + "grad_norm": 0.6175670027732849, + "learning_rate": 1.8702108702108704e-05, + "loss": 0.0665, "step": 6340 }, { - "epoch": 0.47155799792068914, - "grad_norm": 1.5892603397369385, - "learning_rate": 2.7170652012475866e-05, - "loss": 0.085, + "epoch": 1.885951885951886, + "grad_norm": 0.7021560072898865, + "learning_rate": 1.8684288684288686e-05, + "loss": 0.0621, "step": 6350 }, { - "epoch": 0.4723006089410367, - "grad_norm": 2.146799087524414, - "learning_rate": 2.716619634635378e-05, - "loss": 0.0842, + "epoch": 1.8889218889218888, + "grad_norm": 0.5976589918136597, + "learning_rate": 1.8666468666468668e-05, + "loss": 0.0612, "step": 6360 }, { - "epoch": 0.4730432199613842, - "grad_norm": 2.106539249420166, - "learning_rate": 2.7161740680231696e-05, - "loss": 0.0918, + "epoch": 1.8918918918918919, + "grad_norm": 0.6338089108467102, + "learning_rate": 1.864864864864865e-05, + "loss": 0.0636, "step": 6370 }, { - "epoch": 0.47378583098173177, - "grad_norm": 1.503983497619629, - "learning_rate": 2.715728501410961e-05, - "loss": 0.1038, + "epoch": 1.894861894861895, + "grad_norm": 0.9274225234985352, + "learning_rate": 1.863082863082863e-05, + "loss": 0.057, "step": 6380 }, { - "epoch": 0.47452844200207933, - "grad_norm": 1.7388819456100464, - "learning_rate": 2.7152829347987523e-05, - "loss": 0.1041, + "epoch": 1.8978318978318978, + "grad_norm": 0.42748093605041504, + "learning_rate": 1.861300861300861e-05, + "loss": 0.0583, "step": 6390 }, { - "epoch": 0.47527105302242684, - "grad_norm": 3.1437437534332275, - "learning_rate": 2.7148373681865438e-05, - "loss": 0.0824, + "epoch": 1.9008019008019008, + "grad_norm": 0.7819433808326721, + "learning_rate": 1.8595188595188597e-05, + "loss": 0.0581, "step": 6400 }, { - "epoch": 0.4760136640427744, - "grad_norm": 5.636854648590088, - "learning_rate": 2.7143918015743356e-05, - "loss": 0.0835, + "epoch": 1.9037719037719039, + "grad_norm": 0.5644189119338989, + "learning_rate": 1.857736857736858e-05, + "loss": 0.0554, "step": 6410 }, { - "epoch": 0.47675627506312196, - "grad_norm": 1.5559483766555786, - "learning_rate": 2.7139462349621268e-05, - "loss": 0.1108, + "epoch": 1.9067419067419067, + "grad_norm": 0.6357256770133972, + "learning_rate": 1.855954855954856e-05, + "loss": 0.0755, "step": 6420 }, { - "epoch": 0.47749888608346946, - "grad_norm": 2.2242963314056396, - "learning_rate": 2.7135006683499183e-05, - "loss": 0.133, + "epoch": 1.9097119097119097, + "grad_norm": 0.737325131893158, + "learning_rate": 1.8541728541728543e-05, + "loss": 0.0742, "step": 6430 }, { - "epoch": 0.478241497103817, - "grad_norm": 3.61586332321167, - "learning_rate": 2.71305510173771e-05, - "loss": 0.0996, + "epoch": 1.9126819126819128, + "grad_norm": 0.6249455809593201, + "learning_rate": 1.8523908523908525e-05, + "loss": 0.0624, "step": 6440 }, { - "epoch": 0.4789841081241646, - "grad_norm": 3.0987701416015625, - "learning_rate": 2.7126095351255013e-05, - "loss": 0.0924, + "epoch": 1.9156519156519156, + "grad_norm": 0.8265432119369507, + "learning_rate": 1.8506088506088504e-05, + "loss": 0.0535, "step": 6450 }, { - "epoch": 0.4797267191445121, - "grad_norm": 1.8545348644256592, - "learning_rate": 2.7121639685132928e-05, - "loss": 0.082, + "epoch": 1.9186219186219187, + "grad_norm": 0.5451765656471252, + "learning_rate": 1.8488268488268486e-05, + "loss": 0.0646, "step": 6460 }, { - "epoch": 0.48046933016485965, - "grad_norm": 0.8686532974243164, - "learning_rate": 2.7117184019010843e-05, - "loss": 0.068, + "epoch": 1.9215919215919217, + "grad_norm": 0.9091644883155823, + "learning_rate": 1.8470448470448472e-05, + "loss": 0.0579, "step": 6470 }, { - "epoch": 0.4812119411852072, - "grad_norm": 1.4402474164962769, - "learning_rate": 2.7112728352888758e-05, - "loss": 0.0888, + "epoch": 1.9245619245619245, + "grad_norm": 0.6501501798629761, + "learning_rate": 1.8452628452628454e-05, + "loss": 0.0518, "step": 6480 }, { - "epoch": 0.4819545522055547, - "grad_norm": 1.4960230588912964, - "learning_rate": 2.7108272686766673e-05, - "loss": 0.0626, + "epoch": 1.9275319275319274, + "grad_norm": 0.6668860912322998, + "learning_rate": 1.8434808434808436e-05, + "loss": 0.0484, "step": 6490 }, { - "epoch": 0.4826971632259023, - "grad_norm": 0.6626843214035034, - "learning_rate": 2.7103817020644584e-05, - "loss": 0.0459, + "epoch": 1.9305019305019306, + "grad_norm": 0.6235078573226929, + "learning_rate": 1.841698841698842e-05, + "loss": 0.0571, "step": 6500 }, { - "epoch": 0.4834397742462498, - "grad_norm": 2.2946035861968994, - "learning_rate": 2.7099361354522503e-05, - "loss": 0.0836, + "epoch": 1.9334719334719335, + "grad_norm": 0.5391401648521423, + "learning_rate": 1.83991683991684e-05, + "loss": 0.042, "step": 6510 }, { - "epoch": 0.48418238526659735, - "grad_norm": 3.0957255363464355, - "learning_rate": 2.7094905688400418e-05, - "loss": 0.0909, + "epoch": 1.9364419364419363, + "grad_norm": 0.4596608877182007, + "learning_rate": 1.838134838134838e-05, + "loss": 0.0666, "step": 6520 }, { - "epoch": 0.4849249962869449, - "grad_norm": 4.055625915527344, - "learning_rate": 2.709045002227833e-05, - "loss": 0.1138, + "epoch": 1.9394119394119396, + "grad_norm": 0.9191550612449646, + "learning_rate": 1.836352836352836e-05, + "loss": 0.0731, "step": 6530 }, { - "epoch": 0.4856676073072924, - "grad_norm": 1.6780099868774414, - "learning_rate": 2.7085994356156248e-05, - "loss": 0.0946, + "epoch": 1.9423819423819424, + "grad_norm": 0.8240092992782593, + "learning_rate": 1.8345708345708347e-05, + "loss": 0.0635, "step": 6540 }, { - "epoch": 0.48641021832764, - "grad_norm": 1.8075953722000122, - "learning_rate": 2.7081538690034163e-05, - "loss": 0.0697, + "epoch": 1.9453519453519452, + "grad_norm": 0.8021003007888794, + "learning_rate": 1.832788832788833e-05, + "loss": 0.0675, "step": 6550 }, { - "epoch": 0.48715282934798754, - "grad_norm": 1.8275692462921143, - "learning_rate": 2.7077083023912074e-05, - "loss": 0.0973, + "epoch": 1.9483219483219483, + "grad_norm": 1.1347585916519165, + "learning_rate": 1.831006831006831e-05, + "loss": 0.0621, "step": 6560 }, { - "epoch": 0.48789544036833504, - "grad_norm": 2.2628328800201416, - "learning_rate": 2.707262735778999e-05, - "loss": 0.1178, + "epoch": 1.9512919512919513, + "grad_norm": 0.6139947175979614, + "learning_rate": 1.8292248292248294e-05, + "loss": 0.0543, "step": 6570 }, { - "epoch": 0.4886380513886826, - "grad_norm": 1.0537023544311523, - "learning_rate": 2.7068171691667904e-05, - "loss": 0.0633, + "epoch": 1.9542619542619541, + "grad_norm": 0.7635927796363831, + "learning_rate": 1.8274428274428276e-05, + "loss": 0.0592, "step": 6580 }, { - "epoch": 0.48938066240903016, - "grad_norm": 0.39916807413101196, - "learning_rate": 2.706371602554582e-05, - "loss": 0.061, + "epoch": 1.9572319572319572, + "grad_norm": 0.43468376994132996, + "learning_rate": 1.8256608256608254e-05, + "loss": 0.0612, "step": 6590 }, { - "epoch": 0.49012327342937767, - "grad_norm": 2.486980438232422, - "learning_rate": 2.7059260359423734e-05, - "loss": 0.0502, + "epoch": 1.9602019602019602, + "grad_norm": 0.7748926877975464, + "learning_rate": 1.8238788238788237e-05, + "loss": 0.0604, "step": 6600 }, { - "epoch": 0.49086588444972523, - "grad_norm": 1.5549534559249878, - "learning_rate": 2.705480469330165e-05, - "loss": 0.096, + "epoch": 1.963171963171963, + "grad_norm": 0.8012065887451172, + "learning_rate": 1.8220968220968222e-05, + "loss": 0.0611, "step": 6610 }, { - "epoch": 0.4916084954700728, - "grad_norm": 1.8436572551727295, - "learning_rate": 2.7050349027179564e-05, - "loss": 0.0435, + "epoch": 1.9661419661419661, + "grad_norm": 0.9480249285697937, + "learning_rate": 1.8203148203148204e-05, + "loss": 0.0557, "step": 6620 }, { - "epoch": 0.4923511064904203, - "grad_norm": 2.9370453357696533, - "learning_rate": 2.704589336105748e-05, - "loss": 0.1154, + "epoch": 1.9691119691119692, + "grad_norm": 0.8358305096626282, + "learning_rate": 1.8185328185328187e-05, + "loss": 0.0598, "step": 6630 }, { - "epoch": 0.49309371751076786, - "grad_norm": 1.7184120416641235, - "learning_rate": 2.704143769493539e-05, - "loss": 0.1227, + "epoch": 1.972081972081972, + "grad_norm": 0.5806179642677307, + "learning_rate": 1.816750816750817e-05, + "loss": 0.0646, "step": 6640 }, { - "epoch": 0.4938363285311154, - "grad_norm": 1.9661284685134888, - "learning_rate": 2.703698202881331e-05, - "loss": 0.0861, + "epoch": 1.975051975051975, + "grad_norm": 1.0674052238464355, + "learning_rate": 1.814968814968815e-05, + "loss": 0.075, "step": 6650 }, { - "epoch": 0.4945789395514629, - "grad_norm": 5.240865230560303, - "learning_rate": 2.7032526362691224e-05, - "loss": 0.0973, + "epoch": 1.978021978021978, + "grad_norm": 0.7299931645393372, + "learning_rate": 1.813186813186813e-05, + "loss": 0.0526, "step": 6660 }, { - "epoch": 0.4953215505718105, - "grad_norm": 0.6822782158851624, - "learning_rate": 2.7028070696569136e-05, - "loss": 0.0736, + "epoch": 1.980991980991981, + "grad_norm": 0.7215370535850525, + "learning_rate": 1.8114048114048112e-05, + "loss": 0.063, "step": 6670 }, { - "epoch": 0.49606416159215805, - "grad_norm": 2.3852436542510986, - "learning_rate": 2.7023615030447054e-05, - "loss": 0.1097, + "epoch": 1.983961983961984, + "grad_norm": 0.6642675995826721, + "learning_rate": 1.8096228096228097e-05, + "loss": 0.0534, "step": 6680 }, { - "epoch": 0.49680677261250555, - "grad_norm": 2.0364935398101807, - "learning_rate": 2.7019159364324966e-05, - "loss": 0.0749, + "epoch": 1.986931986931987, + "grad_norm": 1.6818110942840576, + "learning_rate": 1.807840807840808e-05, + "loss": 0.0663, "step": 6690 }, { - "epoch": 0.4975493836328531, - "grad_norm": 1.774452805519104, - "learning_rate": 2.701470369820288e-05, - "loss": 0.0769, + "epoch": 1.9899019899019899, + "grad_norm": 0.8374834060668945, + "learning_rate": 1.8060588060588062e-05, + "loss": 0.057, "step": 6700 }, { - "epoch": 0.4982919946532007, - "grad_norm": 1.5295710563659668, - "learning_rate": 2.70102480320808e-05, - "loss": 0.0939, + "epoch": 1.992871992871993, + "grad_norm": 0.5147525668144226, + "learning_rate": 1.8042768042768044e-05, + "loss": 0.0629, "step": 6710 }, { - "epoch": 0.4990346056735482, - "grad_norm": 3.159693956375122, - "learning_rate": 2.700579236595871e-05, - "loss": 0.0719, + "epoch": 1.995841995841996, + "grad_norm": 0.8637810945510864, + "learning_rate": 1.8024948024948026e-05, + "loss": 0.068, "step": 6720 }, { - "epoch": 0.49977721669389574, - "grad_norm": 1.0851925611495972, - "learning_rate": 2.7001336699836626e-05, - "loss": 0.0661, + "epoch": 1.9988119988119988, + "grad_norm": 1.2453491687774658, + "learning_rate": 1.8007128007128005e-05, + "loss": 0.0679, "step": 6730 }, { - "epoch": 0.5005198277142433, - "grad_norm": 1.9622503519058228, - "learning_rate": 2.699688103371454e-05, - "loss": 0.0874, + "epoch": 2.0, + "eval_f1": 0.33031292965957215, + "eval_loss": 0.04602367803454399, + "eval_runtime": 175.7476, + "eval_samples_per_second": 216.327, + "eval_steps_per_second": 3.386, + "step": 6734 + }, + { + "epoch": 2.0017820017820016, + "grad_norm": 1.147544264793396, + "learning_rate": 1.7989307989307987e-05, + "loss": 0.0478, "step": 6740 }, { - "epoch": 0.5012624387345909, - "grad_norm": 1.293068528175354, - "learning_rate": 2.6992425367592456e-05, - "loss": 0.1071, + "epoch": 2.004752004752005, + "grad_norm": 0.7158094644546509, + "learning_rate": 1.7971487971487973e-05, + "loss": 0.0597, "step": 6750 }, { - "epoch": 0.5020050497549383, - "grad_norm": 2.094120740890503, - "learning_rate": 2.698796970147037e-05, - "loss": 0.0936, + "epoch": 2.0077220077220077, + "grad_norm": 0.5224217176437378, + "learning_rate": 1.7953667953667955e-05, + "loss": 0.0655, "step": 6760 }, { - "epoch": 0.5027476607752859, - "grad_norm": 2.2570743560791016, - "learning_rate": 2.6983514035348286e-05, - "loss": 0.0788, + "epoch": 2.0106920106920105, + "grad_norm": 1.072407603263855, + "learning_rate": 1.7935847935847937e-05, + "loss": 0.058, "step": 6770 }, { - "epoch": 0.5034902717956334, - "grad_norm": 2.327422857284546, - "learning_rate": 2.69790583692262e-05, - "loss": 0.0765, + "epoch": 2.013662013662014, + "grad_norm": 1.3633906841278076, + "learning_rate": 1.791802791802792e-05, + "loss": 0.0601, "step": 6780 }, { - "epoch": 0.504232882815981, - "grad_norm": 2.4832825660705566, - "learning_rate": 2.6974602703104116e-05, - "loss": 0.0789, + "epoch": 2.0166320166320166, + "grad_norm": 1.6582151651382446, + "learning_rate": 1.79002079002079e-05, + "loss": 0.0629, "step": 6790 }, { - "epoch": 0.5049754938363286, - "grad_norm": 2.0305325984954834, - "learning_rate": 2.6970147036982027e-05, - "loss": 0.1098, + "epoch": 2.0196020196020195, + "grad_norm": 0.9400825500488281, + "learning_rate": 1.788238788238788e-05, + "loss": 0.06, "step": 6800 }, { - "epoch": 0.5057181048566761, - "grad_norm": 1.299988865852356, - "learning_rate": 2.6965691370859942e-05, - "loss": 0.1285, + "epoch": 2.0225720225720227, + "grad_norm": 0.9094600677490234, + "learning_rate": 1.7864567864567862e-05, + "loss": 0.0583, "step": 6810 }, { - "epoch": 0.5064607158770236, - "grad_norm": 2.6327931880950928, - "learning_rate": 2.696123570473786e-05, - "loss": 0.0914, + "epoch": 2.0255420255420256, + "grad_norm": 0.7568134069442749, + "learning_rate": 1.7846747846747848e-05, + "loss": 0.0584, "step": 6820 }, { - "epoch": 0.5072033268973711, - "grad_norm": 2.3139307498931885, - "learning_rate": 2.6956780038615772e-05, - "loss": 0.0831, + "epoch": 2.0285120285120284, + "grad_norm": 0.4175792634487152, + "learning_rate": 1.782892782892783e-05, + "loss": 0.056, "step": 6830 }, { - "epoch": 0.5079459379177187, - "grad_norm": 1.7000758647918701, - "learning_rate": 2.6952324372493687e-05, - "loss": 0.0996, + "epoch": 2.0314820314820317, + "grad_norm": 0.7946698665618896, + "learning_rate": 1.7811107811107812e-05, + "loss": 0.0611, "step": 6840 }, { - "epoch": 0.5086885489380663, - "grad_norm": 2.333949089050293, - "learning_rate": 2.6947868706371606e-05, - "loss": 0.1131, + "epoch": 2.0344520344520345, + "grad_norm": 0.5856500267982483, + "learning_rate": 1.7793287793287794e-05, + "loss": 0.056, "step": 6850 }, { - "epoch": 0.5094311599584138, - "grad_norm": 0.8475568294525146, - "learning_rate": 2.6943413040249517e-05, - "loss": 0.0658, + "epoch": 2.0374220374220373, + "grad_norm": 0.7179090976715088, + "learning_rate": 1.7775467775467776e-05, + "loss": 0.0602, "step": 6860 }, { - "epoch": 0.5101737709787614, - "grad_norm": 2.65226411819458, - "learning_rate": 2.6938957374127432e-05, - "loss": 0.0892, + "epoch": 2.0403920403920406, + "grad_norm": 1.6203516721725464, + "learning_rate": 1.7757647757647755e-05, + "loss": 0.059, "step": 6870 }, { - "epoch": 0.5109163819991088, - "grad_norm": 2.692626953125, - "learning_rate": 2.6934501708005347e-05, - "loss": 0.1133, + "epoch": 2.0433620433620434, + "grad_norm": 0.522436797618866, + "learning_rate": 1.7739827739827737e-05, + "loss": 0.0616, "step": 6880 }, { - "epoch": 0.5116589930194564, - "grad_norm": 1.8590973615646362, - "learning_rate": 2.6930046041883262e-05, - "loss": 0.0935, + "epoch": 2.0463320463320462, + "grad_norm": 1.3147087097167969, + "learning_rate": 1.7722007722007723e-05, + "loss": 0.0658, "step": 6890 }, { - "epoch": 0.512401604039804, - "grad_norm": 1.9496850967407227, - "learning_rate": 2.6925590375761177e-05, - "loss": 0.0604, + "epoch": 2.0493020493020495, + "grad_norm": 0.7292523980140686, + "learning_rate": 1.7704187704187705e-05, + "loss": 0.0559, "step": 6900 }, { - "epoch": 0.5131442150601515, - "grad_norm": 3.5495307445526123, - "learning_rate": 2.692113470963909e-05, - "loss": 0.0863, + "epoch": 2.0522720522720523, + "grad_norm": 0.7819983959197998, + "learning_rate": 1.7686367686367687e-05, + "loss": 0.0581, "step": 6910 }, { - "epoch": 0.5138868260804991, - "grad_norm": 5.097157001495361, - "learning_rate": 2.6916679043517007e-05, - "loss": 0.108, + "epoch": 2.055242055242055, + "grad_norm": 0.7037408947944641, + "learning_rate": 1.766854766854767e-05, + "loss": 0.0653, "step": 6920 }, { - "epoch": 0.5146294371008466, - "grad_norm": 3.4224822521209717, - "learning_rate": 2.6912223377394922e-05, - "loss": 0.1177, + "epoch": 2.0582120582120584, + "grad_norm": 0.940587043762207, + "learning_rate": 1.765072765072765e-05, + "loss": 0.063, "step": 6930 }, { - "epoch": 0.5153720481211941, - "grad_norm": 2.013091564178467, - "learning_rate": 2.6907767711272834e-05, - "loss": 0.1062, + "epoch": 2.0611820611820613, + "grad_norm": 0.7256175875663757, + "learning_rate": 1.7632907632907634e-05, + "loss": 0.0611, "step": 6940 }, { - "epoch": 0.5161146591415416, - "grad_norm": 1.1934363842010498, - "learning_rate": 2.6903312045150752e-05, - "loss": 0.0608, + "epoch": 2.064152064152064, + "grad_norm": 0.7367241978645325, + "learning_rate": 1.7615087615087613e-05, + "loss": 0.0548, "step": 6950 }, { - "epoch": 0.5168572701618892, - "grad_norm": 3.092979669570923, - "learning_rate": 2.6898856379028667e-05, - "loss": 0.1076, + "epoch": 2.067122067122067, + "grad_norm": 0.9881260991096497, + "learning_rate": 1.7597267597267598e-05, + "loss": 0.054, "step": 6960 }, { - "epoch": 0.5175998811822368, - "grad_norm": 1.059200644493103, - "learning_rate": 2.689440071290658e-05, - "loss": 0.0554, + "epoch": 2.07009207009207, + "grad_norm": 0.8852285742759705, + "learning_rate": 1.757944757944758e-05, + "loss": 0.065, "step": 6970 }, { - "epoch": 0.5183424922025843, - "grad_norm": 4.116779804229736, - "learning_rate": 2.6889945046784494e-05, - "loss": 0.0815, + "epoch": 2.073062073062073, + "grad_norm": 0.8763765692710876, + "learning_rate": 1.7561627561627563e-05, + "loss": 0.0714, "step": 6980 }, { - "epoch": 0.5190851032229318, - "grad_norm": 1.1455808877944946, - "learning_rate": 2.6885489380662412e-05, - "loss": 0.0623, + "epoch": 2.076032076032076, + "grad_norm": 0.9801186919212341, + "learning_rate": 1.7543807543807545e-05, + "loss": 0.0644, "step": 6990 }, { - "epoch": 0.5198277142432793, - "grad_norm": 0.5762424468994141, - "learning_rate": 2.6881033714540324e-05, - "loss": 0.0587, + "epoch": 2.079002079002079, + "grad_norm": 0.8674045205116272, + "learning_rate": 1.7525987525987527e-05, + "loss": 0.0597, "step": 7000 }, { - "epoch": 0.5205703252636269, - "grad_norm": 2.309788942337036, - "learning_rate": 2.687657804841824e-05, - "loss": 0.0826, + "epoch": 2.081972081972082, + "grad_norm": 0.8817034959793091, + "learning_rate": 1.750816750816751e-05, + "loss": 0.0558, "step": 7010 }, { - "epoch": 0.5213129362839745, - "grad_norm": 3.214755058288574, - "learning_rate": 2.6872122382296154e-05, - "loss": 0.0845, + "epoch": 2.0849420849420848, + "grad_norm": 0.8658657073974609, + "learning_rate": 1.7490347490347488e-05, + "loss": 0.0673, "step": 7020 }, { - "epoch": 0.522055547304322, - "grad_norm": 1.238411545753479, - "learning_rate": 2.686766671617407e-05, - "loss": 0.046, + "epoch": 2.087912087912088, + "grad_norm": 0.5216271877288818, + "learning_rate": 1.7472527472527473e-05, + "loss": 0.057, "step": 7030 }, { - "epoch": 0.5227981583246696, - "grad_norm": 0.6192235946655273, - "learning_rate": 2.6863211050051984e-05, - "loss": 0.0523, + "epoch": 2.090882090882091, + "grad_norm": 1.3478199243545532, + "learning_rate": 1.7454707454707456e-05, + "loss": 0.0623, "step": 7040 }, { - "epoch": 0.523540769345017, - "grad_norm": 1.7982896566390991, - "learning_rate": 2.6858755383929895e-05, - "loss": 0.0863, + "epoch": 2.0938520938520937, + "grad_norm": 1.1730366945266724, + "learning_rate": 1.7436887436887438e-05, + "loss": 0.0663, "step": 7050 }, { - "epoch": 0.5242833803653646, - "grad_norm": 1.1628367900848389, - "learning_rate": 2.6854299717807814e-05, - "loss": 0.094, + "epoch": 2.096822096822097, + "grad_norm": 0.7896762490272522, + "learning_rate": 1.741906741906742e-05, + "loss": 0.0677, "step": 7060 }, { - "epoch": 0.5250259913857122, - "grad_norm": 0.698805034160614, - "learning_rate": 2.684984405168573e-05, - "loss": 0.0845, + "epoch": 2.0997920997921, + "grad_norm": 0.6003497838973999, + "learning_rate": 1.7401247401247402e-05, + "loss": 0.062, "step": 7070 }, { - "epoch": 0.5257686024060597, - "grad_norm": 0.9980218410491943, - "learning_rate": 2.684538838556364e-05, - "loss": 0.0718, + "epoch": 2.1027621027621026, + "grad_norm": 1.0001591444015503, + "learning_rate": 1.7383427383427384e-05, + "loss": 0.0653, "step": 7080 }, { - "epoch": 0.5265112134264073, - "grad_norm": 2.250861883163452, - "learning_rate": 2.684093271944156e-05, - "loss": 0.0761, + "epoch": 2.105732105732106, + "grad_norm": 1.3104040622711182, + "learning_rate": 1.7365607365607363e-05, + "loss": 0.0653, "step": 7090 }, { - "epoch": 0.5272538244467548, - "grad_norm": 0.3348923921585083, - "learning_rate": 2.683647705331947e-05, - "loss": 0.0697, + "epoch": 2.1087021087021087, + "grad_norm": 0.6923945546150208, + "learning_rate": 1.734778734778735e-05, + "loss": 0.0474, "step": 7100 }, { - "epoch": 0.5279964354671023, - "grad_norm": 1.0973154306411743, - "learning_rate": 2.6832021387197385e-05, - "loss": 0.0842, + "epoch": 2.1116721116721116, + "grad_norm": 0.6234051585197449, + "learning_rate": 1.732996732996733e-05, + "loss": 0.0617, "step": 7110 }, { - "epoch": 0.5287390464874498, - "grad_norm": 1.2300523519515991, - "learning_rate": 2.6827565721075304e-05, - "loss": 0.0847, + "epoch": 2.114642114642115, + "grad_norm": 0.9373548626899719, + "learning_rate": 1.7312147312147313e-05, + "loss": 0.055, "step": 7120 }, { - "epoch": 0.5294816575077974, - "grad_norm": 1.7506873607635498, - "learning_rate": 2.6823110054953215e-05, - "loss": 0.0802, + "epoch": 2.1176121176121177, + "grad_norm": 0.9294105768203735, + "learning_rate": 1.7294327294327295e-05, + "loss": 0.0573, "step": 7130 }, { - "epoch": 0.530224268528145, - "grad_norm": 2.3556385040283203, - "learning_rate": 2.681865438883113e-05, - "loss": 0.0903, + "epoch": 2.1205821205821205, + "grad_norm": 0.7299237847328186, + "learning_rate": 1.7276507276507277e-05, + "loss": 0.0528, "step": 7140 }, { - "epoch": 0.5309668795484925, - "grad_norm": 8.310062408447266, - "learning_rate": 2.6814198722709045e-05, - "loss": 0.0855, + "epoch": 2.1235521235521237, + "grad_norm": 0.5744438767433167, + "learning_rate": 1.725868725868726e-05, + "loss": 0.0606, "step": 7150 }, { - "epoch": 0.5317094905688401, - "grad_norm": 3.2438154220581055, - "learning_rate": 2.680974305658696e-05, - "loss": 0.1041, + "epoch": 2.1265221265221266, + "grad_norm": 0.7229251861572266, + "learning_rate": 1.7240867240867238e-05, + "loss": 0.0543, "step": 7160 }, { - "epoch": 0.5324521015891875, - "grad_norm": 1.3913241624832153, - "learning_rate": 2.6805287390464875e-05, - "loss": 0.1044, + "epoch": 2.1294921294921294, + "grad_norm": 1.3666146993637085, + "learning_rate": 1.7223047223047224e-05, + "loss": 0.0617, "step": 7170 }, { - "epoch": 0.5331947126095351, - "grad_norm": 2.0883166790008545, - "learning_rate": 2.680083172434279e-05, - "loss": 0.0913, + "epoch": 2.1324621324621322, + "grad_norm": 0.6104298830032349, + "learning_rate": 1.7205227205227206e-05, + "loss": 0.0507, "step": 7180 }, { - "epoch": 0.5339373236298827, - "grad_norm": 1.3850668668746948, - "learning_rate": 2.6796376058220705e-05, - "loss": 0.0936, + "epoch": 2.1354321354321355, + "grad_norm": 0.5680792331695557, + "learning_rate": 1.7187407187407188e-05, + "loss": 0.052, "step": 7190 }, { - "epoch": 0.5346799346502302, - "grad_norm": 2.546489953994751, - "learning_rate": 2.679192039209862e-05, - "loss": 0.0746, + "epoch": 2.1384021384021383, + "grad_norm": 0.5435447692871094, + "learning_rate": 1.716958716958717e-05, + "loss": 0.0578, "step": 7200 }, { - "epoch": 0.5354225456705778, - "grad_norm": 2.2672359943389893, - "learning_rate": 2.6787464725976532e-05, - "loss": 0.0837, + "epoch": 2.141372141372141, + "grad_norm": 0.6102536916732788, + "learning_rate": 1.7151767151767152e-05, + "loss": 0.0717, "step": 7210 }, { - "epoch": 0.5361651566909253, - "grad_norm": 1.1645616292953491, - "learning_rate": 2.6783009059854447e-05, - "loss": 0.0967, + "epoch": 2.1443421443421444, + "grad_norm": 0.9239901304244995, + "learning_rate": 1.7133947133947135e-05, + "loss": 0.0595, "step": 7220 }, { - "epoch": 0.5369077677112728, - "grad_norm": 0.8145351409912109, - "learning_rate": 2.6778553393732365e-05, - "loss": 0.0901, + "epoch": 2.1473121473121473, + "grad_norm": 0.7742301225662231, + "learning_rate": 1.7116127116127117e-05, + "loss": 0.0613, "step": 7230 }, { - "epoch": 0.5376503787316204, - "grad_norm": 1.619238257408142, - "learning_rate": 2.6774097727610277e-05, - "loss": 0.0894, + "epoch": 2.15028215028215, + "grad_norm": 0.7156841158866882, + "learning_rate": 1.70983070983071e-05, + "loss": 0.0673, "step": 7240 }, { - "epoch": 0.5383929897519679, - "grad_norm": 2.810974597930908, - "learning_rate": 2.6769642061488192e-05, - "loss": 0.0848, + "epoch": 2.1532521532521534, + "grad_norm": 0.8358856439590454, + "learning_rate": 1.708048708048708e-05, + "loss": 0.0662, "step": 7250 }, { - "epoch": 0.5391356007723155, - "grad_norm": 1.5741685628890991, - "learning_rate": 2.676518639536611e-05, - "loss": 0.0748, + "epoch": 2.156222156222156, + "grad_norm": 1.2078478336334229, + "learning_rate": 1.7062667062667063e-05, + "loss": 0.0612, "step": 7260 }, { - "epoch": 0.539878211792663, - "grad_norm": 0.8893304467201233, - "learning_rate": 2.6760730729244022e-05, - "loss": 0.0754, + "epoch": 2.159192159192159, + "grad_norm": 0.7166781425476074, + "learning_rate": 1.7044847044847045e-05, + "loss": 0.0758, "step": 7270 }, { - "epoch": 0.5406208228130105, - "grad_norm": 1.7500449419021606, - "learning_rate": 2.6756275063121937e-05, - "loss": 0.0704, + "epoch": 2.1621621621621623, + "grad_norm": 0.5501638650894165, + "learning_rate": 1.7027027027027028e-05, + "loss": 0.0544, "step": 7280 }, { - "epoch": 0.541363433833358, - "grad_norm": 1.3175913095474243, - "learning_rate": 2.6751819396999855e-05, - "loss": 0.103, + "epoch": 2.165132165132165, + "grad_norm": 0.6720577478408813, + "learning_rate": 1.700920700920701e-05, + "loss": 0.0682, "step": 7290 }, { - "epoch": 0.5421060448537056, - "grad_norm": 4.421326160430908, - "learning_rate": 2.6747363730877767e-05, - "loss": 0.1161, + "epoch": 2.168102168102168, + "grad_norm": 1.2910141944885254, + "learning_rate": 1.6991386991386992e-05, + "loss": 0.0563, "step": 7300 }, { - "epoch": 0.5428486558740532, - "grad_norm": 3.222348213195801, - "learning_rate": 2.6742908064755682e-05, - "loss": 0.1346, + "epoch": 2.171072171072171, + "grad_norm": 0.834705114364624, + "learning_rate": 1.6973566973566974e-05, + "loss": 0.0562, "step": 7310 }, { - "epoch": 0.5435912668944007, - "grad_norm": 2.507253885269165, - "learning_rate": 2.6738452398633594e-05, - "loss": 0.0985, + "epoch": 2.174042174042174, + "grad_norm": 0.9644896984100342, + "learning_rate": 1.6955746955746956e-05, + "loss": 0.0582, "step": 7320 }, { - "epoch": 0.5443338779147483, - "grad_norm": 1.2689093351364136, - "learning_rate": 2.6733996732511512e-05, - "loss": 0.087, + "epoch": 2.177012177012177, + "grad_norm": 0.7104148864746094, + "learning_rate": 1.693792693792694e-05, + "loss": 0.0624, "step": 7330 }, { - "epoch": 0.5450764889350957, - "grad_norm": 2.3243536949157715, - "learning_rate": 2.6729541066389427e-05, - "loss": 0.0801, + "epoch": 2.17998217998218, + "grad_norm": 1.1333881616592407, + "learning_rate": 1.692010692010692e-05, + "loss": 0.0681, "step": 7340 }, { - "epoch": 0.5458190999554433, - "grad_norm": 1.2682521343231201, - "learning_rate": 2.672508540026734e-05, - "loss": 0.0924, + "epoch": 2.182952182952183, + "grad_norm": 0.5454867482185364, + "learning_rate": 1.6902286902286903e-05, + "loss": 0.0504, "step": 7350 }, { - "epoch": 0.5465617109757909, - "grad_norm": 3.67933988571167, - "learning_rate": 2.6720629734145257e-05, - "loss": 0.0958, + "epoch": 2.185922185922186, + "grad_norm": 0.7545517086982727, + "learning_rate": 1.6884466884466885e-05, + "loss": 0.0546, "step": 7360 }, { - "epoch": 0.5473043219961384, - "grad_norm": 1.3130589723587036, - "learning_rate": 2.6716174068023172e-05, - "loss": 0.0962, + "epoch": 2.188892188892189, + "grad_norm": 0.8815241456031799, + "learning_rate": 1.6866646866646867e-05, + "loss": 0.0645, "step": 7370 }, { - "epoch": 0.548046933016486, - "grad_norm": 2.8078672885894775, - "learning_rate": 2.6711718401901084e-05, - "loss": 0.0919, + "epoch": 2.191862191862192, + "grad_norm": 0.8765430450439453, + "learning_rate": 1.684882684882685e-05, + "loss": 0.0658, "step": 7380 }, { - "epoch": 0.5487895440368336, - "grad_norm": 0.7422177195549011, - "learning_rate": 2.6707262735779e-05, - "loss": 0.0666, + "epoch": 2.1948321948321947, + "grad_norm": 0.8555752038955688, + "learning_rate": 1.683100683100683e-05, + "loss": 0.06, "step": 7390 }, { - "epoch": 0.549532155057181, - "grad_norm": 1.9167085886001587, - "learning_rate": 2.6702807069656917e-05, - "loss": 0.1218, + "epoch": 2.197802197802198, + "grad_norm": 0.7630652785301208, + "learning_rate": 1.6813186813186814e-05, + "loss": 0.0701, "step": 7400 }, { - "epoch": 0.5502747660775286, - "grad_norm": 2.299405336380005, - "learning_rate": 2.669835140353483e-05, - "loss": 0.0889, + "epoch": 2.200772200772201, + "grad_norm": 0.6194639205932617, + "learning_rate": 1.6795366795366796e-05, + "loss": 0.057, "step": 7410 }, { - "epoch": 0.5510173770978761, - "grad_norm": 1.3493014574050903, - "learning_rate": 2.6693895737412744e-05, - "loss": 0.0697, + "epoch": 2.2037422037422036, + "grad_norm": 0.4202905297279358, + "learning_rate": 1.6777546777546778e-05, + "loss": 0.0486, "step": 7420 }, { - "epoch": 0.5517599881182237, - "grad_norm": 1.426650881767273, - "learning_rate": 2.668944007129066e-05, - "loss": 0.0771, + "epoch": 2.206712206712207, + "grad_norm": 0.6983931660652161, + "learning_rate": 1.675972675972676e-05, + "loss": 0.0638, "step": 7430 }, { - "epoch": 0.5525025991385712, - "grad_norm": 1.6747151613235474, - "learning_rate": 2.6684984405168573e-05, - "loss": 0.0956, + "epoch": 2.2096822096822097, + "grad_norm": 0.521522045135498, + "learning_rate": 1.6741906741906742e-05, + "loss": 0.0659, "step": 7440 }, { - "epoch": 0.5532452101589188, - "grad_norm": 2.746018171310425, - "learning_rate": 2.668052873904649e-05, - "loss": 0.1107, + "epoch": 2.2126522126522126, + "grad_norm": 0.4995703101158142, + "learning_rate": 1.6724086724086725e-05, + "loss": 0.0631, "step": 7450 }, { - "epoch": 0.5539878211792663, - "grad_norm": 2.0050714015960693, - "learning_rate": 2.66760730729244e-05, - "loss": 0.0637, + "epoch": 2.215622215622216, + "grad_norm": 1.18669593334198, + "learning_rate": 1.6706266706266707e-05, + "loss": 0.0589, "step": 7460 }, { - "epoch": 0.5547304321996138, - "grad_norm": 1.4880417585372925, - "learning_rate": 2.667161740680232e-05, - "loss": 0.13, + "epoch": 2.2185922185922187, + "grad_norm": 1.027181625366211, + "learning_rate": 1.668844668844669e-05, + "loss": 0.0664, "step": 7470 }, { - "epoch": 0.5554730432199614, - "grad_norm": 0.5780320167541504, - "learning_rate": 2.6667161740680233e-05, - "loss": 0.0815, + "epoch": 2.2215622215622215, + "grad_norm": 0.9086653590202332, + "learning_rate": 1.667062667062667e-05, + "loss": 0.0589, "step": 7480 }, { - "epoch": 0.5562156542403089, - "grad_norm": 4.6798882484436035, - "learning_rate": 2.6662706074558145e-05, - "loss": 0.1212, + "epoch": 2.2245322245322248, + "grad_norm": 0.6986064314842224, + "learning_rate": 1.6652806652806653e-05, + "loss": 0.061, "step": 7490 }, { - "epoch": 0.5569582652606565, - "grad_norm": 1.5042418241500854, - "learning_rate": 2.6658250408436063e-05, - "loss": 0.0902, + "epoch": 2.2275022275022276, + "grad_norm": 1.728353500366211, + "learning_rate": 1.6634986634986635e-05, + "loss": 0.0496, "step": 7500 }, { - "epoch": 0.5577008762810041, - "grad_norm": 1.637436032295227, - "learning_rate": 2.6653794742313975e-05, - "loss": 0.0608, + "epoch": 2.2304722304722304, + "grad_norm": 0.635563313961029, + "learning_rate": 1.6617166617166618e-05, + "loss": 0.0711, "step": 7510 }, { - "epoch": 0.5584434873013515, - "grad_norm": 0.7876498699188232, - "learning_rate": 2.664933907619189e-05, - "loss": 0.0649, + "epoch": 2.2334422334422332, + "grad_norm": 0.9441576600074768, + "learning_rate": 1.65993465993466e-05, + "loss": 0.0554, "step": 7520 }, { - "epoch": 0.5591860983216991, - "grad_norm": 1.2821192741394043, - "learning_rate": 2.664488341006981e-05, - "loss": 0.1155, + "epoch": 2.2364122364122365, + "grad_norm": 0.984162449836731, + "learning_rate": 1.6581526581526582e-05, + "loss": 0.059, "step": 7530 }, { - "epoch": 0.5599287093420466, - "grad_norm": 1.3642898797988892, - "learning_rate": 2.664042774394772e-05, - "loss": 0.0802, + "epoch": 2.2393822393822393, + "grad_norm": 0.7663610577583313, + "learning_rate": 1.6563706563706564e-05, + "loss": 0.0654, "step": 7540 }, { - "epoch": 0.5606713203623942, - "grad_norm": 1.7505029439926147, - "learning_rate": 2.6635972077825635e-05, - "loss": 0.1007, + "epoch": 2.242352242352242, + "grad_norm": 0.8404085636138916, + "learning_rate": 1.6545886545886546e-05, + "loss": 0.0723, "step": 7550 }, { - "epoch": 0.5614139313827418, - "grad_norm": 0.7114013433456421, - "learning_rate": 2.663151641170355e-05, - "loss": 0.0941, + "epoch": 2.2453222453222454, + "grad_norm": 0.9056734442710876, + "learning_rate": 1.652806652806653e-05, + "loss": 0.0632, "step": 7560 }, { - "epoch": 0.5621565424030892, - "grad_norm": 1.6477638483047485, - "learning_rate": 2.6627060745581465e-05, - "loss": 0.1173, + "epoch": 2.2482922482922483, + "grad_norm": 1.249064326286316, + "learning_rate": 1.651024651024651e-05, + "loss": 0.0762, "step": 7570 }, { - "epoch": 0.5628991534234368, - "grad_norm": 2.2498269081115723, - "learning_rate": 2.662260507945938e-05, - "loss": 0.1056, + "epoch": 2.251262251262251, + "grad_norm": 0.7174326181411743, + "learning_rate": 1.6492426492426496e-05, + "loss": 0.054, "step": 7580 }, { - "epoch": 0.5636417644437843, - "grad_norm": 1.7520428895950317, - "learning_rate": 2.6618149413337295e-05, - "loss": 0.0584, + "epoch": 2.2542322542322544, + "grad_norm": 0.9042089581489563, + "learning_rate": 1.6474606474606475e-05, + "loss": 0.0668, "step": 7590 }, { - "epoch": 0.5643843754641319, - "grad_norm": 1.958605170249939, - "learning_rate": 2.661369374721521e-05, - "loss": 0.0787, + "epoch": 2.257202257202257, + "grad_norm": 0.6631519794464111, + "learning_rate": 1.6456786456786457e-05, + "loss": 0.0599, "step": 7600 }, { - "epoch": 0.5651269864844795, - "grad_norm": 4.134826183319092, - "learning_rate": 2.6609238081093125e-05, - "loss": 0.1431, + "epoch": 2.26017226017226, + "grad_norm": 0.6724472045898438, + "learning_rate": 1.643896643896644e-05, + "loss": 0.0675, "step": 7610 }, { - "epoch": 0.565869597504827, - "grad_norm": 1.3306151628494263, - "learning_rate": 2.6604782414971037e-05, - "loss": 0.083, + "epoch": 2.2631422631422633, + "grad_norm": 0.48432090878486633, + "learning_rate": 1.642114642114642e-05, + "loss": 0.0565, "step": 7620 }, { - "epoch": 0.5666122085251745, - "grad_norm": 2.795405626296997, - "learning_rate": 2.660032674884895e-05, - "loss": 0.1119, + "epoch": 2.266112266112266, + "grad_norm": 0.5798602104187012, + "learning_rate": 1.6403326403326404e-05, + "loss": 0.0584, "step": 7630 }, { - "epoch": 0.567354819545522, - "grad_norm": 1.0165034532546997, - "learning_rate": 2.659587108272687e-05, - "loss": 0.0823, + "epoch": 2.269082269082269, + "grad_norm": 0.676134467124939, + "learning_rate": 1.6385506385506386e-05, + "loss": 0.0651, "step": 7640 }, { - "epoch": 0.5680974305658696, - "grad_norm": 4.020211219787598, - "learning_rate": 2.659141541660478e-05, - "loss": 0.0867, + "epoch": 2.2720522720522722, + "grad_norm": 0.6854906678199768, + "learning_rate": 1.636768636768637e-05, + "loss": 0.0711, "step": 7650 }, { - "epoch": 0.5688400415862171, - "grad_norm": 1.6462618112564087, - "learning_rate": 2.6586959750482697e-05, - "loss": 0.0917, + "epoch": 2.275022275022275, + "grad_norm": 1.0705246925354004, + "learning_rate": 1.634986634986635e-05, + "loss": 0.0635, "step": 7660 }, { - "epoch": 0.5695826526065647, - "grad_norm": 1.3625034093856812, - "learning_rate": 2.6582504084360615e-05, - "loss": 0.0833, + "epoch": 2.277992277992278, + "grad_norm": 0.6841071248054504, + "learning_rate": 1.6332046332046332e-05, + "loss": 0.0534, "step": 7670 }, { - "epoch": 0.5703252636269123, - "grad_norm": 4.22099494934082, - "learning_rate": 2.6578048418238527e-05, - "loss": 0.065, + "epoch": 2.280962280962281, + "grad_norm": 1.0041640996932983, + "learning_rate": 1.6314226314226314e-05, + "loss": 0.053, "step": 7680 }, { - "epoch": 0.5710678746472597, - "grad_norm": 2.1432032585144043, - "learning_rate": 2.657359275211644e-05, - "loss": 0.1104, + "epoch": 2.283932283932284, + "grad_norm": 0.7719654440879822, + "learning_rate": 1.6296406296406297e-05, + "loss": 0.0715, "step": 7690 }, { - "epoch": 0.5718104856676073, - "grad_norm": 1.7687879800796509, - "learning_rate": 2.656913708599436e-05, - "loss": 0.0829, + "epoch": 2.286902286902287, + "grad_norm": 0.5428666472434998, + "learning_rate": 1.627858627858628e-05, + "loss": 0.0483, "step": 7700 }, { - "epoch": 0.5725530966879548, - "grad_norm": 2.1364059448242188, - "learning_rate": 2.656468141987227e-05, - "loss": 0.0949, + "epoch": 2.2898722898722896, + "grad_norm": 0.9091588854789734, + "learning_rate": 1.626076626076626e-05, + "loss": 0.0525, "step": 7710 }, { - "epoch": 0.5732957077083024, - "grad_norm": 1.6391818523406982, - "learning_rate": 2.6560225753750187e-05, - "loss": 0.0945, + "epoch": 2.292842292842293, + "grad_norm": 0.8281648755073547, + "learning_rate": 1.6242946242946247e-05, + "loss": 0.0662, "step": 7720 }, { - "epoch": 0.57403831872865, - "grad_norm": 1.3026098012924194, - "learning_rate": 2.6555770087628098e-05, - "loss": 0.0608, + "epoch": 2.2958122958122957, + "grad_norm": 1.2040926218032837, + "learning_rate": 1.6225126225126225e-05, + "loss": 0.0561, "step": 7730 }, { - "epoch": 0.5747809297489975, - "grad_norm": 3.263688325881958, - "learning_rate": 2.6551314421506017e-05, - "loss": 0.0749, + "epoch": 2.2987822987822986, + "grad_norm": 1.1443513631820679, + "learning_rate": 1.6207306207306207e-05, + "loss": 0.0462, "step": 7740 }, { - "epoch": 0.575523540769345, - "grad_norm": 1.6053320169448853, - "learning_rate": 2.654685875538393e-05, - "loss": 0.0739, + "epoch": 2.301752301752302, + "grad_norm": 0.990245521068573, + "learning_rate": 1.618948618948619e-05, + "loss": 0.0647, "step": 7750 }, { - "epoch": 0.5762661517896925, - "grad_norm": 2.198606491088867, - "learning_rate": 2.6542403089261843e-05, - "loss": 0.1319, + "epoch": 2.3047223047223047, + "grad_norm": 0.6413756608963013, + "learning_rate": 1.6171666171666172e-05, + "loss": 0.0713, "step": 7760 }, { - "epoch": 0.5770087628100401, - "grad_norm": 2.7301955223083496, - "learning_rate": 2.653794742313976e-05, - "loss": 0.1023, + "epoch": 2.3076923076923075, + "grad_norm": 0.5494916439056396, + "learning_rate": 1.6153846153846154e-05, + "loss": 0.0594, "step": 7770 }, { - "epoch": 0.5777513738303877, - "grad_norm": 1.7216728925704956, - "learning_rate": 2.6533491757017677e-05, - "loss": 0.0893, + "epoch": 2.3106623106623108, + "grad_norm": 0.6467922329902649, + "learning_rate": 1.6136026136026136e-05, + "loss": 0.0586, "step": 7780 }, { - "epoch": 0.5784939848507352, - "grad_norm": 2.964611053466797, - "learning_rate": 2.6529036090895588e-05, - "loss": 0.0759, + "epoch": 2.3136323136323136, + "grad_norm": 0.5509008169174194, + "learning_rate": 1.6118206118206122e-05, + "loss": 0.0574, "step": 7790 }, { - "epoch": 0.5792365958710828, - "grad_norm": 1.5537538528442383, - "learning_rate": 2.6524580424773503e-05, - "loss": 0.0987, + "epoch": 2.3166023166023164, + "grad_norm": 0.7247257232666016, + "learning_rate": 1.61003861003861e-05, + "loss": 0.0676, "step": 7800 }, { - "epoch": 0.5799792068914302, - "grad_norm": 5.19601583480835, - "learning_rate": 2.652012475865142e-05, - "loss": 0.0846, + "epoch": 2.3195723195723197, + "grad_norm": 0.7993832230567932, + "learning_rate": 1.6082566082566083e-05, + "loss": 0.0666, "step": 7810 }, { - "epoch": 0.5807218179117778, - "grad_norm": 1.2774734497070312, - "learning_rate": 2.6515669092529333e-05, - "loss": 0.0721, + "epoch": 2.3225423225423225, + "grad_norm": 0.5373691916465759, + "learning_rate": 1.6064746064746065e-05, + "loss": 0.0551, "step": 7820 }, { - "epoch": 0.5814644289321254, - "grad_norm": 1.1469454765319824, - "learning_rate": 2.6511213426407248e-05, - "loss": 0.114, + "epoch": 2.3255123255123253, + "grad_norm": 0.8234823942184448, + "learning_rate": 1.6046926046926047e-05, + "loss": 0.0696, "step": 7830 }, { - "epoch": 0.5822070399524729, - "grad_norm": 2.6085078716278076, - "learning_rate": 2.6506757760285163e-05, - "loss": 0.0769, + "epoch": 2.3284823284823286, + "grad_norm": 0.6193700432777405, + "learning_rate": 1.602910602910603e-05, + "loss": 0.0742, "step": 7840 }, { - "epoch": 0.5829496509728205, - "grad_norm": 0.7333324551582336, - "learning_rate": 2.6502302094163078e-05, - "loss": 0.0583, + "epoch": 2.3314523314523314, + "grad_norm": 0.9918487071990967, + "learning_rate": 1.601128601128601e-05, + "loss": 0.0636, "step": 7850 }, { - "epoch": 0.5836922619931679, - "grad_norm": 3.896169900894165, - "learning_rate": 2.6497846428040993e-05, - "loss": 0.0971, + "epoch": 2.3344223344223343, + "grad_norm": 0.47731947898864746, + "learning_rate": 1.5993465993465997e-05, + "loss": 0.0605, "step": 7860 }, { - "epoch": 0.5844348730135155, - "grad_norm": 0.8232213854789734, - "learning_rate": 2.6493390761918908e-05, - "loss": 0.0795, + "epoch": 2.3373923373923375, + "grad_norm": 1.2060436010360718, + "learning_rate": 1.5975645975645976e-05, + "loss": 0.0532, "step": 7870 }, { - "epoch": 0.585177484033863, - "grad_norm": 2.7149336338043213, - "learning_rate": 2.6488935095796823e-05, - "loss": 0.0847, + "epoch": 2.3403623403623404, + "grad_norm": 0.9285519123077393, + "learning_rate": 1.5957825957825958e-05, + "loss": 0.0638, "step": 7880 }, { - "epoch": 0.5859200950542106, - "grad_norm": 2.990295886993408, - "learning_rate": 2.6484479429674738e-05, - "loss": 0.0692, + "epoch": 2.343332343332343, + "grad_norm": 1.4281196594238281, + "learning_rate": 1.594000594000594e-05, + "loss": 0.0678, "step": 7890 }, { - "epoch": 0.5866627060745582, - "grad_norm": 3.7285399436950684, - "learning_rate": 2.648002376355265e-05, - "loss": 0.0734, + "epoch": 2.3463023463023465, + "grad_norm": 1.6847813129425049, + "learning_rate": 1.5922185922185922e-05, + "loss": 0.0629, "step": 7900 }, { - "epoch": 0.5874053170949057, - "grad_norm": 1.7510877847671509, - "learning_rate": 2.6475568097430568e-05, - "loss": 0.0944, + "epoch": 2.3492723492723493, + "grad_norm": 0.5582916140556335, + "learning_rate": 1.5904365904365904e-05, + "loss": 0.0614, "step": 7910 }, { - "epoch": 0.5881479281152532, - "grad_norm": 2.186464786529541, - "learning_rate": 2.6471112431308483e-05, - "loss": 0.1009, + "epoch": 2.352242352242352, + "grad_norm": 0.5601425170898438, + "learning_rate": 1.5886545886545887e-05, + "loss": 0.0549, "step": 7920 }, { - "epoch": 0.5888905391356007, - "grad_norm": 3.2270450592041016, - "learning_rate": 2.6466656765186395e-05, - "loss": 0.0815, + "epoch": 2.3552123552123554, + "grad_norm": 0.8730356097221375, + "learning_rate": 1.5868725868725872e-05, + "loss": 0.051, "step": 7930 }, { - "epoch": 0.5896331501559483, - "grad_norm": 2.993773937225342, - "learning_rate": 2.6462201099064313e-05, - "loss": 0.0764, + "epoch": 2.358182358182358, + "grad_norm": 0.9987317323684692, + "learning_rate": 1.585090585090585e-05, + "loss": 0.0598, "step": 7940 }, { - "epoch": 0.5903757611762959, - "grad_norm": 0.9298529624938965, - "learning_rate": 2.6457745432942225e-05, - "loss": 0.0933, + "epoch": 2.361152361152361, + "grad_norm": 0.689275860786438, + "learning_rate": 1.5833085833085833e-05, + "loss": 0.0477, "step": 7950 }, { - "epoch": 0.5911183721966434, - "grad_norm": 3.5018069744110107, - "learning_rate": 2.645328976682014e-05, - "loss": 0.1001, + "epoch": 2.3641223641223643, + "grad_norm": 0.8642667531967163, + "learning_rate": 1.5815265815265815e-05, + "loss": 0.0619, "step": 7960 }, { - "epoch": 0.591860983216991, - "grad_norm": 1.2015966176986694, - "learning_rate": 2.6448834100698055e-05, - "loss": 0.0839, + "epoch": 2.367092367092367, + "grad_norm": 0.675285279750824, + "learning_rate": 1.5797445797445797e-05, + "loss": 0.0671, "step": 7970 }, { - "epoch": 0.5926035942373384, - "grad_norm": 1.9032807350158691, - "learning_rate": 2.644437843457597e-05, - "loss": 0.1019, + "epoch": 2.37006237006237, + "grad_norm": 0.9872873425483704, + "learning_rate": 1.577962577962578e-05, + "loss": 0.0523, "step": 7980 }, { - "epoch": 0.593346205257686, - "grad_norm": 2.2178187370300293, - "learning_rate": 2.6439922768453885e-05, - "loss": 0.0525, + "epoch": 2.3730323730323732, + "grad_norm": 0.7550463676452637, + "learning_rate": 1.5761805761805762e-05, + "loss": 0.0616, "step": 7990 }, { - "epoch": 0.5940888162780336, - "grad_norm": 2.323493719100952, - "learning_rate": 2.64354671023318e-05, - "loss": 0.1142, + "epoch": 2.376002376002376, + "grad_norm": 1.0801265239715576, + "learning_rate": 1.5743985743985747e-05, + "loss": 0.0565, "step": 8000 }, { - "epoch": 0.5948314272983811, - "grad_norm": 5.971455097198486, - "learning_rate": 2.6431011436209715e-05, - "loss": 0.1032, + "epoch": 2.378972378972379, + "grad_norm": 1.093015432357788, + "learning_rate": 1.5726165726165726e-05, + "loss": 0.0666, "step": 8010 }, { - "epoch": 0.5955740383187287, - "grad_norm": 1.563317060470581, - "learning_rate": 2.642655577008763e-05, - "loss": 0.0701, + "epoch": 2.381942381942382, + "grad_norm": 0.882056713104248, + "learning_rate": 1.5708345708345708e-05, + "loss": 0.0609, "step": 8020 }, { - "epoch": 0.5963166493390762, - "grad_norm": 0.8707819581031799, - "learning_rate": 2.642210010396554e-05, - "loss": 0.1017, + "epoch": 2.384912384912385, + "grad_norm": 0.7684606909751892, + "learning_rate": 1.569052569052569e-05, + "loss": 0.063, "step": 8030 }, { - "epoch": 0.5970592603594237, - "grad_norm": 1.7506704330444336, - "learning_rate": 2.6417644437843456e-05, - "loss": 0.0926, + "epoch": 2.387882387882388, + "grad_norm": 0.9712556600570679, + "learning_rate": 1.5672705672705673e-05, + "loss": 0.0598, "step": 8040 }, { - "epoch": 0.5978018713797713, - "grad_norm": 1.1731964349746704, - "learning_rate": 2.6413188771721375e-05, - "loss": 0.0853, + "epoch": 2.390852390852391, + "grad_norm": 0.7231224179267883, + "learning_rate": 1.5654885654885655e-05, + "loss": 0.0671, "step": 8050 }, { - "epoch": 0.5985444824001188, - "grad_norm": 2.8358826637268066, - "learning_rate": 2.6408733105599286e-05, - "loss": 0.0797, + "epoch": 2.393822393822394, + "grad_norm": 0.6880158185958862, + "learning_rate": 1.5637065637065637e-05, + "loss": 0.0638, "step": 8060 }, { - "epoch": 0.5992870934204664, - "grad_norm": 2.624128818511963, - "learning_rate": 2.64042774394772e-05, - "loss": 0.1109, + "epoch": 2.3967923967923968, + "grad_norm": 0.8751171231269836, + "learning_rate": 1.5619245619245622e-05, + "loss": 0.0607, "step": 8070 }, { - "epoch": 0.6000297044408139, - "grad_norm": 1.6009690761566162, - "learning_rate": 2.639982177335512e-05, - "loss": 0.0783, + "epoch": 2.3997623997623996, + "grad_norm": 1.0133845806121826, + "learning_rate": 1.56014256014256e-05, + "loss": 0.0663, "step": 8080 }, { - "epoch": 0.6007723154611615, - "grad_norm": 1.9895691871643066, - "learning_rate": 2.639536610723303e-05, - "loss": 0.1047, + "epoch": 2.402732402732403, + "grad_norm": 0.4937121272087097, + "learning_rate": 1.5583605583605583e-05, + "loss": 0.0586, "step": 8090 }, { - "epoch": 0.601514926481509, - "grad_norm": 0.5253069996833801, - "learning_rate": 2.6390910441110946e-05, - "loss": 0.0767, + "epoch": 2.4057024057024057, + "grad_norm": 0.6717655062675476, + "learning_rate": 1.5565785565785566e-05, + "loss": 0.0709, "step": 8100 }, { - "epoch": 0.6022575375018565, - "grad_norm": 2.7466979026794434, - "learning_rate": 2.6386454774988865e-05, - "loss": 0.1141, + "epoch": 2.4086724086724085, + "grad_norm": 0.9288782477378845, + "learning_rate": 1.5547965547965548e-05, + "loss": 0.0728, "step": 8110 }, { - "epoch": 0.6030001485222041, - "grad_norm": 0.8582619428634644, - "learning_rate": 2.6381999108866776e-05, - "loss": 0.0747, + "epoch": 2.4116424116424118, + "grad_norm": 0.9981745481491089, + "learning_rate": 1.553014553014553e-05, + "loss": 0.0518, "step": 8120 }, { - "epoch": 0.6037427595425516, - "grad_norm": 1.5232957601547241, - "learning_rate": 2.637754344274469e-05, - "loss": 0.0756, + "epoch": 2.4146124146124146, + "grad_norm": 0.6858815550804138, + "learning_rate": 1.5512325512325512e-05, + "loss": 0.059, "step": 8130 }, { - "epoch": 0.6044853705628992, - "grad_norm": 1.6779173612594604, - "learning_rate": 2.6373087776622603e-05, - "loss": 0.06, + "epoch": 2.4175824175824174, + "grad_norm": 1.0234894752502441, + "learning_rate": 1.5494505494505498e-05, + "loss": 0.0656, "step": 8140 }, { - "epoch": 0.6052279815832466, - "grad_norm": 1.4858782291412354, - "learning_rate": 2.636863211050052e-05, - "loss": 0.0934, + "epoch": 2.4205524205524207, + "grad_norm": 1.0168789625167847, + "learning_rate": 1.547668547668548e-05, + "loss": 0.0661, "step": 8150 }, { - "epoch": 0.6059705926035942, - "grad_norm": 1.1481568813323975, - "learning_rate": 2.6364176444378436e-05, - "loss": 0.1104, + "epoch": 2.4235224235224235, + "grad_norm": 0.5430660247802734, + "learning_rate": 1.545886545886546e-05, + "loss": 0.0498, "step": 8160 }, { - "epoch": 0.6067132036239418, - "grad_norm": 2.9699254035949707, - "learning_rate": 2.6359720778256348e-05, - "loss": 0.0974, + "epoch": 2.4264924264924264, + "grad_norm": 0.6519795656204224, + "learning_rate": 1.544104544104544e-05, + "loss": 0.0565, "step": 8170 }, { - "epoch": 0.6074558146442893, - "grad_norm": 2.6445741653442383, - "learning_rate": 2.6355265112134266e-05, - "loss": 0.0873, + "epoch": 2.4294624294624296, + "grad_norm": 0.4860493540763855, + "learning_rate": 1.5423225423225423e-05, + "loss": 0.0502, "step": 8180 }, { - "epoch": 0.6081984256646369, - "grad_norm": 0.4445909559726715, - "learning_rate": 2.635080944601218e-05, - "loss": 0.0713, + "epoch": 2.4324324324324325, + "grad_norm": 1.287544846534729, + "learning_rate": 1.5405405405405405e-05, + "loss": 0.0619, "step": 8190 }, { - "epoch": 0.6089410366849844, - "grad_norm": 2.068956136703491, - "learning_rate": 2.6346353779890093e-05, - "loss": 0.0632, + "epoch": 2.4354024354024353, + "grad_norm": 0.7595537900924683, + "learning_rate": 1.5387585387585387e-05, + "loss": 0.062, "step": 8200 }, { - "epoch": 0.6096836477053319, - "grad_norm": 2.9205379486083984, - "learning_rate": 2.6341898113768008e-05, - "loss": 0.061, + "epoch": 2.4383724383724386, + "grad_norm": 1.2238869667053223, + "learning_rate": 1.5369765369765373e-05, + "loss": 0.0602, "step": 8210 }, { - "epoch": 0.6104262587256795, - "grad_norm": 0.8605203032493591, - "learning_rate": 2.6337442447645926e-05, - "loss": 0.1332, + "epoch": 2.4413424413424414, + "grad_norm": 0.6044248342514038, + "learning_rate": 1.5351945351945355e-05, + "loss": 0.064, "step": 8220 }, { - "epoch": 0.611168869746027, - "grad_norm": 1.654402256011963, - "learning_rate": 2.6332986781523838e-05, - "loss": 0.0808, + "epoch": 2.444312444312444, + "grad_norm": 1.0515096187591553, + "learning_rate": 1.5334125334125334e-05, + "loss": 0.0623, "step": 8230 }, { - "epoch": 0.6119114807663746, - "grad_norm": 3.046501636505127, - "learning_rate": 2.6328531115401753e-05, - "loss": 0.1068, + "epoch": 2.447282447282447, + "grad_norm": 0.3388879895210266, + "learning_rate": 1.5316305316305316e-05, + "loss": 0.0508, "step": 8240 }, { - "epoch": 0.6126540917867221, - "grad_norm": 2.973254680633545, - "learning_rate": 2.6324075449279668e-05, - "loss": 0.0951, + "epoch": 2.4502524502524503, + "grad_norm": 1.4969494342803955, + "learning_rate": 1.5298485298485298e-05, + "loss": 0.0562, "step": 8250 }, { - "epoch": 0.6133967028070697, - "grad_norm": 0.9132028222084045, - "learning_rate": 2.6319619783157583e-05, - "loss": 0.0798, + "epoch": 2.453222453222453, + "grad_norm": 1.1669758558273315, + "learning_rate": 1.528066528066528e-05, + "loss": 0.0694, "step": 8260 }, { - "epoch": 0.6141393138274172, - "grad_norm": 1.8893526792526245, - "learning_rate": 2.6315164117035498e-05, - "loss": 0.0725, + "epoch": 2.456192456192456, + "grad_norm": 0.8401341438293457, + "learning_rate": 1.5262845262845263e-05, + "loss": 0.0596, "step": 8270 }, { - "epoch": 0.6148819248477647, - "grad_norm": 2.337425708770752, - "learning_rate": 2.6310708450913413e-05, - "loss": 0.0709, + "epoch": 2.4591624591624592, + "grad_norm": 0.8268706798553467, + "learning_rate": 1.5245025245025246e-05, + "loss": 0.0595, "step": 8280 }, { - "epoch": 0.6156245358681123, - "grad_norm": 1.1997939348220825, - "learning_rate": 2.6306252784791328e-05, - "loss": 0.0892, + "epoch": 2.462132462132462, + "grad_norm": 1.1533724069595337, + "learning_rate": 1.5227205227205229e-05, + "loss": 0.0586, "step": 8290 }, { - "epoch": 0.6163671468884598, - "grad_norm": 2.1006369590759277, - "learning_rate": 2.6301797118669243e-05, - "loss": 0.0877, + "epoch": 2.465102465102465, + "grad_norm": 0.972823441028595, + "learning_rate": 1.520938520938521e-05, + "loss": 0.0431, "step": 8300 }, { - "epoch": 0.6171097579088074, - "grad_norm": 1.0404902696609497, - "learning_rate": 2.6297341452547154e-05, - "loss": 0.0733, + "epoch": 2.468072468072468, + "grad_norm": 0.6986684799194336, + "learning_rate": 1.5191565191565193e-05, + "loss": 0.0591, "step": 8310 }, { - "epoch": 0.617852368929155, - "grad_norm": 1.4689126014709473, - "learning_rate": 2.6292885786425073e-05, - "loss": 0.0738, + "epoch": 2.471042471042471, + "grad_norm": 0.6188581585884094, + "learning_rate": 1.5173745173745173e-05, + "loss": 0.0557, "step": 8320 }, { - "epoch": 0.6185949799495024, - "grad_norm": 1.669219970703125, - "learning_rate": 2.6288430120302988e-05, - "loss": 0.0795, + "epoch": 2.474012474012474, + "grad_norm": 0.7166512608528137, + "learning_rate": 1.5155925155925156e-05, + "loss": 0.0562, "step": 8330 }, { - "epoch": 0.61933759096985, - "grad_norm": 1.8779352903366089, - "learning_rate": 2.62839744541809e-05, - "loss": 0.0967, + "epoch": 2.476982476982477, + "grad_norm": 0.6496429443359375, + "learning_rate": 1.5138105138105138e-05, + "loss": 0.0555, "step": 8340 }, { - "epoch": 0.6200802019901975, - "grad_norm": 2.112928867340088, - "learning_rate": 2.6279518788058818e-05, - "loss": 0.0695, + "epoch": 2.47995247995248, + "grad_norm": 0.9905614852905273, + "learning_rate": 1.5120285120285122e-05, + "loss": 0.0574, "step": 8350 }, { - "epoch": 0.6208228130105451, - "grad_norm": 1.240665078163147, - "learning_rate": 2.627506312193673e-05, - "loss": 0.1107, + "epoch": 2.4829224829224827, + "grad_norm": 0.5973777770996094, + "learning_rate": 1.5102465102465104e-05, + "loss": 0.0636, "step": 8360 }, { - "epoch": 0.6215654240308927, - "grad_norm": 1.4730993509292603, - "learning_rate": 2.6270607455814644e-05, - "loss": 0.0955, + "epoch": 2.485892485892486, + "grad_norm": 0.4405939280986786, + "learning_rate": 1.5084645084645086e-05, + "loss": 0.0795, "step": 8370 }, { - "epoch": 0.6223080350512402, - "grad_norm": 1.134709119796753, - "learning_rate": 2.626615178969256e-05, - "loss": 0.1074, + "epoch": 2.488862488862489, + "grad_norm": 0.6318250894546509, + "learning_rate": 1.5066825066825068e-05, + "loss": 0.0599, "step": 8380 }, { - "epoch": 0.6230506460715877, - "grad_norm": 2.589599132537842, - "learning_rate": 2.6261696123570474e-05, - "loss": 0.0796, + "epoch": 2.4918324918324917, + "grad_norm": 0.5132951140403748, + "learning_rate": 1.5049005049005049e-05, + "loss": 0.0632, "step": 8390 }, { - "epoch": 0.6237932570919352, - "grad_norm": 2.0985918045043945, - "learning_rate": 2.625724045744839e-05, - "loss": 0.0877, + "epoch": 2.494802494802495, + "grad_norm": 1.1621384620666504, + "learning_rate": 1.503118503118503e-05, + "loss": 0.0716, "step": 8400 }, { - "epoch": 0.6245358681122828, - "grad_norm": 4.56246280670166, - "learning_rate": 2.6252784791326304e-05, - "loss": 0.0804, + "epoch": 2.4977724977724978, + "grad_norm": 0.7687697410583496, + "learning_rate": 1.5013365013365013e-05, + "loss": 0.0543, "step": 8410 }, { - "epoch": 0.6252784791326303, - "grad_norm": 2.3391928672790527, - "learning_rate": 2.624832912520422e-05, - "loss": 0.0987, + "epoch": 2.5007425007425006, + "grad_norm": 1.1648590564727783, + "learning_rate": 1.4995544995544995e-05, + "loss": 0.0525, "step": 8420 }, { - "epoch": 0.6260210901529779, - "grad_norm": 2.231879472732544, - "learning_rate": 2.6243873459082134e-05, - "loss": 0.0951, + "epoch": 2.503712503712504, + "grad_norm": 1.093809723854065, + "learning_rate": 1.4977724977724977e-05, + "loss": 0.0577, "step": 8430 }, { - "epoch": 0.6267637011733254, - "grad_norm": 0.44721463322639465, - "learning_rate": 2.623941779296005e-05, - "loss": 0.0713, + "epoch": 2.5066825066825067, + "grad_norm": 0.5859802961349487, + "learning_rate": 1.4959904959904961e-05, + "loss": 0.0505, "step": 8440 }, { - "epoch": 0.6275063121936729, - "grad_norm": 1.6443843841552734, - "learning_rate": 2.623496212683796e-05, - "loss": 0.1187, + "epoch": 2.5096525096525095, + "grad_norm": 0.818012535572052, + "learning_rate": 1.4942084942084943e-05, + "loss": 0.06, "step": 8450 }, { - "epoch": 0.6282489232140205, - "grad_norm": 3.586520195007324, - "learning_rate": 2.623050646071588e-05, - "loss": 0.0957, + "epoch": 2.512622512622513, + "grad_norm": 0.4179311990737915, + "learning_rate": 1.4924264924264924e-05, + "loss": 0.0538, "step": 8460 }, { - "epoch": 0.628991534234368, - "grad_norm": 1.5607584714889526, - "learning_rate": 2.622605079459379e-05, - "loss": 0.0959, + "epoch": 2.5155925155925156, + "grad_norm": 1.3955974578857422, + "learning_rate": 1.4906444906444908e-05, + "loss": 0.0721, "step": 8470 }, { - "epoch": 0.6297341452547156, - "grad_norm": 2.6211838722229004, - "learning_rate": 2.6221595128471706e-05, - "loss": 0.1139, + "epoch": 2.5185625185625184, + "grad_norm": 0.8818016052246094, + "learning_rate": 1.488862488862489e-05, + "loss": 0.0578, "step": 8480 }, { - "epoch": 0.6304767562750632, - "grad_norm": 2.3073689937591553, - "learning_rate": 2.6217139462349624e-05, - "loss": 0.0912, + "epoch": 2.5215325215325217, + "grad_norm": 0.7490390539169312, + "learning_rate": 1.487080487080487e-05, + "loss": 0.0528, "step": 8490 }, { - "epoch": 0.6312193672954106, - "grad_norm": 1.4929405450820923, - "learning_rate": 2.6212683796227536e-05, - "loss": 0.0652, + "epoch": 2.5245025245025245, + "grad_norm": 0.7568694949150085, + "learning_rate": 1.4852984852984852e-05, + "loss": 0.0649, "step": 8500 }, { - "epoch": 0.6319619783157582, - "grad_norm": 4.748650074005127, - "learning_rate": 2.620822813010545e-05, - "loss": 0.0861, + "epoch": 2.5274725274725274, + "grad_norm": 0.7171285152435303, + "learning_rate": 1.4835164835164836e-05, + "loss": 0.0474, "step": 8510 }, { - "epoch": 0.6327045893361057, - "grad_norm": 1.6058298349380493, - "learning_rate": 2.620377246398337e-05, - "loss": 0.094, + "epoch": 2.5304425304425306, + "grad_norm": 0.5041258931159973, + "learning_rate": 1.4817344817344818e-05, + "loss": 0.0562, "step": 8520 }, { - "epoch": 0.6334472003564533, - "grad_norm": 0.9887398481369019, - "learning_rate": 2.619931679786128e-05, - "loss": 0.0531, + "epoch": 2.5334125334125335, + "grad_norm": 0.5090057253837585, + "learning_rate": 1.4799524799524799e-05, + "loss": 0.0709, "step": 8530 }, { - "epoch": 0.6341898113768009, - "grad_norm": 1.5614607334136963, - "learning_rate": 2.6194861131739196e-05, - "loss": 0.0575, + "epoch": 2.5363825363825363, + "grad_norm": 0.5413128137588501, + "learning_rate": 1.4781704781704783e-05, + "loss": 0.0619, "step": 8540 }, { - "epoch": 0.6349324223971484, - "grad_norm": 0.485836923122406, - "learning_rate": 2.6190405465617107e-05, - "loss": 0.0738, + "epoch": 2.5393525393525396, + "grad_norm": 0.8878335952758789, + "learning_rate": 1.4763884763884765e-05, + "loss": 0.0625, "step": 8550 }, { - "epoch": 0.6356750334174959, - "grad_norm": 2.3071041107177734, - "learning_rate": 2.6185949799495026e-05, - "loss": 0.0965, + "epoch": 2.5423225423225424, + "grad_norm": 0.7137174606323242, + "learning_rate": 1.4746064746064745e-05, + "loss": 0.057, "step": 8560 }, { - "epoch": 0.6364176444378434, - "grad_norm": 1.3034030199050903, - "learning_rate": 2.618149413337294e-05, - "loss": 0.0766, + "epoch": 2.5452925452925452, + "grad_norm": 0.7620881795883179, + "learning_rate": 1.4728244728244728e-05, + "loss": 0.0681, "step": 8570 }, { - "epoch": 0.637160255458191, - "grad_norm": 2.9087538719177246, - "learning_rate": 2.6177038467250852e-05, - "loss": 0.0808, + "epoch": 2.5482625482625485, + "grad_norm": 1.1073065996170044, + "learning_rate": 1.4710424710424711e-05, + "loss": 0.0708, "step": 8580 }, { - "epoch": 0.6379028664785386, - "grad_norm": 1.7364327907562256, - "learning_rate": 2.617258280112877e-05, - "loss": 0.0727, + "epoch": 2.5512325512325513, + "grad_norm": 0.7626820802688599, + "learning_rate": 1.4692604692604694e-05, + "loss": 0.0616, "step": 8590 }, { - "epoch": 0.6386454774988861, - "grad_norm": 1.3691768646240234, - "learning_rate": 2.6168127135006686e-05, - "loss": 0.1037, + "epoch": 2.554202554202554, + "grad_norm": 0.849026083946228, + "learning_rate": 1.4674784674784674e-05, + "loss": 0.0511, "step": 8600 }, { - "epoch": 0.6393880885192337, - "grad_norm": 3.924298048019409, - "learning_rate": 2.6163671468884597e-05, - "loss": 0.1055, + "epoch": 2.5571725571725574, + "grad_norm": 0.7794767618179321, + "learning_rate": 1.4656964656964658e-05, + "loss": 0.0527, "step": 8610 }, { - "epoch": 0.6401306995395811, - "grad_norm": 4.720126152038574, - "learning_rate": 2.6159215802762512e-05, - "loss": 0.0634, + "epoch": 2.5601425601425603, + "grad_norm": 0.6230559349060059, + "learning_rate": 1.463914463914464e-05, + "loss": 0.0615, "step": 8620 }, { - "epoch": 0.6408733105599287, - "grad_norm": 0.719524621963501, - "learning_rate": 2.615476013664043e-05, - "loss": 0.105, + "epoch": 2.563112563112563, + "grad_norm": 0.517436146736145, + "learning_rate": 1.4621324621324622e-05, + "loss": 0.0562, "step": 8630 }, { - "epoch": 0.6416159215802762, - "grad_norm": 2.0264840126037598, - "learning_rate": 2.6150304470518342e-05, - "loss": 0.0827, + "epoch": 2.5660825660825664, + "grad_norm": 0.835220217704773, + "learning_rate": 1.4603504603504603e-05, + "loss": 0.0548, "step": 8640 }, { - "epoch": 0.6423585326006238, - "grad_norm": 2.5915403366088867, - "learning_rate": 2.6145848804396257e-05, - "loss": 0.0989, + "epoch": 2.569052569052569, + "grad_norm": 0.8735977411270142, + "learning_rate": 1.4585684585684587e-05, + "loss": 0.0728, "step": 8650 }, { - "epoch": 0.6431011436209714, - "grad_norm": 1.406114935874939, - "learning_rate": 2.6141393138274172e-05, - "loss": 0.0776, + "epoch": 2.572022572022572, + "grad_norm": 0.27027377486228943, + "learning_rate": 1.4567864567864569e-05, + "loss": 0.0512, "step": 8660 }, { - "epoch": 0.6438437546413188, - "grad_norm": 1.9596368074417114, - "learning_rate": 2.6136937472152087e-05, - "loss": 0.1162, + "epoch": 2.574992574992575, + "grad_norm": 0.804161548614502, + "learning_rate": 1.455004455004455e-05, + "loss": 0.0598, "step": 8670 }, { - "epoch": 0.6445863656616664, - "grad_norm": 1.9101582765579224, - "learning_rate": 2.6132481806030002e-05, - "loss": 0.1058, + "epoch": 2.577962577962578, + "grad_norm": 0.9159836173057556, + "learning_rate": 1.4532224532224533e-05, + "loss": 0.0755, "step": 8680 }, { - "epoch": 0.6453289766820139, - "grad_norm": 3.665165424346924, - "learning_rate": 2.6128026139907917e-05, - "loss": 0.0834, + "epoch": 2.580932580932581, + "grad_norm": 1.0621222257614136, + "learning_rate": 1.4514404514404515e-05, + "loss": 0.0599, "step": 8690 }, { - "epoch": 0.6460715877023615, - "grad_norm": 1.8130497932434082, - "learning_rate": 2.6123570473785832e-05, - "loss": 0.0572, + "epoch": 2.5839025839025838, + "grad_norm": 0.61686110496521, + "learning_rate": 1.4496584496584498e-05, + "loss": 0.0606, "step": 8700 }, { - "epoch": 0.6468141987227091, - "grad_norm": 2.0652337074279785, - "learning_rate": 2.6119114807663747e-05, - "loss": 0.1246, + "epoch": 2.586872586872587, + "grad_norm": 0.3363722860813141, + "learning_rate": 1.4478764478764478e-05, + "loss": 0.0451, "step": 8710 }, { - "epoch": 0.6475568097430566, - "grad_norm": 1.8479968309402466, - "learning_rate": 2.611465914154166e-05, - "loss": 0.0879, + "epoch": 2.58984258984259, + "grad_norm": 0.5933698415756226, + "learning_rate": 1.4460944460944462e-05, + "loss": 0.0635, "step": 8720 }, { - "epoch": 0.6482994207634041, - "grad_norm": 1.4413061141967773, - "learning_rate": 2.6110203475419577e-05, - "loss": 0.0743, + "epoch": 2.5928125928125927, + "grad_norm": 0.6424260139465332, + "learning_rate": 1.4443124443124444e-05, + "loss": 0.0537, "step": 8730 }, { - "epoch": 0.6490420317837516, - "grad_norm": 2.89367413520813, - "learning_rate": 2.6105747809297492e-05, - "loss": 0.0707, + "epoch": 2.5957825957825955, + "grad_norm": 0.981282651424408, + "learning_rate": 1.4425304425304425e-05, + "loss": 0.0635, "step": 8740 }, { - "epoch": 0.6497846428040992, - "grad_norm": 3.1368815898895264, - "learning_rate": 2.6101292143175404e-05, - "loss": 0.0773, + "epoch": 2.598752598752599, + "grad_norm": 1.4847664833068848, + "learning_rate": 1.4407484407484408e-05, + "loss": 0.0664, "step": 8750 }, { - "epoch": 0.6505272538244468, - "grad_norm": 2.096843719482422, - "learning_rate": 2.6096836477053322e-05, - "loss": 0.0732, + "epoch": 2.6017226017226016, + "grad_norm": 0.5225690007209778, + "learning_rate": 1.438966438966439e-05, + "loss": 0.061, "step": 8760 }, { - "epoch": 0.6512698648447943, - "grad_norm": 2.454930067062378, - "learning_rate": 2.6092380810931234e-05, - "loss": 0.0926, + "epoch": 2.6046926046926044, + "grad_norm": 0.6397286057472229, + "learning_rate": 1.4371844371844373e-05, + "loss": 0.0674, "step": 8770 }, { - "epoch": 0.6520124758651419, - "grad_norm": 2.2393689155578613, - "learning_rate": 2.608792514480915e-05, - "loss": 0.113, + "epoch": 2.6076626076626077, + "grad_norm": 0.8102043867111206, + "learning_rate": 1.4354024354024353e-05, + "loss": 0.067, "step": 8780 }, { - "epoch": 0.6527550868854893, - "grad_norm": 1.3184117078781128, - "learning_rate": 2.6083469478687064e-05, - "loss": 0.0803, + "epoch": 2.6106326106326105, + "grad_norm": 0.9729601144790649, + "learning_rate": 1.4336204336204337e-05, + "loss": 0.0569, "step": 8790 }, { - "epoch": 0.6534976979058369, - "grad_norm": 1.2592401504516602, - "learning_rate": 2.607901381256498e-05, - "loss": 0.06, + "epoch": 2.6136026136026134, + "grad_norm": 0.6307176947593689, + "learning_rate": 1.431838431838432e-05, + "loss": 0.056, "step": 8800 }, { - "epoch": 0.6542403089261845, - "grad_norm": 1.8193804025650024, - "learning_rate": 2.6074558146442894e-05, - "loss": 0.0818, + "epoch": 2.6165726165726166, + "grad_norm": 0.8650787472724915, + "learning_rate": 1.43005643005643e-05, + "loss": 0.0674, "step": 8810 }, { - "epoch": 0.654982919946532, - "grad_norm": 0.5750879049301147, - "learning_rate": 2.607010248032081e-05, - "loss": 0.0704, + "epoch": 2.6195426195426195, + "grad_norm": 0.9029502868652344, + "learning_rate": 1.4282744282744284e-05, + "loss": 0.0533, "step": 8820 }, { - "epoch": 0.6557255309668796, - "grad_norm": 2.028292655944824, - "learning_rate": 2.6065646814198724e-05, - "loss": 0.0577, + "epoch": 2.6225126225126223, + "grad_norm": 0.817317008972168, + "learning_rate": 1.4264924264924266e-05, + "loss": 0.0596, "step": 8830 }, { - "epoch": 0.6564681419872271, - "grad_norm": 2.086024522781372, - "learning_rate": 2.606119114807664e-05, - "loss": 0.0675, + "epoch": 2.6254826254826256, + "grad_norm": 0.9638757705688477, + "learning_rate": 1.4247104247104248e-05, + "loss": 0.0603, "step": 8840 }, { - "epoch": 0.6572107530075746, - "grad_norm": 3.66861891746521, - "learning_rate": 2.6056735481954554e-05, - "loss": 0.066, + "epoch": 2.6284526284526284, + "grad_norm": 0.9621570110321045, + "learning_rate": 1.4229284229284228e-05, + "loss": 0.0612, "step": 8850 }, { - "epoch": 0.6579533640279221, - "grad_norm": 1.3219988346099854, - "learning_rate": 2.6052279815832465e-05, - "loss": 0.0709, + "epoch": 2.631422631422631, + "grad_norm": 0.504012942314148, + "learning_rate": 1.4211464211464212e-05, + "loss": 0.0747, "step": 8860 }, { - "epoch": 0.6586959750482697, - "grad_norm": 1.395115852355957, - "learning_rate": 2.6047824149710384e-05, - "loss": 0.1049, + "epoch": 2.6343926343926345, + "grad_norm": 1.0219043493270874, + "learning_rate": 1.4193644193644194e-05, + "loss": 0.052, "step": 8870 }, { - "epoch": 0.6594385860686173, - "grad_norm": 2.2025349140167236, - "learning_rate": 2.6043368483588295e-05, - "loss": 0.0716, + "epoch": 2.6373626373626373, + "grad_norm": 0.7350105047225952, + "learning_rate": 1.4175824175824177e-05, + "loss": 0.0604, "step": 8880 }, { - "epoch": 0.6601811970889648, - "grad_norm": 0.7800239324569702, - "learning_rate": 2.603891281746621e-05, - "loss": 0.0774, + "epoch": 2.64033264033264, + "grad_norm": 0.5058907270431519, + "learning_rate": 1.4158004158004159e-05, + "loss": 0.0558, "step": 8890 }, { - "epoch": 0.6609238081093124, - "grad_norm": 1.6750237941741943, - "learning_rate": 2.603445715134413e-05, - "loss": 0.0824, + "epoch": 2.6433026433026434, + "grad_norm": 0.6319795846939087, + "learning_rate": 1.4140184140184141e-05, + "loss": 0.0505, "step": 8900 }, { - "epoch": 0.6616664191296598, - "grad_norm": 3.150371789932251, - "learning_rate": 2.603000148522204e-05, - "loss": 0.1005, + "epoch": 2.6462726462726462, + "grad_norm": 0.6916008591651917, + "learning_rate": 1.4122364122364123e-05, + "loss": 0.0686, "step": 8910 }, { - "epoch": 0.6624090301500074, - "grad_norm": 2.6133267879486084, - "learning_rate": 2.6025545819099955e-05, - "loss": 0.0906, + "epoch": 2.649242649242649, + "grad_norm": 1.57939612865448, + "learning_rate": 1.4104544104544104e-05, + "loss": 0.0534, "step": 8920 }, { - "epoch": 0.663151641170355, - "grad_norm": 2.1227505207061768, - "learning_rate": 2.6021090152977874e-05, - "loss": 0.094, + "epoch": 2.6522126522126523, + "grad_norm": 1.0057226419448853, + "learning_rate": 1.4086724086724087e-05, + "loss": 0.0573, "step": 8930 }, { - "epoch": 0.6638942521907025, - "grad_norm": 3.7070045471191406, - "learning_rate": 2.6016634486855785e-05, - "loss": 0.1054, + "epoch": 2.655182655182655, + "grad_norm": 0.6452613472938538, + "learning_rate": 1.406890406890407e-05, + "loss": 0.0678, "step": 8940 }, { - "epoch": 0.6646368632110501, - "grad_norm": 2.8598554134368896, - "learning_rate": 2.60121788207337e-05, - "loss": 0.101, + "epoch": 2.658152658152658, + "grad_norm": 0.45321300625801086, + "learning_rate": 1.4051084051084052e-05, + "loss": 0.0453, "step": 8950 }, { - "epoch": 0.6653794742313975, - "grad_norm": 1.542912483215332, - "learning_rate": 2.6007723154611615e-05, - "loss": 0.0896, + "epoch": 2.6611226611226613, + "grad_norm": 0.46493178606033325, + "learning_rate": 1.4033264033264034e-05, + "loss": 0.0579, "step": 8960 }, { - "epoch": 0.6661220852517451, - "grad_norm": 2.263106346130371, - "learning_rate": 2.600326748848953e-05, - "loss": 0.0893, + "epoch": 2.664092664092664, + "grad_norm": 0.6382163763046265, + "learning_rate": 1.4015444015444016e-05, + "loss": 0.0537, "step": 8970 }, { - "epoch": 0.6668646962720927, - "grad_norm": 1.0385371446609497, - "learning_rate": 2.5998811822367445e-05, - "loss": 0.1132, + "epoch": 2.667062667062667, + "grad_norm": 0.5830327272415161, + "learning_rate": 1.3997623997623998e-05, + "loss": 0.0529, "step": 8980 }, { - "epoch": 0.6676073072924402, - "grad_norm": 3.194511890411377, - "learning_rate": 2.5994356156245357e-05, - "loss": 0.0824, + "epoch": 2.67003267003267, + "grad_norm": 0.8241320252418518, + "learning_rate": 1.3979803979803979e-05, + "loss": 0.0501, "step": 8990 }, { - "epoch": 0.6683499183127878, - "grad_norm": 1.4233129024505615, - "learning_rate": 2.5989900490123275e-05, - "loss": 0.0689, + "epoch": 2.673002673002673, + "grad_norm": 1.3200924396514893, + "learning_rate": 1.3961983961983963e-05, + "loss": 0.0637, "step": 9000 }, { - "epoch": 0.6690925293331353, - "grad_norm": 1.2096024751663208, - "learning_rate": 2.598544482400119e-05, - "loss": 0.0736, + "epoch": 2.675972675972676, + "grad_norm": 0.5963950157165527, + "learning_rate": 1.3944163944163945e-05, + "loss": 0.0581, "step": 9010 }, { - "epoch": 0.6698351403534828, - "grad_norm": 2.155372381210327, - "learning_rate": 2.5980989157879102e-05, - "loss": 0.077, + "epoch": 2.678942678942679, + "grad_norm": 0.5137681365013123, + "learning_rate": 1.3926343926343927e-05, + "loss": 0.0504, "step": 9020 }, { - "epoch": 0.6705777513738304, - "grad_norm": 1.661603331565857, - "learning_rate": 2.5976533491757017e-05, - "loss": 0.097, + "epoch": 2.681912681912682, + "grad_norm": 0.8717916011810303, + "learning_rate": 1.390852390852391e-05, + "loss": 0.0681, "step": 9030 }, { - "epoch": 0.6713203623941779, - "grad_norm": 2.2005343437194824, - "learning_rate": 2.5972077825634935e-05, - "loss": 0.0742, + "epoch": 2.684882684882685, + "grad_norm": 0.5380828976631165, + "learning_rate": 1.3890703890703891e-05, + "loss": 0.0568, "step": 9040 }, { - "epoch": 0.6720629734145255, - "grad_norm": 1.2867567539215088, - "learning_rate": 2.5967622159512847e-05, - "loss": 0.0691, + "epoch": 2.687852687852688, + "grad_norm": 0.8956130743026733, + "learning_rate": 1.3872883872883874e-05, + "loss": 0.0623, "step": 9050 }, { - "epoch": 0.672805584434873, - "grad_norm": 2.7160210609436035, - "learning_rate": 2.5963166493390762e-05, - "loss": 0.1008, + "epoch": 2.690822690822691, + "grad_norm": 0.6086248159408569, + "learning_rate": 1.3855063855063854e-05, + "loss": 0.0643, "step": 9060 }, { - "epoch": 0.6735481954552206, - "grad_norm": 2.456948757171631, - "learning_rate": 2.5958710827268677e-05, - "loss": 0.0676, + "epoch": 2.6937926937926937, + "grad_norm": 0.8992329835891724, + "learning_rate": 1.3837243837243838e-05, + "loss": 0.0649, "step": 9070 }, { - "epoch": 0.674290806475568, - "grad_norm": 1.7581907510757446, - "learning_rate": 2.5954255161146592e-05, - "loss": 0.0949, + "epoch": 2.696762696762697, + "grad_norm": 0.3477851450443268, + "learning_rate": 1.381942381942382e-05, + "loss": 0.0479, "step": 9080 }, { - "epoch": 0.6750334174959156, - "grad_norm": 1.2283096313476562, - "learning_rate": 2.5949799495024507e-05, - "loss": 0.0792, + "epoch": 2.6997326997327, + "grad_norm": 1.4136226177215576, + "learning_rate": 1.3801603801603802e-05, + "loss": 0.0529, "step": 9090 }, { - "epoch": 0.6757760285162632, - "grad_norm": 1.879252552986145, - "learning_rate": 2.5945343828902422e-05, - "loss": 0.0732, + "epoch": 2.7027027027027026, + "grad_norm": 1.031639814376831, + "learning_rate": 1.3783783783783784e-05, + "loss": 0.0607, "step": 9100 }, { - "epoch": 0.6765186395366107, - "grad_norm": 2.652205228805542, - "learning_rate": 2.5940888162780337e-05, - "loss": 0.0759, + "epoch": 2.705672705672706, + "grad_norm": 0.9110945463180542, + "learning_rate": 1.3765963765963767e-05, + "loss": 0.054, "step": 9110 }, { - "epoch": 0.6772612505569583, - "grad_norm": 4.162420749664307, - "learning_rate": 2.5936432496658252e-05, - "loss": 0.0737, + "epoch": 2.7086427086427087, + "grad_norm": 1.111244797706604, + "learning_rate": 1.3748143748143749e-05, + "loss": 0.065, "step": 9120 }, { - "epoch": 0.6780038615773059, - "grad_norm": 1.89590585231781, - "learning_rate": 2.5931976830536163e-05, - "loss": 0.0805, + "epoch": 2.7116127116127116, + "grad_norm": 0.507455587387085, + "learning_rate": 1.373032373032373e-05, + "loss": 0.0605, "step": 9130 }, { - "epoch": 0.6787464725976533, - "grad_norm": 1.6626734733581543, - "learning_rate": 2.5927521164414082e-05, - "loss": 0.1003, + "epoch": 2.714582714582715, + "grad_norm": 1.2011653184890747, + "learning_rate": 1.3712503712503713e-05, + "loss": 0.0535, "step": 9140 }, { - "epoch": 0.6794890836180009, - "grad_norm": 1.87484610080719, - "learning_rate": 2.5923065498291997e-05, - "loss": 0.0795, + "epoch": 2.7175527175527177, + "grad_norm": 0.48414701223373413, + "learning_rate": 1.3694683694683695e-05, + "loss": 0.0488, "step": 9150 }, { - "epoch": 0.6802316946383484, - "grad_norm": 1.9725035429000854, - "learning_rate": 2.591860983216991e-05, - "loss": 0.0936, + "epoch": 2.7205227205227205, + "grad_norm": 1.0523313283920288, + "learning_rate": 1.3676863676863677e-05, + "loss": 0.0657, "step": 9160 }, { - "epoch": 0.680974305658696, - "grad_norm": 2.27907395362854, - "learning_rate": 2.5914154166047827e-05, - "loss": 0.0865, + "epoch": 2.7234927234927238, + "grad_norm": 0.8370203971862793, + "learning_rate": 1.365904365904366e-05, + "loss": 0.0563, "step": 9170 }, { - "epoch": 0.6817169166790435, - "grad_norm": 1.4247010946273804, - "learning_rate": 2.590969849992574e-05, - "loss": 0.0751, + "epoch": 2.7264627264627266, + "grad_norm": 0.6961177587509155, + "learning_rate": 1.3641223641223642e-05, + "loss": 0.0587, "step": 9180 }, { - "epoch": 0.6824595276993911, - "grad_norm": 2.569737195968628, - "learning_rate": 2.5905242833803653e-05, - "loss": 0.1007, + "epoch": 2.7294327294327294, + "grad_norm": 1.089735507965088, + "learning_rate": 1.3623403623403624e-05, + "loss": 0.0606, "step": 9190 }, { - "epoch": 0.6832021387197386, - "grad_norm": 3.3012797832489014, - "learning_rate": 2.590078716768157e-05, - "loss": 0.0889, + "epoch": 2.7324027324027322, + "grad_norm": 0.8496239185333252, + "learning_rate": 1.3605583605583606e-05, + "loss": 0.0572, "step": 9200 }, { - "epoch": 0.6839447497400861, - "grad_norm": 2.0903170108795166, - "learning_rate": 2.5896331501559483e-05, - "loss": 0.082, + "epoch": 2.7353727353727355, + "grad_norm": 0.8729379773139954, + "learning_rate": 1.3587763587763588e-05, + "loss": 0.0604, "step": 9210 }, { - "epoch": 0.6846873607604337, - "grad_norm": 1.6836172342300415, - "learning_rate": 2.58918758354374e-05, - "loss": 0.0873, + "epoch": 2.7383427383427383, + "grad_norm": 0.8139004111289978, + "learning_rate": 1.356994356994357e-05, + "loss": 0.052, "step": 9220 }, { - "epoch": 0.6854299717807812, - "grad_norm": 3.3756263256073, - "learning_rate": 2.5887420169315313e-05, - "loss": 0.0769, + "epoch": 2.741312741312741, + "grad_norm": 1.0032668113708496, + "learning_rate": 1.3552123552123553e-05, + "loss": 0.0578, "step": 9230 }, { - "epoch": 0.6861725828011288, - "grad_norm": 1.1910730600357056, - "learning_rate": 2.588296450319323e-05, - "loss": 0.0894, + "epoch": 2.7442827442827444, + "grad_norm": 0.5807051062583923, + "learning_rate": 1.3534303534303535e-05, + "loss": 0.0581, "step": 9240 }, { - "epoch": 0.6869151938214763, - "grad_norm": 1.0612378120422363, - "learning_rate": 2.5878508837071143e-05, - "loss": 0.062, + "epoch": 2.7472527472527473, + "grad_norm": 0.5372444987297058, + "learning_rate": 1.3516483516483517e-05, + "loss": 0.0606, "step": 9250 }, { - "epoch": 0.6876578048418238, - "grad_norm": 1.0237765312194824, - "learning_rate": 2.587405317094906e-05, - "loss": 0.0904, + "epoch": 2.75022275022275, + "grad_norm": 2.1565425395965576, + "learning_rate": 1.3498663498663499e-05, + "loss": 0.0545, "step": 9260 }, { - "epoch": 0.6884004158621714, - "grad_norm": 2.666456460952759, - "learning_rate": 2.5869597504826973e-05, - "loss": 0.0861, + "epoch": 2.753192753192753, + "grad_norm": 0.6508318185806274, + "learning_rate": 1.3480843480843481e-05, + "loss": 0.0591, "step": 9270 }, { - "epoch": 0.6891430268825189, - "grad_norm": 1.1967474222183228, - "learning_rate": 2.586514183870489e-05, - "loss": 0.086, + "epoch": 2.756162756162756, + "grad_norm": 0.36386728286743164, + "learning_rate": 1.3463023463023463e-05, + "loss": 0.049, "step": 9280 }, { - "epoch": 0.6898856379028665, - "grad_norm": 3.264155626296997, - "learning_rate": 2.58606861725828e-05, - "loss": 0.0903, + "epoch": 2.759132759132759, + "grad_norm": 1.4432202577590942, + "learning_rate": 1.3445203445203446e-05, + "loss": 0.067, "step": 9290 }, { - "epoch": 0.6906282489232141, - "grad_norm": 2.126134157180786, - "learning_rate": 2.5856230506460715e-05, - "loss": 0.1036, + "epoch": 2.762102762102762, + "grad_norm": 0.7495296001434326, + "learning_rate": 1.3427383427383428e-05, + "loss": 0.0551, "step": 9300 }, { - "epoch": 0.6913708599435615, - "grad_norm": 1.6895121335983276, - "learning_rate": 2.5851774840338633e-05, - "loss": 0.067, + "epoch": 2.765072765072765, + "grad_norm": 0.5541219711303711, + "learning_rate": 1.340956340956341e-05, + "loss": 0.0594, "step": 9310 }, { - "epoch": 0.6921134709639091, - "grad_norm": 2.2356975078582764, - "learning_rate": 2.5847319174216545e-05, - "loss": 0.0838, + "epoch": 2.768042768042768, + "grad_norm": 0.8535263538360596, + "learning_rate": 1.3391743391743392e-05, + "loss": 0.0583, "step": 9320 }, { - "epoch": 0.6928560819842566, - "grad_norm": 1.7429089546203613, - "learning_rate": 2.584286350809446e-05, - "loss": 0.0731, + "epoch": 2.7710127710127708, + "grad_norm": 0.784618616104126, + "learning_rate": 1.3373923373923374e-05, + "loss": 0.0705, "step": 9330 }, { - "epoch": 0.6935986930046042, - "grad_norm": 1.1210354566574097, - "learning_rate": 2.583840784197238e-05, - "loss": 0.0932, + "epoch": 2.773982773982774, + "grad_norm": 0.6920385360717773, + "learning_rate": 1.3356103356103356e-05, + "loss": 0.0686, "step": 9340 }, { - "epoch": 0.6943413040249518, - "grad_norm": 1.4460147619247437, - "learning_rate": 2.583395217585029e-05, - "loss": 0.0817, + "epoch": 2.776952776952777, + "grad_norm": 0.7097575068473816, + "learning_rate": 1.3338283338283339e-05, + "loss": 0.0546, "step": 9350 }, { - "epoch": 0.6950839150452993, - "grad_norm": 1.1217153072357178, - "learning_rate": 2.5829496509728205e-05, - "loss": 0.0671, + "epoch": 2.7799227799227797, + "grad_norm": 0.9163897633552551, + "learning_rate": 1.332046332046332e-05, + "loss": 0.0604, "step": 9360 }, { - "epoch": 0.6958265260656468, - "grad_norm": 2.2373554706573486, - "learning_rate": 2.582504084360612e-05, - "loss": 0.0681, + "epoch": 2.782892782892783, + "grad_norm": 0.5482726097106934, + "learning_rate": 1.3302643302643303e-05, + "loss": 0.0561, "step": 9370 }, { - "epoch": 0.6965691370859943, - "grad_norm": 2.8909049034118652, - "learning_rate": 2.5820585177484035e-05, - "loss": 0.1121, + "epoch": 2.785862785862786, + "grad_norm": 0.5546408891677856, + "learning_rate": 1.3284823284823285e-05, + "loss": 0.0664, "step": 9380 }, { - "epoch": 0.6973117481063419, - "grad_norm": 0.4152112603187561, - "learning_rate": 2.581612951136195e-05, - "loss": 0.0894, + "epoch": 2.7888327888327886, + "grad_norm": 0.9671948552131653, + "learning_rate": 1.3267003267003267e-05, + "loss": 0.049, "step": 9390 }, { - "epoch": 0.6980543591266894, - "grad_norm": 3.5851147174835205, - "learning_rate": 2.581167384523986e-05, - "loss": 0.0797, + "epoch": 2.791802791802792, + "grad_norm": 0.8189311027526855, + "learning_rate": 1.324918324918325e-05, + "loss": 0.0464, "step": 9400 }, { - "epoch": 0.698796970147037, - "grad_norm": 1.1283321380615234, - "learning_rate": 2.580721817911778e-05, - "loss": 0.0966, + "epoch": 2.7947727947727947, + "grad_norm": 0.5405160188674927, + "learning_rate": 1.3231363231363232e-05, + "loss": 0.0673, "step": 9410 }, { - "epoch": 0.6995395811673846, - "grad_norm": 2.237506151199341, - "learning_rate": 2.5802762512995695e-05, - "loss": 0.0554, + "epoch": 2.7977427977427975, + "grad_norm": 0.8173415660858154, + "learning_rate": 1.3213543213543214e-05, + "loss": 0.0589, "step": 9420 }, { - "epoch": 0.700282192187732, - "grad_norm": 2.4891796112060547, - "learning_rate": 2.5798306846873607e-05, - "loss": 0.0808, + "epoch": 2.800712800712801, + "grad_norm": 0.6421013474464417, + "learning_rate": 1.3195723195723196e-05, + "loss": 0.0591, "step": 9430 }, { - "epoch": 0.7010248032080796, - "grad_norm": 1.4225846529006958, - "learning_rate": 2.579385118075152e-05, - "loss": 0.0733, + "epoch": 2.8036828036828036, + "grad_norm": 1.540049433708191, + "learning_rate": 1.3177903177903178e-05, + "loss": 0.072, "step": 9440 }, { - "epoch": 0.7017674142284271, - "grad_norm": 3.312795400619507, - "learning_rate": 2.578939551462944e-05, - "loss": 0.0979, + "epoch": 2.8066528066528065, + "grad_norm": 0.6752909421920776, + "learning_rate": 1.316008316008316e-05, + "loss": 0.049, "step": 9450 }, { - "epoch": 0.7025100252487747, - "grad_norm": 1.2239809036254883, - "learning_rate": 2.578493984850735e-05, - "loss": 0.0735, + "epoch": 2.8096228096228097, + "grad_norm": 0.9367174506187439, + "learning_rate": 1.3142263142263142e-05, + "loss": 0.0738, "step": 9460 }, { - "epoch": 0.7032526362691223, - "grad_norm": 3.1901540756225586, - "learning_rate": 2.5780484182385266e-05, - "loss": 0.0929, + "epoch": 2.8125928125928126, + "grad_norm": 1.2231054306030273, + "learning_rate": 1.3124443124443125e-05, + "loss": 0.0625, "step": 9470 }, { - "epoch": 0.7039952472894698, - "grad_norm": 1.4800280332565308, - "learning_rate": 2.577602851626318e-05, - "loss": 0.0693, + "epoch": 2.8155628155628154, + "grad_norm": 0.4123198091983795, + "learning_rate": 1.3106623106623107e-05, + "loss": 0.0518, "step": 9480 }, { - "epoch": 0.7047378583098173, - "grad_norm": 3.378511667251587, - "learning_rate": 2.5771572850141096e-05, - "loss": 0.0794, + "epoch": 2.8185328185328187, + "grad_norm": 0.6436600089073181, + "learning_rate": 1.3088803088803089e-05, + "loss": 0.0725, "step": 9490 }, { - "epoch": 0.7054804693301648, - "grad_norm": 2.557231903076172, - "learning_rate": 2.576711718401901e-05, - "loss": 0.0825, + "epoch": 2.8215028215028215, + "grad_norm": 0.6609872579574585, + "learning_rate": 1.3070983070983071e-05, + "loss": 0.0594, "step": 9500 }, { - "epoch": 0.7062230803505124, - "grad_norm": 1.7998268604278564, - "learning_rate": 2.5762661517896926e-05, - "loss": 0.0679, + "epoch": 2.8244728244728243, + "grad_norm": 0.4559807777404785, + "learning_rate": 1.3053163053163053e-05, + "loss": 0.063, "step": 9510 }, { - "epoch": 0.70696569137086, - "grad_norm": 2.5356063842773438, - "learning_rate": 2.575820585177484e-05, - "loss": 0.0851, + "epoch": 2.8274428274428276, + "grad_norm": 1.0290307998657227, + "learning_rate": 1.3035343035343037e-05, + "loss": 0.0618, "step": 9520 }, { - "epoch": 0.7077083023912075, - "grad_norm": 3.3451857566833496, - "learning_rate": 2.5753750185652756e-05, - "loss": 0.0934, + "epoch": 2.8304128304128304, + "grad_norm": 0.7586894035339355, + "learning_rate": 1.3017523017523018e-05, + "loss": 0.0584, "step": 9530 }, { - "epoch": 0.708450913411555, - "grad_norm": 2.2727510929107666, - "learning_rate": 2.5749294519530668e-05, - "loss": 0.065, + "epoch": 2.8333828333828333, + "grad_norm": 0.714316725730896, + "learning_rate": 1.2999702999703e-05, + "loss": 0.0448, "step": 9540 }, { - "epoch": 0.7091935244319025, - "grad_norm": 3.0308828353881836, - "learning_rate": 2.5744838853408586e-05, - "loss": 0.1067, + "epoch": 2.8363528363528365, + "grad_norm": 0.6114319562911987, + "learning_rate": 1.2981882981882982e-05, + "loss": 0.0689, "step": 9550 }, { - "epoch": 0.7099361354522501, - "grad_norm": 0.393522173166275, - "learning_rate": 2.57403831872865e-05, - "loss": 0.0824, + "epoch": 2.8393228393228394, + "grad_norm": 0.7381054162979126, + "learning_rate": 1.2964062964062964e-05, + "loss": 0.0577, "step": 9560 }, { - "epoch": 0.7106787464725977, - "grad_norm": 1.6205034255981445, - "learning_rate": 2.5735927521164413e-05, - "loss": 0.0474, + "epoch": 2.842292842292842, + "grad_norm": 0.7892597317695618, + "learning_rate": 1.2946242946242946e-05, + "loss": 0.0697, "step": 9570 }, { - "epoch": 0.7114213574929452, - "grad_norm": 1.4009572267532349, - "learning_rate": 2.573147185504233e-05, - "loss": 0.095, + "epoch": 2.8452628452628455, + "grad_norm": 0.6848814487457275, + "learning_rate": 1.2928422928422929e-05, + "loss": 0.0521, "step": 9580 }, { - "epoch": 0.7121639685132928, - "grad_norm": 1.9968441724777222, - "learning_rate": 2.5727016188920243e-05, - "loss": 0.0957, + "epoch": 2.8482328482328483, + "grad_norm": 0.5784212946891785, + "learning_rate": 1.2910602910602912e-05, + "loss": 0.0647, "step": 9590 }, { - "epoch": 0.7129065795336402, - "grad_norm": 1.6015273332595825, - "learning_rate": 2.5722560522798158e-05, - "loss": 0.0857, + "epoch": 2.851202851202851, + "grad_norm": 0.6273770332336426, + "learning_rate": 1.2892782892782893e-05, + "loss": 0.0677, "step": 9600 }, { - "epoch": 0.7136491905539878, - "grad_norm": 1.4251041412353516, - "learning_rate": 2.5718104856676073e-05, - "loss": 0.0932, + "epoch": 2.8541728541728544, + "grad_norm": 0.7045747637748718, + "learning_rate": 1.2874962874962875e-05, + "loss": 0.0686, "step": 9610 }, { - "epoch": 0.7143918015743354, - "grad_norm": 5.090855598449707, - "learning_rate": 2.5713649190553988e-05, - "loss": 0.1032, + "epoch": 2.857142857142857, + "grad_norm": 0.8756235241889954, + "learning_rate": 1.2857142857142857e-05, + "loss": 0.0551, "step": 9620 }, { - "epoch": 0.7151344125946829, - "grad_norm": 2.4273598194122314, - "learning_rate": 2.5709193524431903e-05, - "loss": 0.0827, + "epoch": 2.86011286011286, + "grad_norm": 1.0615090131759644, + "learning_rate": 1.283932283932284e-05, + "loss": 0.0676, "step": 9630 }, { - "epoch": 0.7158770236150305, - "grad_norm": 1.8204997777938843, - "learning_rate": 2.5704737858309818e-05, - "loss": 0.1167, + "epoch": 2.8630828630828633, + "grad_norm": 0.42012715339660645, + "learning_rate": 1.2821502821502822e-05, + "loss": 0.0492, "step": 9640 }, { - "epoch": 0.716619634635378, - "grad_norm": 1.7066177129745483, - "learning_rate": 2.5700282192187733e-05, - "loss": 0.1037, + "epoch": 2.866052866052866, + "grad_norm": 0.8934495449066162, + "learning_rate": 1.2803682803682804e-05, + "loss": 0.0523, "step": 9650 }, { - "epoch": 0.7173622456557255, - "grad_norm": 2.3941705226898193, - "learning_rate": 2.5695826526065648e-05, - "loss": 0.1003, + "epoch": 2.869022869022869, + "grad_norm": 1.0009864568710327, + "learning_rate": 1.2785862785862788e-05, + "loss": 0.0662, "step": 9660 }, { - "epoch": 0.718104856676073, - "grad_norm": 2.3168444633483887, - "learning_rate": 2.5691370859943563e-05, - "loss": 0.1067, + "epoch": 2.8719928719928722, + "grad_norm": 0.8075212836265564, + "learning_rate": 1.2768042768042768e-05, + "loss": 0.0553, "step": 9670 }, { - "epoch": 0.7188474676964206, - "grad_norm": 1.6166632175445557, - "learning_rate": 2.5686915193821478e-05, - "loss": 0.0904, + "epoch": 2.874962874962875, + "grad_norm": 0.7965303063392639, + "learning_rate": 1.275022275022275e-05, + "loss": 0.0573, "step": 9680 }, { - "epoch": 0.7195900787167682, - "grad_norm": 0.9666265845298767, - "learning_rate": 2.5682459527699393e-05, - "loss": 0.0967, + "epoch": 2.877932877932878, + "grad_norm": 0.6948648691177368, + "learning_rate": 1.2732402732402732e-05, + "loss": 0.0557, "step": 9690 }, { - "epoch": 0.7203326897371157, - "grad_norm": 0.7397652864456177, - "learning_rate": 2.5678003861577305e-05, - "loss": 0.0604, + "epoch": 2.880902880902881, + "grad_norm": 0.7285399436950684, + "learning_rate": 1.2714582714582715e-05, + "loss": 0.0624, "step": 9700 }, { - "epoch": 0.7210753007574633, - "grad_norm": 3.255927324295044, - "learning_rate": 2.567354819545522e-05, - "loss": 0.0784, + "epoch": 2.883872883872884, + "grad_norm": 0.37577903270721436, + "learning_rate": 1.2696762696762697e-05, + "loss": 0.0517, "step": 9710 }, { - "epoch": 0.7218179117778107, - "grad_norm": 2.8680319786071777, - "learning_rate": 2.5669092529333138e-05, - "loss": 0.1131, + "epoch": 2.886842886842887, + "grad_norm": 0.36712825298309326, + "learning_rate": 1.2678942678942679e-05, + "loss": 0.0567, "step": 9720 }, { - "epoch": 0.7225605227981583, - "grad_norm": 1.343375325202942, - "learning_rate": 2.566463686321105e-05, - "loss": 0.1042, + "epoch": 2.88981288981289, + "grad_norm": 1.1397478580474854, + "learning_rate": 1.2661122661122663e-05, + "loss": 0.0636, "step": 9730 }, { - "epoch": 0.7233031338185059, - "grad_norm": 2.072066307067871, - "learning_rate": 2.5660181197088965e-05, - "loss": 0.1135, + "epoch": 2.892782892782893, + "grad_norm": 0.5818475484848022, + "learning_rate": 1.2643302643302643e-05, + "loss": 0.0535, "step": 9740 }, { - "epoch": 0.7240457448388534, - "grad_norm": 2.82025408744812, - "learning_rate": 2.5655725530966883e-05, - "loss": 0.0621, + "epoch": 2.8957528957528957, + "grad_norm": 0.7430572509765625, + "learning_rate": 1.2625482625482625e-05, + "loss": 0.0484, "step": 9750 }, { - "epoch": 0.724788355859201, - "grad_norm": 4.173225402832031, - "learning_rate": 2.5651269864844795e-05, - "loss": 0.0756, + "epoch": 2.8987228987228986, + "grad_norm": 1.0265908241271973, + "learning_rate": 1.2607662607662608e-05, + "loss": 0.0566, "step": 9760 }, { - "epoch": 0.7255309668795485, - "grad_norm": 0.6784592866897583, - "learning_rate": 2.564681419872271e-05, - "loss": 0.0859, + "epoch": 2.901692901692902, + "grad_norm": 1.191502332687378, + "learning_rate": 1.258984258984259e-05, + "loss": 0.0574, "step": 9770 }, { - "epoch": 0.726273577899896, - "grad_norm": 2.3363256454467773, - "learning_rate": 2.5642358532600625e-05, - "loss": 0.0596, + "epoch": 2.9046629046629047, + "grad_norm": 0.7020824551582336, + "learning_rate": 1.2572022572022572e-05, + "loss": 0.0498, "step": 9780 }, { - "epoch": 0.7270161889202436, - "grad_norm": 1.6436067819595337, - "learning_rate": 2.563790286647854e-05, - "loss": 0.0875, + "epoch": 2.9076329076329075, + "grad_norm": 1.4909850358963013, + "learning_rate": 1.2554202554202554e-05, + "loss": 0.0648, "step": 9790 }, { - "epoch": 0.7277587999405911, - "grad_norm": 2.9929933547973633, - "learning_rate": 2.5633447200356455e-05, - "loss": 0.1146, + "epoch": 2.9106029106029108, + "grad_norm": 0.9916106462478638, + "learning_rate": 1.2536382536382538e-05, + "loss": 0.068, "step": 9800 }, { - "epoch": 0.7285014109609387, - "grad_norm": 2.5027360916137695, - "learning_rate": 2.5628991534234366e-05, - "loss": 0.0916, + "epoch": 2.9135729135729136, + "grad_norm": 0.7778554558753967, + "learning_rate": 1.2518562518562518e-05, + "loss": 0.0518, "step": 9810 }, { - "epoch": 0.7292440219812862, - "grad_norm": 0.6115292310714722, - "learning_rate": 2.5624535868112285e-05, - "loss": 0.0381, + "epoch": 2.9165429165429164, + "grad_norm": 1.0887690782546997, + "learning_rate": 1.25007425007425e-05, + "loss": 0.0668, "step": 9820 }, { - "epoch": 0.7299866330016337, - "grad_norm": 3.5652284622192383, - "learning_rate": 2.56200802019902e-05, - "loss": 0.0678, + "epoch": 2.9195129195129192, + "grad_norm": 0.5052692294120789, + "learning_rate": 1.2482922482922483e-05, + "loss": 0.0489, "step": 9830 }, { - "epoch": 0.7307292440219813, - "grad_norm": 2.814704179763794, - "learning_rate": 2.561562453586811e-05, - "loss": 0.1019, + "epoch": 2.9224829224829225, + "grad_norm": 0.9965596199035645, + "learning_rate": 1.2465102465102467e-05, + "loss": 0.0596, "step": 9840 }, { - "epoch": 0.7314718550423288, - "grad_norm": 2.0167160034179688, - "learning_rate": 2.5611168869746026e-05, - "loss": 0.0719, + "epoch": 2.9254529254529253, + "grad_norm": 0.6010634303092957, + "learning_rate": 1.2447282447282447e-05, + "loss": 0.0556, "step": 9850 }, { - "epoch": 0.7322144660626764, - "grad_norm": 1.6718881130218506, - "learning_rate": 2.5606713203623944e-05, - "loss": 0.0965, + "epoch": 2.928422928422928, + "grad_norm": 0.7271102070808411, + "learning_rate": 1.242946242946243e-05, + "loss": 0.0539, "step": 9860 }, { - "epoch": 0.7329570770830239, - "grad_norm": 1.5811102390289307, - "learning_rate": 2.5602257537501856e-05, - "loss": 0.1263, + "epoch": 2.9313929313929314, + "grad_norm": 0.5833537578582764, + "learning_rate": 1.2411642411642413e-05, + "loss": 0.0551, "step": 9870 }, { - "epoch": 0.7336996881033715, - "grad_norm": 3.2773425579071045, - "learning_rate": 2.559780187137977e-05, - "loss": 0.0771, + "epoch": 2.9343629343629343, + "grad_norm": 1.5592166185379028, + "learning_rate": 1.2393822393822394e-05, + "loss": 0.0649, "step": 9880 }, { - "epoch": 0.734442299123719, - "grad_norm": 1.7898057699203491, - "learning_rate": 2.559334620525769e-05, - "loss": 0.0871, + "epoch": 2.937332937332937, + "grad_norm": 0.6563842296600342, + "learning_rate": 1.2376002376002376e-05, + "loss": 0.047, "step": 9890 }, { - "epoch": 0.7351849101440665, - "grad_norm": 2.309032917022705, - "learning_rate": 2.55888905391356e-05, - "loss": 0.0703, + "epoch": 2.9403029403029404, + "grad_norm": 0.8599936366081238, + "learning_rate": 1.2358182358182358e-05, + "loss": 0.0665, "step": 9900 }, { - "epoch": 0.7359275211644141, - "grad_norm": 1.4760417938232422, - "learning_rate": 2.5584434873013516e-05, - "loss": 0.0877, + "epoch": 2.943272943272943, + "grad_norm": 0.6941415667533875, + "learning_rate": 1.2340362340362342e-05, + "loss": 0.0603, "step": 9910 }, { - "epoch": 0.7366701321847616, - "grad_norm": 1.4691712856292725, - "learning_rate": 2.557997920689143e-05, - "loss": 0.0909, + "epoch": 2.946242946242946, + "grad_norm": 0.6341265439987183, + "learning_rate": 1.2322542322542322e-05, + "loss": 0.0701, "step": 9920 }, { - "epoch": 0.7374127432051092, - "grad_norm": 1.479776382446289, - "learning_rate": 2.5575523540769346e-05, - "loss": 0.0983, + "epoch": 2.9492129492129493, + "grad_norm": 0.8768404722213745, + "learning_rate": 1.2304722304722305e-05, + "loss": 0.0635, "step": 9930 }, { - "epoch": 0.7381553542254568, - "grad_norm": 2.160743474960327, - "learning_rate": 2.557106787464726e-05, - "loss": 0.0964, + "epoch": 2.952182952182952, + "grad_norm": 0.7118542194366455, + "learning_rate": 1.2286902286902288e-05, + "loss": 0.0592, "step": 9940 }, { - "epoch": 0.7388979652458042, - "grad_norm": 1.2513461112976074, - "learning_rate": 2.5566612208525173e-05, - "loss": 0.0811, + "epoch": 2.955152955152955, + "grad_norm": 1.1415071487426758, + "learning_rate": 1.2269082269082269e-05, + "loss": 0.0649, "step": 9950 }, { - "epoch": 0.7396405762661518, - "grad_norm": 1.080775499343872, - "learning_rate": 2.556215654240309e-05, - "loss": 0.0922, + "epoch": 2.9581229581229582, + "grad_norm": 1.042884349822998, + "learning_rate": 1.2251262251262251e-05, + "loss": 0.0522, "step": 9960 }, { - "epoch": 0.7403831872864993, - "grad_norm": 2.420680284500122, - "learning_rate": 2.5557700876281006e-05, - "loss": 0.0645, + "epoch": 2.961092961092961, + "grad_norm": 0.5634980201721191, + "learning_rate": 1.2233442233442233e-05, + "loss": 0.0532, "step": 9970 }, { - "epoch": 0.7411257983068469, - "grad_norm": 2.0995841026306152, - "learning_rate": 2.5553245210158918e-05, - "loss": 0.106, + "epoch": 2.964062964062964, + "grad_norm": 0.8798786401748657, + "learning_rate": 1.2215622215622217e-05, + "loss": 0.0575, "step": 9980 }, { - "epoch": 0.7418684093271944, - "grad_norm": 3.2964303493499756, - "learning_rate": 2.5548789544036836e-05, - "loss": 0.0891, + "epoch": 2.967032967032967, + "grad_norm": 0.651884913444519, + "learning_rate": 1.2197802197802198e-05, + "loss": 0.0469, "step": 9990 }, { - "epoch": 0.742611020347542, - "grad_norm": 1.5144083499908447, - "learning_rate": 2.5544333877914748e-05, - "loss": 0.0698, + "epoch": 2.97000297000297, + "grad_norm": 0.9131767749786377, + "learning_rate": 1.217998217998218e-05, + "loss": 0.0576, "step": 10000 }, { - "epoch": 0.7433536313678895, - "grad_norm": 3.1648800373077393, - "learning_rate": 2.5539878211792663e-05, - "loss": 0.1023, + "epoch": 2.972972972972973, + "grad_norm": 1.2109713554382324, + "learning_rate": 1.2162162162162164e-05, + "loss": 0.0663, "step": 10010 }, { - "epoch": 0.744096242388237, - "grad_norm": 2.7684147357940674, - "learning_rate": 2.5535422545670578e-05, - "loss": 0.0946, + "epoch": 2.975942975942976, + "grad_norm": 0.9525397419929504, + "learning_rate": 1.2144342144342144e-05, + "loss": 0.0531, "step": 10020 }, { - "epoch": 0.7448388534085846, - "grad_norm": 2.4703927040100098, - "learning_rate": 2.5530966879548493e-05, - "loss": 0.0917, + "epoch": 2.978912978912979, + "grad_norm": 0.7746742963790894, + "learning_rate": 1.2126522126522126e-05, + "loss": 0.0619, "step": 10030 }, { - "epoch": 0.7455814644289321, - "grad_norm": 4.016003131866455, - "learning_rate": 2.5526511213426408e-05, - "loss": 0.0888, + "epoch": 2.9818829818829817, + "grad_norm": 0.526714026927948, + "learning_rate": 1.2108702108702108e-05, + "loss": 0.0639, "step": 10040 }, { - "epoch": 0.7463240754492797, - "grad_norm": 0.4022844135761261, - "learning_rate": 2.5522055547304323e-05, + "epoch": 2.984852984852985, + "grad_norm": 0.42681199312210083, + "learning_rate": 1.2090882090882092e-05, "loss": 0.059, "step": 10050 }, { - "epoch": 0.7470666864696273, - "grad_norm": 0.8048895597457886, - "learning_rate": 2.5517599881182238e-05, - "loss": 0.0885, + "epoch": 2.987822987822988, + "grad_norm": 1.26163911819458, + "learning_rate": 1.2073062073062073e-05, + "loss": 0.0534, "step": 10060 }, { - "epoch": 0.7478092974899747, - "grad_norm": 3.6403074264526367, - "learning_rate": 2.5513144215060153e-05, - "loss": 0.0704, + "epoch": 2.9907929907929907, + "grad_norm": 0.6416770815849304, + "learning_rate": 1.2055242055242055e-05, + "loss": 0.0668, "step": 10070 }, { - "epoch": 0.7485519085103223, - "grad_norm": 1.1787481307983398, - "learning_rate": 2.5508688548938068e-05, - "loss": 0.0869, + "epoch": 2.993762993762994, + "grad_norm": 0.7979917526245117, + "learning_rate": 1.2037422037422039e-05, + "loss": 0.0553, "step": 10080 }, { - "epoch": 0.7492945195306698, - "grad_norm": 2.7455785274505615, - "learning_rate": 2.5504232882815983e-05, - "loss": 0.0837, + "epoch": 2.9967329967329968, + "grad_norm": 0.6158313751220703, + "learning_rate": 1.2019602019602021e-05, + "loss": 0.0388, "step": 10090 }, { - "epoch": 0.7500371305510174, - "grad_norm": 1.82301664352417, - "learning_rate": 2.5499777216693898e-05, - "loss": 0.0581, + "epoch": 2.9997029997029996, + "grad_norm": 0.5872673392295837, + "learning_rate": 1.2001782001782001e-05, + "loss": 0.0508, "step": 10100 }, { - "epoch": 0.750779741571365, - "grad_norm": 1.8503745794296265, - "learning_rate": 2.549532155057181e-05, - "loss": 0.0519, + "epoch": 3.0, + "eval_f1": 0.33031292965957215, + "eval_loss": 0.04517492279410362, + "eval_runtime": 166.1304, + "eval_samples_per_second": 228.85, + "eval_steps_per_second": 3.582, + "step": 10101 + }, + { + "epoch": 3.002673002673003, + "grad_norm": 0.6985778212547302, + "learning_rate": 1.1983961983961984e-05, + "loss": 0.056, "step": 10110 }, { - "epoch": 0.7515223525917124, - "grad_norm": 1.0572456121444702, - "learning_rate": 2.5490865884449724e-05, - "loss": 0.055, + "epoch": 3.0056430056430057, + "grad_norm": 0.88740074634552, + "learning_rate": 1.1966141966141967e-05, + "loss": 0.0639, "step": 10120 }, { - "epoch": 0.75226496361206, - "grad_norm": 1.1795002222061157, - "learning_rate": 2.5486410218327643e-05, - "loss": 0.1019, + "epoch": 3.0086130086130085, + "grad_norm": 1.2202911376953125, + "learning_rate": 1.1948321948321948e-05, + "loss": 0.0697, "step": 10130 }, { - "epoch": 0.7530075746324075, - "grad_norm": 2.340430736541748, - "learning_rate": 2.5481954552205554e-05, - "loss": 0.0892, + "epoch": 3.011583011583012, + "grad_norm": 0.9488741755485535, + "learning_rate": 1.193050193050193e-05, + "loss": 0.0473, "step": 10140 }, { - "epoch": 0.7537501856527551, - "grad_norm": 2.2384378910064697, - "learning_rate": 2.547749888608347e-05, - "loss": 0.0461, + "epoch": 3.0145530145530146, + "grad_norm": 0.6430271863937378, + "learning_rate": 1.1912681912681914e-05, + "loss": 0.0654, "step": 10150 }, { - "epoch": 0.7544927966731027, - "grad_norm": 3.9596447944641113, - "learning_rate": 2.5473043219961388e-05, - "loss": 0.0699, + "epoch": 3.0175230175230174, + "grad_norm": 0.4088257849216461, + "learning_rate": 1.1894861894861896e-05, + "loss": 0.0608, "step": 10160 }, { - "epoch": 0.7552354076934502, - "grad_norm": 2.694197654724121, - "learning_rate": 2.54685875538393e-05, - "loss": 0.087, + "epoch": 3.0204930204930207, + "grad_norm": 0.8268032670021057, + "learning_rate": 1.1877041877041877e-05, + "loss": 0.0543, "step": 10170 }, { - "epoch": 0.7559780187137977, - "grad_norm": 1.5229603052139282, - "learning_rate": 2.5464131887717214e-05, - "loss": 0.0611, + "epoch": 3.0234630234630235, + "grad_norm": 0.7411820888519287, + "learning_rate": 1.1859221859221859e-05, + "loss": 0.045, "step": 10180 }, { - "epoch": 0.7567206297341452, - "grad_norm": 1.1745027303695679, - "learning_rate": 2.545967622159513e-05, - "loss": 0.0937, + "epoch": 3.0264330264330264, + "grad_norm": 1.4683622121810913, + "learning_rate": 1.1841401841401843e-05, + "loss": 0.0657, "step": 10190 }, { - "epoch": 0.7574632407544928, - "grad_norm": 2.827160120010376, - "learning_rate": 2.5455220555473044e-05, - "loss": 0.0918, + "epoch": 3.029403029403029, + "grad_norm": 0.926177978515625, + "learning_rate": 1.1823581823581823e-05, + "loss": 0.0535, "step": 10200 }, { - "epoch": 0.7582058517748403, - "grad_norm": 0.49699798226356506, - "learning_rate": 2.545076488935096e-05, - "loss": 0.039, + "epoch": 3.0323730323730325, + "grad_norm": 0.816768229007721, + "learning_rate": 1.1805761805761805e-05, + "loss": 0.0591, "step": 10210 }, { - "epoch": 0.7589484627951879, - "grad_norm": 0.5466452240943909, - "learning_rate": 2.544630922322887e-05, - "loss": 0.0774, + "epoch": 3.0353430353430353, + "grad_norm": 0.4738346040248871, + "learning_rate": 1.1787941787941789e-05, + "loss": 0.0697, "step": 10220 }, { - "epoch": 0.7596910738155355, - "grad_norm": 1.8753949403762817, - "learning_rate": 2.544185355710679e-05, - "loss": 0.0982, + "epoch": 3.038313038313038, + "grad_norm": 0.748884379863739, + "learning_rate": 1.1770121770121771e-05, + "loss": 0.0489, "step": 10230 }, { - "epoch": 0.7604336848358829, - "grad_norm": 2.802274465560913, - "learning_rate": 2.5437397890984704e-05, - "loss": 0.114, + "epoch": 3.0412830412830414, + "grad_norm": 0.4384136199951172, + "learning_rate": 1.1752301752301752e-05, + "loss": 0.058, "step": 10240 }, { - "epoch": 0.7611762958562305, - "grad_norm": 2.2179017066955566, - "learning_rate": 2.5432942224862616e-05, - "loss": 0.0404, + "epoch": 3.044253044253044, + "grad_norm": 0.8452009558677673, + "learning_rate": 1.1734481734481734e-05, + "loss": 0.0579, "step": 10250 }, { - "epoch": 0.761918906876578, - "grad_norm": 1.2496877908706665, - "learning_rate": 2.5428486558740534e-05, - "loss": 0.0764, + "epoch": 3.047223047223047, + "grad_norm": 1.2820180654525757, + "learning_rate": 1.1716661716661718e-05, + "loss": 0.0506, "step": 10260 }, { - "epoch": 0.7626615178969256, - "grad_norm": 1.20204496383667, - "learning_rate": 2.542403089261845e-05, - "loss": 0.0817, + "epoch": 3.0501930501930503, + "grad_norm": 0.6100145578384399, + "learning_rate": 1.1698841698841698e-05, + "loss": 0.0493, "step": 10270 }, { - "epoch": 0.7634041289172732, - "grad_norm": 2.656388521194458, - "learning_rate": 2.541957522649636e-05, - "loss": 0.0801, + "epoch": 3.053163053163053, + "grad_norm": 0.8073909282684326, + "learning_rate": 1.168102168102168e-05, + "loss": 0.059, "step": 10280 }, { - "epoch": 0.7641467399376207, - "grad_norm": 0.9805976748466492, - "learning_rate": 2.5415119560374276e-05, - "loss": 0.0655, + "epoch": 3.056133056133056, + "grad_norm": 0.6318356394767761, + "learning_rate": 1.1663201663201664e-05, + "loss": 0.0595, "step": 10290 }, { - "epoch": 0.7648893509579682, - "grad_norm": 1.0946846008300781, - "learning_rate": 2.5410663894252194e-05, - "loss": 0.0897, + "epoch": 3.0591030591030592, + "grad_norm": 0.7712961435317993, + "learning_rate": 1.1645381645381647e-05, + "loss": 0.0636, "step": 10300 }, { - "epoch": 0.7656319619783157, - "grad_norm": 1.9143744707107544, - "learning_rate": 2.5406208228130106e-05, - "loss": 0.1053, + "epoch": 3.062073062073062, + "grad_norm": 0.9575150012969971, + "learning_rate": 1.1627561627561627e-05, + "loss": 0.067, "step": 10310 }, { - "epoch": 0.7663745729986633, - "grad_norm": 2.236309766769409, - "learning_rate": 2.540175256200802e-05, - "loss": 0.0781, + "epoch": 3.065043065043065, + "grad_norm": 0.9202025532722473, + "learning_rate": 1.160974160974161e-05, + "loss": 0.0711, "step": 10320 }, { - "epoch": 0.7671171840190109, - "grad_norm": 0.839529275894165, - "learning_rate": 2.5397296895885936e-05, - "loss": 0.0727, + "epoch": 3.068013068013068, + "grad_norm": 1.038837194442749, + "learning_rate": 1.1591921591921593e-05, + "loss": 0.0487, "step": 10330 }, { - "epoch": 0.7678597950393584, - "grad_norm": 1.2142996788024902, - "learning_rate": 2.539284122976385e-05, - "loss": 0.069, + "epoch": 3.070983070983071, + "grad_norm": 0.6200097799301147, + "learning_rate": 1.1574101574101574e-05, + "loss": 0.0578, "step": 10340 }, { - "epoch": 0.768602406059706, - "grad_norm": 3.3854808807373047, - "learning_rate": 2.5388385563641766e-05, - "loss": 0.086, + "epoch": 3.073953073953074, + "grad_norm": 0.6585675477981567, + "learning_rate": 1.1556281556281556e-05, + "loss": 0.0648, "step": 10350 }, { - "epoch": 0.7693450170800534, - "grad_norm": 1.9810289144515991, - "learning_rate": 2.5383929897519677e-05, - "loss": 0.0621, + "epoch": 3.076923076923077, + "grad_norm": 0.8432527184486389, + "learning_rate": 1.153846153846154e-05, + "loss": 0.0574, "step": 10360 }, { - "epoch": 0.770087628100401, - "grad_norm": 1.3424344062805176, - "learning_rate": 2.5379474231397596e-05, - "loss": 0.0884, + "epoch": 3.07989307989308, + "grad_norm": 0.8519158959388733, + "learning_rate": 1.1520641520641522e-05, + "loss": 0.0435, "step": 10370 }, { - "epoch": 0.7708302391207486, - "grad_norm": 1.7278804779052734, - "learning_rate": 2.537501856527551e-05, - "loss": 0.0618, + "epoch": 3.0828630828630827, + "grad_norm": 0.5639305710792542, + "learning_rate": 1.1502821502821502e-05, + "loss": 0.0548, "step": 10380 }, { - "epoch": 0.7715728501410961, - "grad_norm": 2.9425151348114014, - "learning_rate": 2.5370562899153422e-05, - "loss": 0.1162, + "epoch": 3.085833085833086, + "grad_norm": 1.1483186483383179, + "learning_rate": 1.1485001485001484e-05, + "loss": 0.067, "step": 10390 }, { - "epoch": 0.7723154611614437, - "grad_norm": 0.7557898759841919, - "learning_rate": 2.536610723303134e-05, - "loss": 0.1009, + "epoch": 3.088803088803089, + "grad_norm": 0.7661743760108948, + "learning_rate": 1.1467181467181468e-05, + "loss": 0.0409, "step": 10400 }, { - "epoch": 0.7730580721817911, - "grad_norm": 0.9816102981567383, - "learning_rate": 2.5361651566909256e-05, - "loss": 0.0806, + "epoch": 3.0917730917730917, + "grad_norm": 0.42964890599250793, + "learning_rate": 1.144936144936145e-05, + "loss": 0.0656, "step": 10410 }, { - "epoch": 0.7738006832021387, - "grad_norm": 0.9218798875808716, - "learning_rate": 2.5357195900787167e-05, - "loss": 0.0424, + "epoch": 3.094743094743095, + "grad_norm": 0.4453743100166321, + "learning_rate": 1.1431541431541431e-05, + "loss": 0.0638, "step": 10420 }, { - "epoch": 0.7745432942224862, - "grad_norm": 1.2472357749938965, - "learning_rate": 2.5352740234665082e-05, - "loss": 0.0706, + "epoch": 3.0977130977130978, + "grad_norm": 1.3230763673782349, + "learning_rate": 1.1413721413721415e-05, + "loss": 0.0644, "step": 10430 }, { - "epoch": 0.7752859052428338, - "grad_norm": 3.426825523376465, - "learning_rate": 2.5348284568542997e-05, - "loss": 0.0776, + "epoch": 3.1006831006831006, + "grad_norm": 0.6661444306373596, + "learning_rate": 1.1395901395901397e-05, + "loss": 0.0585, "step": 10440 }, { - "epoch": 0.7760285162631814, - "grad_norm": 4.194761753082275, - "learning_rate": 2.5343828902420912e-05, - "loss": 0.0697, + "epoch": 3.1036531036531034, + "grad_norm": 0.6051294207572937, + "learning_rate": 1.1378081378081377e-05, + "loss": 0.0583, "step": 10450 }, { - "epoch": 0.7767711272835289, - "grad_norm": 0.678124189376831, - "learning_rate": 2.5339373236298827e-05, - "loss": 0.1128, + "epoch": 3.1066231066231067, + "grad_norm": 0.9865986704826355, + "learning_rate": 1.136026136026136e-05, + "loss": 0.0547, "step": 10460 }, { - "epoch": 0.7775137383038764, - "grad_norm": 2.648623466491699, - "learning_rate": 2.5334917570176742e-05, - "loss": 0.0876, + "epoch": 3.1095931095931095, + "grad_norm": 0.6704586148262024, + "learning_rate": 1.1342441342441343e-05, + "loss": 0.0582, "step": 10470 }, { - "epoch": 0.7782563493242239, - "grad_norm": 1.699841856956482, - "learning_rate": 2.5330461904054657e-05, - "loss": 0.074, + "epoch": 3.1125631125631124, + "grad_norm": 0.8656442761421204, + "learning_rate": 1.1324621324621326e-05, + "loss": 0.0574, "step": 10480 }, { - "epoch": 0.7789989603445715, - "grad_norm": 3.2049789428710938, - "learning_rate": 2.5326006237932572e-05, - "loss": 0.0872, + "epoch": 3.1155331155331156, + "grad_norm": 0.704118549823761, + "learning_rate": 1.1306801306801306e-05, + "loss": 0.0509, "step": 10490 }, { - "epoch": 0.7797415713649191, - "grad_norm": 3.888385057449341, - "learning_rate": 2.5321550571810487e-05, - "loss": 0.0717, + "epoch": 3.1185031185031185, + "grad_norm": 0.5805778503417969, + "learning_rate": 1.128898128898129e-05, + "loss": 0.0587, "step": 10500 }, { - "epoch": 0.7804841823852666, - "grad_norm": 2.0463638305664062, - "learning_rate": 2.5317094905688402e-05, - "loss": 0.1034, + "epoch": 3.1214731214731213, + "grad_norm": 1.2092708349227905, + "learning_rate": 1.1271161271161272e-05, + "loss": 0.069, "step": 10510 }, { - "epoch": 0.7812267934056142, - "grad_norm": 0.6997508406639099, - "learning_rate": 2.5312639239566314e-05, - "loss": 0.0717, + "epoch": 3.1244431244431246, + "grad_norm": 0.7527390718460083, + "learning_rate": 1.1253341253341253e-05, + "loss": 0.059, "step": 10520 }, { - "epoch": 0.7819694044259616, - "grad_norm": 1.8925009965896606, - "learning_rate": 2.530818357344423e-05, - "loss": 0.0693, + "epoch": 3.1274131274131274, + "grad_norm": 0.5099059343338013, + "learning_rate": 1.1235521235521235e-05, + "loss": 0.0616, "step": 10530 }, { - "epoch": 0.7827120154463092, - "grad_norm": 1.6464449167251587, - "learning_rate": 2.5303727907322147e-05, - "loss": 0.0822, + "epoch": 3.13038313038313, + "grad_norm": 0.6300451755523682, + "learning_rate": 1.1217701217701219e-05, + "loss": 0.0625, "step": 10540 }, { - "epoch": 0.7834546264666568, - "grad_norm": 0.8865845203399658, - "learning_rate": 2.529927224120006e-05, - "loss": 0.0847, + "epoch": 3.1333531333531335, + "grad_norm": 0.9511438608169556, + "learning_rate": 1.11998811998812e-05, + "loss": 0.0734, "step": 10550 }, { - "epoch": 0.7841972374870043, - "grad_norm": 2.912022113800049, - "learning_rate": 2.5294816575077974e-05, - "loss": 0.0784, + "epoch": 3.1363231363231363, + "grad_norm": 0.6538392901420593, + "learning_rate": 1.1182061182061181e-05, + "loss": 0.0429, "step": 10560 }, { - "epoch": 0.7849398485073519, - "grad_norm": 2.305199146270752, - "learning_rate": 2.5290360908955892e-05, - "loss": 0.0715, + "epoch": 3.139293139293139, + "grad_norm": 0.983573317527771, + "learning_rate": 1.1164241164241165e-05, + "loss": 0.049, "step": 10570 }, { - "epoch": 0.7856824595276994, - "grad_norm": 3.301766872406006, - "learning_rate": 2.5285905242833804e-05, - "loss": 0.0818, + "epoch": 3.1422631422631424, + "grad_norm": 0.8676197528839111, + "learning_rate": 1.1146421146421147e-05, + "loss": 0.0639, "step": 10580 }, { - "epoch": 0.7864250705480469, - "grad_norm": 0.7540196180343628, - "learning_rate": 2.528144957671172e-05, - "loss": 0.0804, + "epoch": 3.1452331452331452, + "grad_norm": 0.4240647256374359, + "learning_rate": 1.1128601128601128e-05, + "loss": 0.063, "step": 10590 }, { - "epoch": 0.7871676815683945, - "grad_norm": 4.044961452484131, - "learning_rate": 2.5276993910589634e-05, - "loss": 0.0827, + "epoch": 3.148203148203148, + "grad_norm": 0.9053698182106018, + "learning_rate": 1.111078111078111e-05, + "loss": 0.0409, "step": 10600 }, { - "epoch": 0.787910292588742, - "grad_norm": 1.4841824769973755, - "learning_rate": 2.527253824446755e-05, - "loss": 0.1031, + "epoch": 3.1511731511731513, + "grad_norm": 0.6372582316398621, + "learning_rate": 1.1092961092961094e-05, + "loss": 0.0648, "step": 10610 }, { - "epoch": 0.7886529036090896, - "grad_norm": 1.3933384418487549, - "learning_rate": 2.5268082578345464e-05, - "loss": 0.057, + "epoch": 3.154143154143154, + "grad_norm": 0.7005014419555664, + "learning_rate": 1.1075141075141076e-05, + "loss": 0.0563, "step": 10620 }, { - "epoch": 0.7893955146294371, - "grad_norm": 2.6198787689208984, - "learning_rate": 2.5263626912223375e-05, - "loss": 0.0856, + "epoch": 3.157113157113157, + "grad_norm": 0.8473065495491028, + "learning_rate": 1.1057321057321056e-05, + "loss": 0.0729, "step": 10630 }, { - "epoch": 0.7901381256497847, - "grad_norm": 1.5979726314544678, - "learning_rate": 2.5259171246101294e-05, - "loss": 0.0774, + "epoch": 3.1600831600831603, + "grad_norm": 0.3668000102043152, + "learning_rate": 1.103950103950104e-05, + "loss": 0.0609, "step": 10640 }, { - "epoch": 0.7908807366701321, - "grad_norm": 1.575772762298584, - "learning_rate": 2.525471557997921e-05, - "loss": 0.0775, + "epoch": 3.163053163053163, + "grad_norm": 0.3873116374015808, + "learning_rate": 1.1021681021681022e-05, + "loss": 0.0427, "step": 10650 }, { - "epoch": 0.7916233476904797, - "grad_norm": 2.2343573570251465, - "learning_rate": 2.525025991385712e-05, - "loss": 0.109, + "epoch": 3.166023166023166, + "grad_norm": 0.6118089556694031, + "learning_rate": 1.1003861003861003e-05, + "loss": 0.0426, "step": 10660 }, { - "epoch": 0.7923659587108273, - "grad_norm": 0.6971462368965149, - "learning_rate": 2.524580424773504e-05, - "loss": 0.072, + "epoch": 3.168993168993169, + "grad_norm": 0.8138056993484497, + "learning_rate": 1.0986040986040985e-05, + "loss": 0.0626, "step": 10670 }, { - "epoch": 0.7931085697311748, - "grad_norm": 1.1593713760375977, - "learning_rate": 2.5241348581612954e-05, - "loss": 0.1163, + "epoch": 3.171963171963172, + "grad_norm": 0.45243504643440247, + "learning_rate": 1.0968220968220969e-05, + "loss": 0.0643, "step": 10680 }, { - "epoch": 0.7938511807515224, - "grad_norm": 2.554516077041626, - "learning_rate": 2.5236892915490865e-05, - "loss": 0.096, + "epoch": 3.174933174933175, + "grad_norm": 0.8468220233917236, + "learning_rate": 1.0950400950400951e-05, + "loss": 0.0628, "step": 10690 }, { - "epoch": 0.7945937917718698, - "grad_norm": 1.123022198677063, - "learning_rate": 2.523243724936878e-05, - "loss": 0.0701, + "epoch": 3.177903177903178, + "grad_norm": 0.3471866846084595, + "learning_rate": 1.0932580932580932e-05, + "loss": 0.056, "step": 10700 }, { - "epoch": 0.7953364027922174, - "grad_norm": 2.6108126640319824, - "learning_rate": 2.52279815832467e-05, - "loss": 0.0747, + "epoch": 3.180873180873181, + "grad_norm": 0.6115337014198303, + "learning_rate": 1.0914760914760916e-05, + "loss": 0.0571, "step": 10710 }, { - "epoch": 0.796079013812565, - "grad_norm": 0.9052862524986267, - "learning_rate": 2.522352591712461e-05, - "loss": 0.0864, + "epoch": 3.1838431838431838, + "grad_norm": 0.7158623933792114, + "learning_rate": 1.0896940896940898e-05, + "loss": 0.0541, "step": 10720 }, { - "epoch": 0.7968216248329125, - "grad_norm": 1.4516713619232178, - "learning_rate": 2.5219070251002525e-05, - "loss": 0.0646, + "epoch": 3.186813186813187, + "grad_norm": 0.49794185161590576, + "learning_rate": 1.087912087912088e-05, + "loss": 0.0491, "step": 10730 }, { - "epoch": 0.7975642358532601, - "grad_norm": 4.304675579071045, - "learning_rate": 2.521461458488044e-05, - "loss": 0.0857, + "epoch": 3.18978318978319, + "grad_norm": 0.786274254322052, + "learning_rate": 1.086130086130086e-05, + "loss": 0.0675, "step": 10740 }, { - "epoch": 0.7983068468736076, - "grad_norm": 1.783659815788269, - "learning_rate": 2.5210158918758355e-05, - "loss": 0.0866, + "epoch": 3.1927531927531927, + "grad_norm": 1.0238100290298462, + "learning_rate": 1.0843480843480844e-05, + "loss": 0.0413, "step": 10750 }, { - "epoch": 0.7990494578939551, - "grad_norm": 1.544155240058899, - "learning_rate": 2.520570325263627e-05, - "loss": 0.0547, + "epoch": 3.1957231957231955, + "grad_norm": 1.032472848892212, + "learning_rate": 1.0825660825660826e-05, + "loss": 0.0607, "step": 10760 }, { - "epoch": 0.7997920689143027, - "grad_norm": 3.2248337268829346, - "learning_rate": 2.5201247586514182e-05, - "loss": 0.0715, + "epoch": 3.198693198693199, + "grad_norm": 0.7296070456504822, + "learning_rate": 1.0807840807840807e-05, + "loss": 0.0532, "step": 10770 }, { - "epoch": 0.8005346799346502, - "grad_norm": 1.370150089263916, - "learning_rate": 2.51967919203921e-05, - "loss": 0.0913, + "epoch": 3.2016632016632016, + "grad_norm": 1.2393643856048584, + "learning_rate": 1.079002079002079e-05, + "loss": 0.0602, "step": 10780 }, { - "epoch": 0.8012772909549978, - "grad_norm": 1.1197993755340576, - "learning_rate": 2.5192336254270015e-05, - "loss": 0.0771, + "epoch": 3.2046332046332044, + "grad_norm": 0.6754175424575806, + "learning_rate": 1.0772200772200773e-05, + "loss": 0.0592, "step": 10790 }, { - "epoch": 0.8020199019753453, - "grad_norm": 1.1327694654464722, - "learning_rate": 2.5187880588147927e-05, - "loss": 0.0875, + "epoch": 3.2076032076032077, + "grad_norm": 0.5455211997032166, + "learning_rate": 1.0754380754380755e-05, + "loss": 0.0622, "step": 10800 }, { - "epoch": 0.8027625129956929, - "grad_norm": 1.8613241910934448, - "learning_rate": 2.5183424922025845e-05, - "loss": 0.0681, + "epoch": 3.2105732105732105, + "grad_norm": 0.449032187461853, + "learning_rate": 1.0736560736560736e-05, + "loss": 0.0497, "step": 10810 }, { - "epoch": 0.8035051240160404, - "grad_norm": 2.5763204097747803, - "learning_rate": 2.517896925590376e-05, - "loss": 0.0884, + "epoch": 3.2135432135432134, + "grad_norm": 0.3489013612270355, + "learning_rate": 1.071874071874072e-05, + "loss": 0.0669, "step": 10820 }, { - "epoch": 0.8042477350363879, - "grad_norm": 2.63012433052063, - "learning_rate": 2.5174513589781672e-05, - "loss": 0.0685, + "epoch": 3.2165132165132166, + "grad_norm": 1.101258397102356, + "learning_rate": 1.0700920700920702e-05, + "loss": 0.0718, "step": 10830 }, { - "epoch": 0.8049903460567355, - "grad_norm": 0.752113401889801, - "learning_rate": 2.5170057923659587e-05, - "loss": 0.1005, + "epoch": 3.2194832194832195, + "grad_norm": 0.637738049030304, + "learning_rate": 1.0683100683100682e-05, + "loss": 0.0455, "step": 10840 }, { - "epoch": 0.805732957077083, - "grad_norm": 1.2157506942749023, - "learning_rate": 2.5165602257537502e-05, - "loss": 0.0859, + "epoch": 3.2224532224532223, + "grad_norm": 0.8752096891403198, + "learning_rate": 1.0665280665280666e-05, + "loss": 0.0671, "step": 10850 }, { - "epoch": 0.8064755680974306, - "grad_norm": 2.4420969486236572, - "learning_rate": 2.5161146591415417e-05, - "loss": 0.0972, + "epoch": 3.2254232254232256, + "grad_norm": 0.5893465876579285, + "learning_rate": 1.0647460647460648e-05, + "loss": 0.0627, "step": 10860 }, { - "epoch": 0.8072181791177782, - "grad_norm": 2.374080181121826, - "learning_rate": 2.5156690925293332e-05, - "loss": 0.0523, + "epoch": 3.2283932283932284, + "grad_norm": 1.188922643661499, + "learning_rate": 1.062964062964063e-05, + "loss": 0.066, "step": 10870 }, { - "epoch": 0.8079607901381256, - "grad_norm": 0.5466364622116089, - "learning_rate": 2.5152235259171247e-05, - "loss": 0.0482, + "epoch": 3.2313632313632312, + "grad_norm": 0.6569589376449585, + "learning_rate": 1.0611820611820612e-05, + "loss": 0.0481, "step": 10880 }, { - "epoch": 0.8087034011584732, - "grad_norm": 0.722277045249939, - "learning_rate": 2.5147779593049162e-05, - "loss": 0.0616, + "epoch": 3.2343332343332345, + "grad_norm": 0.7973625063896179, + "learning_rate": 1.0594000594000595e-05, + "loss": 0.0532, "step": 10890 }, { - "epoch": 0.8094460121788207, - "grad_norm": 3.784972906112671, - "learning_rate": 2.5143323926927077e-05, - "loss": 0.0835, + "epoch": 3.2373032373032373, + "grad_norm": 1.0945541858673096, + "learning_rate": 1.0576180576180577e-05, + "loss": 0.0557, "step": 10900 }, { - "epoch": 0.8101886231991683, - "grad_norm": 1.7221379280090332, - "learning_rate": 2.5138868260804992e-05, - "loss": 0.0891, + "epoch": 3.24027324027324, + "grad_norm": 0.6560423374176025, + "learning_rate": 1.0558360558360557e-05, + "loss": 0.048, "step": 10910 }, { - "epoch": 0.8109312342195159, - "grad_norm": 1.2730120420455933, - "learning_rate": 2.5134412594682907e-05, - "loss": 0.078, + "epoch": 3.2432432432432434, + "grad_norm": 0.7343136072158813, + "learning_rate": 1.0540540540540541e-05, + "loss": 0.0588, "step": 10920 }, { - "epoch": 0.8116738452398634, - "grad_norm": 1.8988823890686035, - "learning_rate": 2.512995692856082e-05, - "loss": 0.0918, + "epoch": 3.2462132462132463, + "grad_norm": 0.6441863775253296, + "learning_rate": 1.0522720522720523e-05, + "loss": 0.0501, "step": 10930 }, { - "epoch": 0.8124164562602109, - "grad_norm": 0.7136462926864624, - "learning_rate": 2.5125501262438733e-05, - "loss": 0.047, + "epoch": 3.249183249183249, + "grad_norm": 0.5556747913360596, + "learning_rate": 1.0504900504900505e-05, + "loss": 0.0521, "step": 10940 }, { - "epoch": 0.8131590672805584, - "grad_norm": 1.3096719980239868, - "learning_rate": 2.5121045596316652e-05, - "loss": 0.09, + "epoch": 3.252153252153252, + "grad_norm": 0.636785089969635, + "learning_rate": 1.0487080487080488e-05, + "loss": 0.0593, "step": 10950 }, { - "epoch": 0.813901678300906, - "grad_norm": 1.3436990976333618, - "learning_rate": 2.5116589930194563e-05, - "loss": 0.0795, + "epoch": 3.255123255123255, + "grad_norm": 1.0058456659317017, + "learning_rate": 1.046926046926047e-05, + "loss": 0.0683, "step": 10960 }, { - "epoch": 0.8146442893212535, - "grad_norm": 1.0467826128005981, - "learning_rate": 2.511213426407248e-05, - "loss": 0.0906, + "epoch": 3.258093258093258, + "grad_norm": 0.6078860759735107, + "learning_rate": 1.0451440451440452e-05, + "loss": 0.0525, "step": 10970 }, { - "epoch": 0.8153869003416011, - "grad_norm": 0.924268364906311, - "learning_rate": 2.5107678597950397e-05, - "loss": 0.094, + "epoch": 3.261063261063261, + "grad_norm": 0.675046980381012, + "learning_rate": 1.0433620433620434e-05, + "loss": 0.0538, "step": 10980 }, { - "epoch": 0.8161295113619486, - "grad_norm": 1.8887720108032227, - "learning_rate": 2.510322293182831e-05, - "loss": 0.0878, + "epoch": 3.264033264033264, + "grad_norm": 0.4468725323677063, + "learning_rate": 1.0415800415800416e-05, + "loss": 0.0669, "step": 10990 }, { - "epoch": 0.8168721223822961, - "grad_norm": 3.255546808242798, - "learning_rate": 2.5098767265706223e-05, - "loss": 0.0729, + "epoch": 3.267003267003267, + "grad_norm": 0.7147549986839294, + "learning_rate": 1.0397980397980398e-05, + "loss": 0.0454, "step": 11000 }, { - "epoch": 0.8176147334026437, - "grad_norm": 1.3691035509109497, - "learning_rate": 2.509431159958414e-05, - "loss": 0.0854, + "epoch": 3.2699732699732698, + "grad_norm": 0.8929649591445923, + "learning_rate": 1.038016038016038e-05, + "loss": 0.0399, "step": 11010 }, { - "epoch": 0.8183573444229912, - "grad_norm": 0.6990775465965271, - "learning_rate": 2.5089855933462053e-05, - "loss": 0.046, + "epoch": 3.272943272943273, + "grad_norm": 0.3554942011833191, + "learning_rate": 1.0362340362340363e-05, + "loss": 0.0501, "step": 11020 }, { - "epoch": 0.8190999554433388, - "grad_norm": 2.0553324222564697, - "learning_rate": 2.508540026733997e-05, - "loss": 0.0902, + "epoch": 3.275913275913276, + "grad_norm": 0.4639175236225128, + "learning_rate": 1.0344520344520345e-05, + "loss": 0.0614, "step": 11030 }, { - "epoch": 0.8198425664636864, - "grad_norm": 2.257805109024048, - "learning_rate": 2.508094460121788e-05, - "loss": 0.0885, + "epoch": 3.2788832788832787, + "grad_norm": 0.8245081901550293, + "learning_rate": 1.0326700326700327e-05, + "loss": 0.0581, "step": 11040 }, { - "epoch": 0.8205851774840338, - "grad_norm": 1.704160451889038, - "learning_rate": 2.50764889350958e-05, - "loss": 0.0632, + "epoch": 3.281853281853282, + "grad_norm": 0.9380563497543335, + "learning_rate": 1.030888030888031e-05, + "loss": 0.0677, "step": 11050 }, { - "epoch": 0.8213277885043814, - "grad_norm": 0.8274914622306824, - "learning_rate": 2.5072033268973713e-05, - "loss": 0.0925, + "epoch": 3.284823284823285, + "grad_norm": 1.2146899700164795, + "learning_rate": 1.0291060291060291e-05, + "loss": 0.0511, "step": 11060 }, { - "epoch": 0.8220703995247289, - "grad_norm": 0.8776381015777588, - "learning_rate": 2.5067577602851625e-05, - "loss": 0.0756, + "epoch": 3.2877932877932876, + "grad_norm": 0.7667972445487976, + "learning_rate": 1.0273240273240274e-05, + "loss": 0.0475, "step": 11070 }, { - "epoch": 0.8228130105450765, - "grad_norm": 1.621468424797058, - "learning_rate": 2.5063121936729543e-05, - "loss": 0.0895, + "epoch": 3.290763290763291, + "grad_norm": 0.6132957339286804, + "learning_rate": 1.0255420255420256e-05, + "loss": 0.0539, "step": 11080 }, { - "epoch": 0.8235556215654241, - "grad_norm": 0.5569895505905151, - "learning_rate": 2.5058666270607458e-05, - "loss": 0.0859, + "epoch": 3.2937332937332937, + "grad_norm": 1.2011501789093018, + "learning_rate": 1.0237600237600238e-05, + "loss": 0.0475, "step": 11090 }, { - "epoch": 0.8242982325857716, - "grad_norm": 1.4502453804016113, - "learning_rate": 2.505421060448537e-05, - "loss": 0.113, + "epoch": 3.2967032967032965, + "grad_norm": 0.6205746531486511, + "learning_rate": 1.021978021978022e-05, + "loss": 0.0593, "step": 11100 }, { - "epoch": 0.8250408436061191, - "grad_norm": 2.805652141571045, - "learning_rate": 2.5049754938363285e-05, - "loss": 0.0803, + "epoch": 3.2996732996733, + "grad_norm": 1.1285995244979858, + "learning_rate": 1.0201960201960202e-05, + "loss": 0.0693, "step": 11110 }, { - "epoch": 0.8257834546264666, - "grad_norm": 1.2207911014556885, - "learning_rate": 2.5045299272241203e-05, - "loss": 0.0753, + "epoch": 3.3026433026433026, + "grad_norm": 1.0713976621627808, + "learning_rate": 1.0184140184140184e-05, + "loss": 0.062, "step": 11120 }, { - "epoch": 0.8265260656468142, - "grad_norm": 1.3321232795715332, - "learning_rate": 2.5040843606119115e-05, - "loss": 0.0789, + "epoch": 3.3056133056133055, + "grad_norm": 0.6657689213752747, + "learning_rate": 1.0166320166320167e-05, + "loss": 0.0544, "step": 11130 }, { - "epoch": 0.8272686766671618, - "grad_norm": 0.8445536494255066, - "learning_rate": 2.503638793999703e-05, - "loss": 0.1171, + "epoch": 3.3085833085833087, + "grad_norm": 0.9671308994293213, + "learning_rate": 1.0148500148500149e-05, + "loss": 0.0586, "step": 11140 }, { - "epoch": 0.8280112876875093, - "grad_norm": 1.156607985496521, - "learning_rate": 2.5031932273874945e-05, - "loss": 0.0651, + "epoch": 3.3115533115533116, + "grad_norm": 0.6092202663421631, + "learning_rate": 1.0130680130680131e-05, + "loss": 0.0646, "step": 11150 }, { - "epoch": 0.8287538987078569, - "grad_norm": 2.5844602584838867, - "learning_rate": 2.502747660775286e-05, - "loss": 0.0992, + "epoch": 3.3145233145233144, + "grad_norm": 0.8114497065544128, + "learning_rate": 1.0112860112860113e-05, + "loss": 0.0723, "step": 11160 }, { - "epoch": 0.8294965097282043, - "grad_norm": 2.682854413986206, - "learning_rate": 2.5023020941630775e-05, - "loss": 0.0615, + "epoch": 3.3174933174933177, + "grad_norm": 1.0398253202438354, + "learning_rate": 1.0095040095040095e-05, + "loss": 0.0516, "step": 11170 }, { - "epoch": 0.8302391207485519, - "grad_norm": 1.1782902479171753, - "learning_rate": 2.5018565275508686e-05, - "loss": 0.075, + "epoch": 3.3204633204633205, + "grad_norm": 0.773544430732727, + "learning_rate": 1.0077220077220078e-05, + "loss": 0.0553, "step": 11180 }, { - "epoch": 0.8309817317688994, - "grad_norm": 3.394202709197998, - "learning_rate": 2.5014109609386605e-05, - "loss": 0.1351, + "epoch": 3.3234333234333233, + "grad_norm": 1.1099355220794678, + "learning_rate": 1.005940005940006e-05, + "loss": 0.0662, "step": 11190 }, { - "epoch": 0.831724342789247, - "grad_norm": 2.0263335704803467, - "learning_rate": 2.500965394326452e-05, - "loss": 0.086, + "epoch": 3.3264033264033266, + "grad_norm": 0.7854142189025879, + "learning_rate": 1.0041580041580042e-05, + "loss": 0.0578, "step": 11200 }, { - "epoch": 0.8324669538095946, - "grad_norm": 2.133747100830078, - "learning_rate": 2.500519827714243e-05, - "loss": 0.0788, + "epoch": 3.3293733293733294, + "grad_norm": 0.47787368297576904, + "learning_rate": 1.0023760023760024e-05, + "loss": 0.0634, "step": 11210 }, { - "epoch": 0.8332095648299421, - "grad_norm": 3.7382562160491943, - "learning_rate": 2.500074261102035e-05, - "loss": 0.067, + "epoch": 3.3323433323433322, + "grad_norm": 0.5865157246589661, + "learning_rate": 1.0005940005940006e-05, + "loss": 0.0425, "step": 11220 }, { - "epoch": 0.8339521758502896, - "grad_norm": 1.4864078760147095, - "learning_rate": 2.4996286944898265e-05, - "loss": 0.0822, + "epoch": 3.3353133353133355, + "grad_norm": 0.7458857297897339, + "learning_rate": 9.988119988119988e-06, + "loss": 0.0659, "step": 11230 }, { - "epoch": 0.8346947868706371, - "grad_norm": 2.0430474281311035, - "learning_rate": 2.4991831278776176e-05, - "loss": 0.0774, + "epoch": 3.3382833382833383, + "grad_norm": 0.73867267370224, + "learning_rate": 9.97029997029997e-06, + "loss": 0.0615, "step": 11240 }, { - "epoch": 0.8354373978909847, - "grad_norm": 3.536273956298828, - "learning_rate": 2.498737561265409e-05, - "loss": 0.1103, + "epoch": 3.341253341253341, + "grad_norm": 0.9785704016685486, + "learning_rate": 9.952479952479953e-06, + "loss": 0.0452, "step": 11250 }, { - "epoch": 0.8361800089113323, - "grad_norm": 0.7639611959457397, - "learning_rate": 2.4982919946532006e-05, - "loss": 0.0826, + "epoch": 3.3442233442233444, + "grad_norm": 0.7227851152420044, + "learning_rate": 9.934659934659935e-06, + "loss": 0.0572, "step": 11260 }, { - "epoch": 0.8369226199316798, - "grad_norm": 1.1882314682006836, - "learning_rate": 2.497846428040992e-05, - "loss": 0.0952, + "epoch": 3.3471933471933473, + "grad_norm": 0.6161572933197021, + "learning_rate": 9.916839916839917e-06, + "loss": 0.0457, "step": 11270 }, { - "epoch": 0.8376652309520273, - "grad_norm": 1.9526349306106567, - "learning_rate": 2.4974008614287836e-05, - "loss": 0.0446, + "epoch": 3.35016335016335, + "grad_norm": 0.6703232526779175, + "learning_rate": 9.8990198990199e-06, + "loss": 0.0682, "step": 11280 }, { - "epoch": 0.8384078419723748, - "grad_norm": 2.2650139331817627, - "learning_rate": 2.496955294816575e-05, - "loss": 0.0896, + "epoch": 3.3531333531333534, + "grad_norm": 0.6447526216506958, + "learning_rate": 9.881199881199881e-06, + "loss": 0.0596, "step": 11290 }, { - "epoch": 0.8391504529927224, - "grad_norm": 1.7543269395828247, - "learning_rate": 2.4965097282043666e-05, - "loss": 0.0582, + "epoch": 3.356103356103356, + "grad_norm": 0.46125978231430054, + "learning_rate": 9.863379863379865e-06, + "loss": 0.0659, "step": 11300 }, { - "epoch": 0.83989306401307, - "grad_norm": 1.1936362981796265, - "learning_rate": 2.496064161592158e-05, - "loss": 0.0939, + "epoch": 3.359073359073359, + "grad_norm": 1.2563014030456543, + "learning_rate": 9.845559845559846e-06, + "loss": 0.057, "step": 11310 }, { - "epoch": 0.8406356750334175, - "grad_norm": 2.0941545963287354, - "learning_rate": 2.4956185949799496e-05, - "loss": 0.0691, + "epoch": 3.362043362043362, + "grad_norm": 0.8137726187705994, + "learning_rate": 9.827739827739828e-06, + "loss": 0.0461, "step": 11320 }, { - "epoch": 0.8413782860537651, - "grad_norm": 3.267097234725952, - "learning_rate": 2.495173028367741e-05, - "loss": 0.0826, + "epoch": 3.365013365013365, + "grad_norm": 0.5662651658058167, + "learning_rate": 9.80991980991981e-06, + "loss": 0.0484, "step": 11330 }, { - "epoch": 0.8421208970741125, - "grad_norm": 2.769155263900757, - "learning_rate": 2.4947274617555326e-05, - "loss": 0.0572, + "epoch": 3.367983367983368, + "grad_norm": 0.5154465436935425, + "learning_rate": 9.792099792099792e-06, + "loss": 0.0579, "step": 11340 }, { - "epoch": 0.8428635080944601, - "grad_norm": 0.9428232312202454, - "learning_rate": 2.4942818951433238e-05, - "loss": 0.081, + "epoch": 3.3709533709533708, + "grad_norm": 0.7611321806907654, + "learning_rate": 9.774279774279774e-06, + "loss": 0.0613, "step": 11350 }, { - "epoch": 0.8436061191148077, - "grad_norm": 1.1093528270721436, - "learning_rate": 2.4938363285311156e-05, - "loss": 0.0688, + "epoch": 3.373923373923374, + "grad_norm": 0.8335089087486267, + "learning_rate": 9.756459756459757e-06, + "loss": 0.0493, "step": 11360 }, { - "epoch": 0.8443487301351552, - "grad_norm": 1.8220789432525635, - "learning_rate": 2.4933907619189068e-05, - "loss": 0.0744, + "epoch": 3.376893376893377, + "grad_norm": 1.004220724105835, + "learning_rate": 9.73863973863974e-06, + "loss": 0.0511, "step": 11370 }, { - "epoch": 0.8450913411555028, - "grad_norm": 3.5718438625335693, - "learning_rate": 2.4929451953066983e-05, - "loss": 0.0902, + "epoch": 3.3798633798633797, + "grad_norm": 0.5322721600532532, + "learning_rate": 9.720819720819721e-06, + "loss": 0.0551, "step": 11380 }, { - "epoch": 0.8458339521758503, - "grad_norm": 1.5545248985290527, - "learning_rate": 2.49249962869449e-05, - "loss": 0.1087, + "epoch": 3.382833382833383, + "grad_norm": 0.7684707641601562, + "learning_rate": 9.702999702999703e-06, + "loss": 0.0658, "step": 11390 }, { - "epoch": 0.8465765631961978, - "grad_norm": 1.3270010948181152, - "learning_rate": 2.4920540620822813e-05, - "loss": 0.1128, + "epoch": 3.385803385803386, + "grad_norm": 0.7341310381889343, + "learning_rate": 9.685179685179685e-06, + "loss": 0.0626, "step": 11400 }, { - "epoch": 0.8473191742165453, - "grad_norm": 1.9811359643936157, - "learning_rate": 2.4916084954700728e-05, - "loss": 0.0802, + "epoch": 3.3887733887733886, + "grad_norm": 0.7289576530456543, + "learning_rate": 9.667359667359667e-06, + "loss": 0.0508, "step": 11410 }, { - "epoch": 0.8480617852368929, - "grad_norm": 2.4535109996795654, - "learning_rate": 2.4911629288578643e-05, - "loss": 0.076, + "epoch": 3.391743391743392, + "grad_norm": 0.9594446420669556, + "learning_rate": 9.64953964953965e-06, + "loss": 0.0559, "step": 11420 }, { - "epoch": 0.8488043962572405, - "grad_norm": 2.042264223098755, - "learning_rate": 2.4907173622456558e-05, - "loss": 0.0889, + "epoch": 3.3947133947133947, + "grad_norm": 0.7913696765899658, + "learning_rate": 9.631719631719632e-06, + "loss": 0.0648, "step": 11430 }, { - "epoch": 0.849547007277588, - "grad_norm": 1.1611895561218262, - "learning_rate": 2.4902717956334473e-05, - "loss": 0.0782, + "epoch": 3.3976833976833976, + "grad_norm": 0.8502475619316101, + "learning_rate": 9.613899613899616e-06, + "loss": 0.0531, "step": 11440 }, { - "epoch": 0.8502896182979356, - "grad_norm": 1.567514181137085, - "learning_rate": 2.4898262290212385e-05, - "loss": 0.1065, + "epoch": 3.400653400653401, + "grad_norm": 0.8771364092826843, + "learning_rate": 9.596079596079596e-06, + "loss": 0.0577, "step": 11450 }, { - "epoch": 0.851032229318283, - "grad_norm": 1.7414668798446655, - "learning_rate": 2.4893806624090303e-05, - "loss": 0.0752, + "epoch": 3.4036234036234037, + "grad_norm": 0.6660274267196655, + "learning_rate": 9.578259578259578e-06, + "loss": 0.0496, "step": 11460 }, { - "epoch": 0.8517748403386306, - "grad_norm": 1.3344578742980957, - "learning_rate": 2.4889350957968218e-05, - "loss": 0.0794, + "epoch": 3.4065934065934065, + "grad_norm": 0.7120699286460876, + "learning_rate": 9.56043956043956e-06, + "loss": 0.0533, "step": 11470 }, { - "epoch": 0.8525174513589782, - "grad_norm": 1.340126395225525, - "learning_rate": 2.488489529184613e-05, - "loss": 0.0936, + "epoch": 3.4095634095634098, + "grad_norm": 0.8846875429153442, + "learning_rate": 9.542619542619543e-06, + "loss": 0.0557, "step": 11480 }, { - "epoch": 0.8532600623793257, - "grad_norm": 2.9865872859954834, - "learning_rate": 2.4880439625724048e-05, - "loss": 0.0619, + "epoch": 3.4125334125334126, + "grad_norm": 0.5555750131607056, + "learning_rate": 9.524799524799525e-06, + "loss": 0.049, "step": 11490 }, { - "epoch": 0.8540026733996733, - "grad_norm": 2.3079800605773926, - "learning_rate": 2.4875983959601963e-05, - "loss": 0.0819, + "epoch": 3.4155034155034154, + "grad_norm": 0.6334190964698792, + "learning_rate": 9.506979506979507e-06, + "loss": 0.0679, "step": 11500 }, { - "epoch": 0.8547452844200208, - "grad_norm": 2.029001474380493, - "learning_rate": 2.4871528293479874e-05, - "loss": 0.0964, + "epoch": 3.4184734184734182, + "grad_norm": 0.462439626455307, + "learning_rate": 9.48915948915949e-06, + "loss": 0.0572, "step": 11510 }, { - "epoch": 0.8554878954403683, - "grad_norm": 0.8514242768287659, - "learning_rate": 2.486707262735779e-05, - "loss": 0.1067, + "epoch": 3.4214434214434215, + "grad_norm": 0.6081441640853882, + "learning_rate": 9.471339471339471e-06, + "loss": 0.0487, "step": 11520 }, { - "epoch": 0.8562305064607159, - "grad_norm": 3.7588460445404053, - "learning_rate": 2.4862616961235708e-05, - "loss": 0.0691, + "epoch": 3.4244134244134243, + "grad_norm": 0.7960311770439148, + "learning_rate": 9.453519453519453e-06, + "loss": 0.0628, "step": 11530 }, { - "epoch": 0.8569731174810634, - "grad_norm": 1.4834811687469482, - "learning_rate": 2.485816129511362e-05, - "loss": 0.0713, + "epoch": 3.427383427383427, + "grad_norm": 0.5583157539367676, + "learning_rate": 9.435699435699436e-06, + "loss": 0.0666, "step": 11540 }, { - "epoch": 0.857715728501411, - "grad_norm": 1.4009684324264526, - "learning_rate": 2.4853705628991534e-05, - "loss": 0.1032, + "epoch": 3.4303534303534304, + "grad_norm": 0.8321641683578491, + "learning_rate": 9.417879417879418e-06, + "loss": 0.0527, "step": 11550 }, { - "epoch": 0.8584583395217585, - "grad_norm": 4.322129249572754, - "learning_rate": 2.484924996286945e-05, - "loss": 0.1125, + "epoch": 3.4333234333234333, + "grad_norm": 0.6398223042488098, + "learning_rate": 9.4000594000594e-06, + "loss": 0.0421, "step": 11560 }, { - "epoch": 0.859200950542106, - "grad_norm": 2.336434841156006, - "learning_rate": 2.4844794296747364e-05, - "loss": 0.1049, + "epoch": 3.436293436293436, + "grad_norm": 0.8610657453536987, + "learning_rate": 9.382239382239382e-06, + "loss": 0.0512, "step": 11570 }, { - "epoch": 0.8599435615624536, - "grad_norm": 1.3329766988754272, - "learning_rate": 2.484033863062528e-05, - "loss": 0.0499, + "epoch": 3.4392634392634394, + "grad_norm": 0.8731980919837952, + "learning_rate": 9.364419364419366e-06, + "loss": 0.0529, "step": 11580 }, { - "epoch": 0.8606861725828011, - "grad_norm": 2.4188973903656006, - "learning_rate": 2.483588296450319e-05, - "loss": 0.0634, + "epoch": 3.442233442233442, + "grad_norm": 0.6579751372337341, + "learning_rate": 9.346599346599347e-06, + "loss": 0.0437, "step": 11590 }, { - "epoch": 0.8614287836031487, - "grad_norm": 0.7930353283882141, - "learning_rate": 2.483142729838111e-05, - "loss": 0.0704, + "epoch": 3.445203445203445, + "grad_norm": 1.1427984237670898, + "learning_rate": 9.328779328779329e-06, + "loss": 0.0565, "step": 11600 }, { - "epoch": 0.8621713946234962, - "grad_norm": 1.0637152194976807, - "learning_rate": 2.4826971632259024e-05, - "loss": 0.0958, + "epoch": 3.4481734481734483, + "grad_norm": 0.7703096866607666, + "learning_rate": 9.31095931095931e-06, + "loss": 0.0473, "step": 11610 }, { - "epoch": 0.8629140056438438, - "grad_norm": 1.6092619895935059, - "learning_rate": 2.4822515966136936e-05, - "loss": 0.0967, + "epoch": 3.451143451143451, + "grad_norm": 1.2040685415267944, + "learning_rate": 9.293139293139295e-06, + "loss": 0.0579, "step": 11620 }, { - "epoch": 0.8636566166641912, - "grad_norm": 1.6927438974380493, - "learning_rate": 2.4818060300014854e-05, - "loss": 0.0814, + "epoch": 3.454113454113454, + "grad_norm": 0.7224343419075012, + "learning_rate": 9.275319275319275e-06, + "loss": 0.0523, "step": 11630 }, { - "epoch": 0.8643992276845388, - "grad_norm": 2.1163792610168457, - "learning_rate": 2.481360463389277e-05, - "loss": 0.0845, + "epoch": 3.457083457083457, + "grad_norm": 1.0151997804641724, + "learning_rate": 9.257499257499257e-06, + "loss": 0.0559, "step": 11640 }, { - "epoch": 0.8651418387048864, - "grad_norm": 3.7081539630889893, - "learning_rate": 2.480914896777068e-05, - "loss": 0.0802, + "epoch": 3.46005346005346, + "grad_norm": 0.5172938704490662, + "learning_rate": 9.239679239679241e-06, + "loss": 0.0518, "step": 11650 }, { - "epoch": 0.8658844497252339, - "grad_norm": 0.5612799525260925, - "learning_rate": 2.48046933016486e-05, - "loss": 0.0845, + "epoch": 3.463023463023463, + "grad_norm": 0.864036500453949, + "learning_rate": 9.221859221859222e-06, + "loss": 0.0407, "step": 11660 }, { - "epoch": 0.8666270607455815, - "grad_norm": 3.5866827964782715, - "learning_rate": 2.480023763552651e-05, - "loss": 0.0662, + "epoch": 3.465993465993466, + "grad_norm": 0.603539228439331, + "learning_rate": 9.204039204039204e-06, + "loss": 0.0589, "step": 11670 }, { - "epoch": 0.867369671765929, - "grad_norm": 2.168499231338501, - "learning_rate": 2.4795781969404426e-05, - "loss": 0.077, + "epoch": 3.468963468963469, + "grad_norm": 0.6137470006942749, + "learning_rate": 9.186219186219186e-06, + "loss": 0.0657, "step": 11680 }, { - "epoch": 0.8681122827862765, - "grad_norm": 1.5439636707305908, - "learning_rate": 2.479132630328234e-05, - "loss": 0.0681, + "epoch": 3.471933471933472, + "grad_norm": 0.8344042897224426, + "learning_rate": 9.16839916839917e-06, + "loss": 0.0562, "step": 11690 }, { - "epoch": 0.8688548938066241, - "grad_norm": 5.759429931640625, - "learning_rate": 2.4786870637160256e-05, - "loss": 0.0536, + "epoch": 3.474903474903475, + "grad_norm": 0.9684216976165771, + "learning_rate": 9.15057915057915e-06, + "loss": 0.0482, "step": 11700 }, { - "epoch": 0.8695975048269716, - "grad_norm": 3.6019375324249268, - "learning_rate": 2.478241497103817e-05, - "loss": 0.0706, + "epoch": 3.477873477873478, + "grad_norm": 0.5925269722938538, + "learning_rate": 9.132759132759133e-06, + "loss": 0.0563, "step": 11710 }, { - "epoch": 0.8703401158473192, - "grad_norm": 2.023331880569458, - "learning_rate": 2.4777959304916086e-05, - "loss": 0.1183, + "epoch": 3.4808434808434807, + "grad_norm": 1.1107386350631714, + "learning_rate": 9.114939114939116e-06, + "loss": 0.0585, "step": 11720 }, { - "epoch": 0.8710827268676667, - "grad_norm": 2.2907047271728516, - "learning_rate": 2.4773503638794e-05, - "loss": 0.1194, + "epoch": 3.483813483813484, + "grad_norm": 0.7802149653434753, + "learning_rate": 9.097119097119097e-06, + "loss": 0.0639, "step": 11730 }, { - "epoch": 0.8718253378880143, - "grad_norm": 0.9772320985794067, - "learning_rate": 2.4769047972671916e-05, - "loss": 0.0775, + "epoch": 3.486783486783487, + "grad_norm": 0.9649807214736938, + "learning_rate": 9.079299079299079e-06, + "loss": 0.0687, "step": 11740 }, { - "epoch": 0.8725679489083618, - "grad_norm": 2.4488956928253174, - "learning_rate": 2.476459230654983e-05, - "loss": 0.0775, + "epoch": 3.4897534897534896, + "grad_norm": 0.5262308716773987, + "learning_rate": 9.061479061479061e-06, + "loss": 0.0432, "step": 11750 }, { - "epoch": 0.8733105599287093, - "grad_norm": 1.9681178331375122, - "learning_rate": 2.4760136640427743e-05, - "loss": 0.0738, + "epoch": 3.492723492723493, + "grad_norm": 0.8198150396347046, + "learning_rate": 9.043659043659045e-06, + "loss": 0.0583, "step": 11760 }, { - "epoch": 0.8740531709490569, - "grad_norm": 1.475229024887085, - "learning_rate": 2.475568097430566e-05, - "loss": 0.0754, + "epoch": 3.4956934956934957, + "grad_norm": 1.3013529777526855, + "learning_rate": 9.025839025839026e-06, + "loss": 0.0562, "step": 11770 }, { - "epoch": 0.8747957819694044, - "grad_norm": 2.42449951171875, - "learning_rate": 2.4751225308183573e-05, - "loss": 0.0802, + "epoch": 3.4986634986634986, + "grad_norm": 0.875752329826355, + "learning_rate": 9.008019008019008e-06, + "loss": 0.0499, "step": 11780 }, { - "epoch": 0.875538392989752, - "grad_norm": 1.4891407489776611, - "learning_rate": 2.4746769642061488e-05, - "loss": 0.0627, + "epoch": 3.501633501633502, + "grad_norm": 0.8648601174354553, + "learning_rate": 8.990198990198992e-06, + "loss": 0.0502, "step": 11790 }, { - "epoch": 0.8762810040100996, - "grad_norm": 2.7915236949920654, - "learning_rate": 2.4742313975939406e-05, - "loss": 0.0902, + "epoch": 3.5046035046035047, + "grad_norm": 0.9301609992980957, + "learning_rate": 8.972378972378972e-06, + "loss": 0.0478, "step": 11800 }, { - "epoch": 0.877023615030447, - "grad_norm": 0.7196487188339233, - "learning_rate": 2.4737858309817318e-05, - "loss": 0.0803, + "epoch": 3.5075735075735075, + "grad_norm": 0.9498510360717773, + "learning_rate": 8.954558954558954e-06, + "loss": 0.0503, "step": 11810 }, { - "epoch": 0.8777662260507946, - "grad_norm": 2.8779609203338623, - "learning_rate": 2.4733402643695233e-05, - "loss": 0.0804, + "epoch": 3.5105435105435108, + "grad_norm": 0.5462694764137268, + "learning_rate": 8.936738936738936e-06, + "loss": 0.0575, "step": 11820 }, { - "epoch": 0.8785088370711421, - "grad_norm": 1.198697566986084, - "learning_rate": 2.4728946977573148e-05, - "loss": 0.0507, + "epoch": 3.5135135135135136, + "grad_norm": 0.8553217649459839, + "learning_rate": 8.91891891891892e-06, + "loss": 0.0655, "step": 11830 }, { - "epoch": 0.8792514480914897, - "grad_norm": 2.312344789505005, - "learning_rate": 2.4724491311451063e-05, - "loss": 0.0869, + "epoch": 3.5164835164835164, + "grad_norm": 0.9228159189224243, + "learning_rate": 8.9010989010989e-06, + "loss": 0.0611, "step": 11840 }, { - "epoch": 0.8799940591118373, - "grad_norm": 1.2055100202560425, - "learning_rate": 2.4720035645328978e-05, - "loss": 0.0791, + "epoch": 3.5194535194535197, + "grad_norm": 0.6614230275154114, + "learning_rate": 8.883278883278883e-06, + "loss": 0.0465, "step": 11850 }, { - "epoch": 0.8807366701321847, - "grad_norm": 1.9583430290222168, - "learning_rate": 2.4715579979206893e-05, - "loss": 0.0704, + "epoch": 3.5224235224235225, + "grad_norm": 0.9171364307403564, + "learning_rate": 8.865458865458867e-06, + "loss": 0.0524, "step": 11860 }, { - "epoch": 0.8814792811525323, - "grad_norm": 3.8078420162200928, - "learning_rate": 2.4711124313084808e-05, - "loss": 0.0853, + "epoch": 3.5253935253935254, + "grad_norm": 0.4737289249897003, + "learning_rate": 8.847638847638847e-06, + "loss": 0.0688, "step": 11870 }, { - "epoch": 0.8822218921728798, - "grad_norm": 1.3811652660369873, - "learning_rate": 2.4706668646962723e-05, - "loss": 0.0972, + "epoch": 3.5283635283635286, + "grad_norm": 0.79871666431427, + "learning_rate": 8.82981882981883e-06, + "loss": 0.0556, "step": 11880 }, { - "epoch": 0.8829645031932274, - "grad_norm": 0.9326895475387573, - "learning_rate": 2.4702212980840634e-05, - "loss": 0.0777, + "epoch": 3.5313335313335315, + "grad_norm": 0.9557964205741882, + "learning_rate": 8.811998811998812e-06, + "loss": 0.0592, "step": 11890 }, { - "epoch": 0.883707114213575, - "grad_norm": 1.280218243598938, - "learning_rate": 2.4697757314718552e-05, - "loss": 0.0856, + "epoch": 3.5343035343035343, + "grad_norm": 0.9244213104248047, + "learning_rate": 8.794178794178795e-06, + "loss": 0.0513, "step": 11900 }, { - "epoch": 0.8844497252339225, - "grad_norm": 3.6391515731811523, - "learning_rate": 2.4693301648596467e-05, - "loss": 0.0919, + "epoch": 3.5372735372735375, + "grad_norm": 0.5867542624473572, + "learning_rate": 8.776358776358776e-06, + "loss": 0.0504, "step": 11910 }, { - "epoch": 0.88519233625427, - "grad_norm": 1.1083297729492188, - "learning_rate": 2.468884598247438e-05, - "loss": 0.0576, + "epoch": 3.5402435402435404, + "grad_norm": 0.47742247581481934, + "learning_rate": 8.758538758538758e-06, + "loss": 0.0529, "step": 11920 }, { - "epoch": 0.8859349472746175, - "grad_norm": 1.3229732513427734, - "learning_rate": 2.4684390316352294e-05, - "loss": 0.0891, + "epoch": 3.543213543213543, + "grad_norm": 1.0986418724060059, + "learning_rate": 8.740718740718742e-06, + "loss": 0.0546, "step": 11930 }, { - "epoch": 0.8866775582949651, - "grad_norm": 1.0628166198730469, - "learning_rate": 2.4679934650230212e-05, - "loss": 0.1007, + "epoch": 3.546183546183546, + "grad_norm": 0.7719411253929138, + "learning_rate": 8.722898722898724e-06, + "loss": 0.0564, "step": 11940 }, { - "epoch": 0.8874201693153126, - "grad_norm": 1.2441374063491821, - "learning_rate": 2.4675478984108124e-05, - "loss": 0.0989, + "epoch": 3.5491535491535493, + "grad_norm": 0.9818161129951477, + "learning_rate": 8.705078705078705e-06, + "loss": 0.0691, "step": 11950 }, { - "epoch": 0.8881627803356602, - "grad_norm": 0.5451275110244751, - "learning_rate": 2.467102331798604e-05, - "loss": 0.0576, + "epoch": 3.552123552123552, + "grad_norm": 0.6486049294471741, + "learning_rate": 8.687258687258687e-06, + "loss": 0.0662, "step": 11960 }, { - "epoch": 0.8889053913560078, - "grad_norm": 0.9148317575454712, - "learning_rate": 2.4666567651863954e-05, - "loss": 0.0591, + "epoch": 3.555093555093555, + "grad_norm": 0.6224350929260254, + "learning_rate": 8.66943866943867e-06, + "loss": 0.0565, "step": 11970 }, { - "epoch": 0.8896480023763552, - "grad_norm": 0.7988538146018982, - "learning_rate": 2.466211198574187e-05, - "loss": 0.0683, + "epoch": 3.5580635580635582, + "grad_norm": 1.0095922946929932, + "learning_rate": 8.651618651618651e-06, + "loss": 0.0609, "step": 11980 }, { - "epoch": 0.8903906133967028, - "grad_norm": 2.098226547241211, - "learning_rate": 2.4657656319619784e-05, - "loss": 0.0964, + "epoch": 3.561033561033561, + "grad_norm": 0.5099046230316162, + "learning_rate": 8.633798633798633e-06, + "loss": 0.0578, "step": 11990 }, { - "epoch": 0.8911332244170503, - "grad_norm": 2.0330681800842285, - "learning_rate": 2.4653200653497696e-05, - "loss": 0.0865, + "epoch": 3.564003564003564, + "grad_norm": 0.9583487510681152, + "learning_rate": 8.615978615978617e-06, + "loss": 0.0751, "step": 12000 }, { - "epoch": 0.8918758354373979, - "grad_norm": 2.2921535968780518, - "learning_rate": 2.4648744987375614e-05, - "loss": 0.077, + "epoch": 3.5669735669735667, + "grad_norm": 0.8922184109687805, + "learning_rate": 8.5981585981586e-06, + "loss": 0.0469, "step": 12010 }, { - "epoch": 0.8926184464577455, - "grad_norm": 3.7081544399261475, - "learning_rate": 2.464428932125353e-05, - "loss": 0.099, + "epoch": 3.56994356994357, + "grad_norm": 0.6828520894050598, + "learning_rate": 8.58033858033858e-06, + "loss": 0.0482, "step": 12020 }, { - "epoch": 0.893361057478093, - "grad_norm": 0.6023477911949158, - "learning_rate": 2.463983365513144e-05, - "loss": 0.0599, + "epoch": 3.572913572913573, + "grad_norm": 1.372278094291687, + "learning_rate": 8.562518562518562e-06, + "loss": 0.0606, "step": 12030 }, { - "epoch": 0.8941036684984405, - "grad_norm": 1.6796938180923462, - "learning_rate": 2.463537798900936e-05, - "loss": 0.0984, + "epoch": 3.5758835758835756, + "grad_norm": 0.5054883360862732, + "learning_rate": 8.544698544698546e-06, + "loss": 0.0465, "step": 12040 }, { - "epoch": 0.894846279518788, - "grad_norm": 4.132201194763184, - "learning_rate": 2.4630922322887274e-05, - "loss": 0.0771, + "epoch": 3.578853578853579, + "grad_norm": 0.564597487449646, + "learning_rate": 8.526878526878526e-06, + "loss": 0.0516, "step": 12050 }, { - "epoch": 0.8955888905391356, - "grad_norm": 2.146115303039551, - "learning_rate": 2.4626466656765186e-05, - "loss": 0.0808, + "epoch": 3.5818235818235817, + "grad_norm": 0.6574399471282959, + "learning_rate": 8.509058509058509e-06, + "loss": 0.0511, "step": 12060 }, { - "epoch": 0.8963315015594832, - "grad_norm": 0.9783619046211243, - "learning_rate": 2.4622010990643104e-05, - "loss": 0.0676, + "epoch": 3.5847935847935846, + "grad_norm": 0.4628748893737793, + "learning_rate": 8.491238491238492e-06, + "loss": 0.0494, "step": 12070 }, { - "epoch": 0.8970741125798307, - "grad_norm": 0.4546336829662323, - "learning_rate": 2.4617555324521016e-05, - "loss": 0.092, + "epoch": 3.587763587763588, + "grad_norm": 0.957274854183197, + "learning_rate": 8.473418473418475e-06, + "loss": 0.0558, "step": 12080 }, { - "epoch": 0.8978167236001783, - "grad_norm": 1.2638888359069824, - "learning_rate": 2.461309965839893e-05, - "loss": 0.0672, + "epoch": 3.5907335907335907, + "grad_norm": 0.9441186785697937, + "learning_rate": 8.455598455598455e-06, + "loss": 0.0572, "step": 12090 }, { - "epoch": 0.8985593346205257, - "grad_norm": 1.3266007900238037, - "learning_rate": 2.4608643992276846e-05, - "loss": 0.0567, + "epoch": 3.5937035937035935, + "grad_norm": 0.7726499438285828, + "learning_rate": 8.437778437778437e-06, + "loss": 0.058, "step": 12100 }, { - "epoch": 0.8993019456408733, - "grad_norm": 2.1246678829193115, - "learning_rate": 2.460418832615476e-05, - "loss": 0.0795, + "epoch": 3.5966735966735968, + "grad_norm": 0.7733856439590454, + "learning_rate": 8.419958419958421e-06, + "loss": 0.0383, "step": 12110 }, { - "epoch": 0.9000445566612209, - "grad_norm": 2.1990578174591064, - "learning_rate": 2.4599732660032676e-05, - "loss": 0.0881, + "epoch": 3.5996435996435996, + "grad_norm": 0.8538611531257629, + "learning_rate": 8.402138402138402e-06, + "loss": 0.0647, "step": 12120 }, { - "epoch": 0.9007871676815684, - "grad_norm": 0.5446377992630005, - "learning_rate": 2.459527699391059e-05, - "loss": 0.0697, + "epoch": 3.6026136026136024, + "grad_norm": 0.826570451259613, + "learning_rate": 8.384318384318384e-06, + "loss": 0.0521, "step": 12130 }, { - "epoch": 0.901529778701916, - "grad_norm": 2.7443840503692627, - "learning_rate": 2.4590821327788506e-05, - "loss": 0.0912, + "epoch": 3.6055836055836057, + "grad_norm": 1.6991403102874756, + "learning_rate": 8.366498366498368e-06, + "loss": 0.058, "step": 12140 }, { - "epoch": 0.9022723897222634, - "grad_norm": 1.4836909770965576, - "learning_rate": 2.458636566166642e-05, - "loss": 0.0904, + "epoch": 3.6085536085536085, + "grad_norm": 0.7238519191741943, + "learning_rate": 8.34867834867835e-06, + "loss": 0.0655, "step": 12150 }, { - "epoch": 0.903015000742611, - "grad_norm": 0.3852311968803406, - "learning_rate": 2.4581909995544336e-05, - "loss": 0.0695, + "epoch": 3.6115236115236113, + "grad_norm": 0.7092881798744202, + "learning_rate": 8.33085833085833e-06, + "loss": 0.0623, "step": 12160 }, { - "epoch": 0.9037576117629585, - "grad_norm": 1.652395248413086, - "learning_rate": 2.4577454329422247e-05, - "loss": 0.0836, + "epoch": 3.6144936144936146, + "grad_norm": 0.5333400964736938, + "learning_rate": 8.313038313038312e-06, + "loss": 0.0503, "step": 12170 }, { - "epoch": 0.9045002227833061, - "grad_norm": 3.4490652084350586, - "learning_rate": 2.4572998663300166e-05, - "loss": 0.0974, + "epoch": 3.6174636174636174, + "grad_norm": 0.9490695595741272, + "learning_rate": 8.295218295218296e-06, + "loss": 0.0495, "step": 12180 }, { - "epoch": 0.9052428338036537, - "grad_norm": 1.1233237981796265, - "learning_rate": 2.4568542997178077e-05, - "loss": 0.0968, + "epoch": 3.6204336204336203, + "grad_norm": 0.5925624370574951, + "learning_rate": 8.277398277398278e-06, + "loss": 0.0575, "step": 12190 }, { - "epoch": 0.9059854448240012, - "grad_norm": 1.1226853132247925, - "learning_rate": 2.4564087331055992e-05, - "loss": 0.0837, + "epoch": 3.6234036234036235, + "grad_norm": 0.8142616748809814, + "learning_rate": 8.259578259578259e-06, + "loss": 0.0561, "step": 12200 }, { - "epoch": 0.9067280558443487, - "grad_norm": 6.846561908721924, - "learning_rate": 2.455963166493391e-05, - "loss": 0.0965, + "epoch": 3.6263736263736264, + "grad_norm": 0.8163090348243713, + "learning_rate": 8.241758241758243e-06, + "loss": 0.0576, "step": 12210 }, { - "epoch": 0.9074706668646962, - "grad_norm": 2.6397814750671387, - "learning_rate": 2.4555175998811822e-05, - "loss": 0.074, + "epoch": 3.629343629343629, + "grad_norm": 0.8602144122123718, + "learning_rate": 8.223938223938225e-06, + "loss": 0.0543, "step": 12220 }, { - "epoch": 0.9082132778850438, - "grad_norm": 1.7175049781799316, - "learning_rate": 2.4550720332689737e-05, - "loss": 0.076, + "epoch": 3.6323136323136325, + "grad_norm": 0.5057087540626526, + "learning_rate": 8.206118206118205e-06, + "loss": 0.0658, "step": 12230 }, { - "epoch": 0.9089558889053914, - "grad_norm": 1.3105518817901611, - "learning_rate": 2.4546264666567652e-05, - "loss": 0.0952, + "epoch": 3.6352836352836353, + "grad_norm": 1.0046048164367676, + "learning_rate": 8.188298188298188e-06, + "loss": 0.0546, "step": 12240 }, { - "epoch": 0.9096984999257389, - "grad_norm": 3.014943838119507, - "learning_rate": 2.4541809000445567e-05, - "loss": 0.089, + "epoch": 3.638253638253638, + "grad_norm": 0.5388000011444092, + "learning_rate": 8.170478170478171e-06, + "loss": 0.0535, "step": 12250 }, { - "epoch": 0.9104411109460865, - "grad_norm": 1.1072237491607666, - "learning_rate": 2.4537353334323482e-05, - "loss": 0.0823, + "epoch": 3.6412236412236414, + "grad_norm": 0.31448933482170105, + "learning_rate": 8.152658152658154e-06, + "loss": 0.0561, "step": 12260 }, { - "epoch": 0.9111837219664339, - "grad_norm": 1.2074459791183472, - "learning_rate": 2.4532897668201397e-05, - "loss": 0.0787, + "epoch": 3.644193644193644, + "grad_norm": 0.7060285210609436, + "learning_rate": 8.134838134838134e-06, + "loss": 0.0628, "step": 12270 }, { - "epoch": 0.9119263329867815, - "grad_norm": 0.8429141640663147, - "learning_rate": 2.4528442002079312e-05, - "loss": 0.1024, + "epoch": 3.647163647163647, + "grad_norm": 0.44246843457221985, + "learning_rate": 8.117018117018118e-06, + "loss": 0.0549, "step": 12280 }, { - "epoch": 0.9126689440071291, - "grad_norm": 2.6227517127990723, - "learning_rate": 2.4523986335957227e-05, - "loss": 0.0799, + "epoch": 3.6501336501336503, + "grad_norm": 0.8844314813613892, + "learning_rate": 8.0991980991981e-06, + "loss": 0.0525, "step": 12290 }, { - "epoch": 0.9134115550274766, - "grad_norm": 0.7948519587516785, - "learning_rate": 2.451953066983514e-05, - "loss": 0.0722, + "epoch": 3.653103653103653, + "grad_norm": 1.1188846826553345, + "learning_rate": 8.08137808137808e-06, + "loss": 0.0563, "step": 12300 }, { - "epoch": 0.9141541660478242, - "grad_norm": 1.5158371925354004, - "learning_rate": 2.4515075003713057e-05, - "loss": 0.0938, + "epoch": 3.656073656073656, + "grad_norm": 0.7898098230361938, + "learning_rate": 8.063558063558063e-06, + "loss": 0.0571, "step": 12310 }, { - "epoch": 0.9148967770681717, - "grad_norm": 1.2404049634933472, - "learning_rate": 2.4510619337590972e-05, - "loss": 0.1068, + "epoch": 3.6590436590436592, + "grad_norm": 0.5534541606903076, + "learning_rate": 8.045738045738047e-06, + "loss": 0.0648, "step": 12320 }, { - "epoch": 0.9156393880885192, - "grad_norm": 1.7605699300765991, - "learning_rate": 2.4506163671468884e-05, - "loss": 0.0894, + "epoch": 3.662013662013662, + "grad_norm": 0.5332062840461731, + "learning_rate": 8.027918027918029e-06, + "loss": 0.0674, "step": 12330 }, { - "epoch": 0.9163819991088668, - "grad_norm": 1.6435573101043701, - "learning_rate": 2.45017080053468e-05, - "loss": 0.0742, + "epoch": 3.664983664983665, + "grad_norm": 1.169073224067688, + "learning_rate": 8.01009801009801e-06, + "loss": 0.0532, "step": 12340 }, { - "epoch": 0.9171246101292143, - "grad_norm": 0.6193175911903381, - "learning_rate": 2.4497252339224717e-05, - "loss": 0.0551, + "epoch": 3.667953667953668, + "grad_norm": 0.5271252393722534, + "learning_rate": 7.992277992277993e-06, + "loss": 0.0593, "step": 12350 }, { - "epoch": 0.9178672211495619, - "grad_norm": 2.4157028198242188, - "learning_rate": 2.449279667310263e-05, - "loss": 0.0889, + "epoch": 3.670923670923671, + "grad_norm": 0.8392048478126526, + "learning_rate": 7.974457974457975e-06, + "loss": 0.0468, "step": 12360 }, { - "epoch": 0.9186098321699094, - "grad_norm": 3.2014384269714355, - "learning_rate": 2.4488341006980544e-05, - "loss": 0.0777, + "epoch": 3.673893673893674, + "grad_norm": 0.7559081315994263, + "learning_rate": 7.956637956637956e-06, + "loss": 0.0476, "step": 12370 }, { - "epoch": 0.919352443190257, - "grad_norm": 2.560277223587036, - "learning_rate": 2.448388534085846e-05, - "loss": 0.0892, + "epoch": 3.676863676863677, + "grad_norm": 0.6734028458595276, + "learning_rate": 7.938817938817938e-06, + "loss": 0.0532, "step": 12380 }, { - "epoch": 0.9200950542106044, - "grad_norm": 1.1629691123962402, - "learning_rate": 2.4479429674736374e-05, - "loss": 0.0693, + "epoch": 3.67983367983368, + "grad_norm": 0.7153018712997437, + "learning_rate": 7.920997920997922e-06, + "loss": 0.0597, "step": 12390 }, { - "epoch": 0.920837665230952, - "grad_norm": 0.7739498019218445, - "learning_rate": 2.447497400861429e-05, - "loss": 0.0751, + "epoch": 3.6828036828036828, + "grad_norm": 0.7017358541488647, + "learning_rate": 7.903177903177904e-06, + "loss": 0.0642, "step": 12400 }, { - "epoch": 0.9215802762512996, - "grad_norm": 3.0681796073913574, - "learning_rate": 2.44705183424922e-05, - "loss": 0.1, + "epoch": 3.685773685773686, + "grad_norm": 0.8669632077217102, + "learning_rate": 7.885357885357884e-06, + "loss": 0.0521, "step": 12410 }, { - "epoch": 0.9223228872716471, - "grad_norm": 1.9392578601837158, - "learning_rate": 2.446606267637012e-05, - "loss": 0.0949, + "epoch": 3.688743688743689, + "grad_norm": 0.5131626725196838, + "learning_rate": 7.867537867537868e-06, + "loss": 0.0578, "step": 12420 }, { - "epoch": 0.9230654982919947, - "grad_norm": 1.269616723060608, - "learning_rate": 2.4461607010248034e-05, - "loss": 0.0821, + "epoch": 3.6917136917136917, + "grad_norm": 1.0312336683273315, + "learning_rate": 7.84971784971785e-06, + "loss": 0.0641, "step": 12430 }, { - "epoch": 0.9238081093123421, - "grad_norm": 0.9152816534042358, - "learning_rate": 2.4457151344125945e-05, - "loss": 0.0905, + "epoch": 3.694683694683695, + "grad_norm": 0.620993435382843, + "learning_rate": 7.831897831897831e-06, + "loss": 0.0577, "step": 12440 }, { - "epoch": 0.9245507203326897, - "grad_norm": 1.6232317686080933, - "learning_rate": 2.4452695678003864e-05, - "loss": 0.0869, + "epoch": 3.697653697653698, + "grad_norm": 0.9431599974632263, + "learning_rate": 7.814077814077813e-06, + "loss": 0.054, "step": 12450 }, { - "epoch": 0.9252933313530373, - "grad_norm": 3.454188585281372, - "learning_rate": 2.444824001188178e-05, - "loss": 0.0908, + "epoch": 3.7006237006237006, + "grad_norm": 0.5963833928108215, + "learning_rate": 7.796257796257797e-06, + "loss": 0.0617, "step": 12460 }, { - "epoch": 0.9260359423733848, - "grad_norm": 1.880387783050537, - "learning_rate": 2.444378434575969e-05, - "loss": 0.0705, + "epoch": 3.7035937035937034, + "grad_norm": 0.8309038877487183, + "learning_rate": 7.77843777843778e-06, + "loss": 0.064, "step": 12470 }, { - "epoch": 0.9267785533937324, - "grad_norm": 2.0626840591430664, - "learning_rate": 2.443932867963761e-05, - "loss": 0.0897, + "epoch": 3.7065637065637067, + "grad_norm": 0.38153156638145447, + "learning_rate": 7.76061776061776e-06, + "loss": 0.0574, "step": 12480 }, { - "epoch": 0.92752116441408, - "grad_norm": 1.5957422256469727, - "learning_rate": 2.443487301351552e-05, - "loss": 0.0902, + "epoch": 3.7095337095337095, + "grad_norm": 0.6947796940803528, + "learning_rate": 7.742797742797744e-06, + "loss": 0.0582, "step": 12490 }, { - "epoch": 0.9282637754344274, - "grad_norm": 3.2366816997528076, - "learning_rate": 2.4430417347393435e-05, - "loss": 0.0757, + "epoch": 3.7125037125037124, + "grad_norm": 0.29392537474632263, + "learning_rate": 7.724977724977726e-06, + "loss": 0.0599, "step": 12500 }, { - "epoch": 0.929006386454775, - "grad_norm": 0.9479996562004089, - "learning_rate": 2.442596168127135e-05, - "loss": 0.0631, + "epoch": 3.7154737154737156, + "grad_norm": 0.7719232439994812, + "learning_rate": 7.707157707157708e-06, + "loss": 0.0454, "step": 12510 }, { - "epoch": 0.9297489974751225, - "grad_norm": 1.3636139631271362, - "learning_rate": 2.4421506015149265e-05, - "loss": 0.0845, + "epoch": 3.7184437184437185, + "grad_norm": 0.8208956718444824, + "learning_rate": 7.689337689337688e-06, + "loss": 0.0627, "step": 12520 }, { - "epoch": 0.9304916084954701, - "grad_norm": 1.1171748638153076, - "learning_rate": 2.441705034902718e-05, - "loss": 0.0786, + "epoch": 3.7214137214137213, + "grad_norm": 0.7686058878898621, + "learning_rate": 7.671517671517672e-06, + "loss": 0.055, "step": 12530 }, { - "epoch": 0.9312342195158176, - "grad_norm": 1.6914044618606567, - "learning_rate": 2.4412594682905095e-05, - "loss": 0.0919, + "epoch": 3.724383724383724, + "grad_norm": 0.7790077328681946, + "learning_rate": 7.653697653697654e-06, + "loss": 0.0595, "step": 12540 }, { - "epoch": 0.9319768305361652, - "grad_norm": 1.3057868480682373, - "learning_rate": 2.440813901678301e-05, - "loss": 0.079, + "epoch": 3.7273537273537274, + "grad_norm": 0.5429967045783997, + "learning_rate": 7.635877635877635e-06, + "loss": 0.0596, "step": 12550 }, { - "epoch": 0.9327194415565127, - "grad_norm": 4.414134979248047, - "learning_rate": 2.4403683350660925e-05, - "loss": 0.0893, + "epoch": 3.73032373032373, + "grad_norm": 0.4277282655239105, + "learning_rate": 7.618057618057619e-06, + "loss": 0.057, "step": 12560 }, { - "epoch": 0.9334620525768602, - "grad_norm": 0.9300063848495483, - "learning_rate": 2.439922768453884e-05, - "loss": 0.1079, + "epoch": 3.733293733293733, + "grad_norm": 0.9140810966491699, + "learning_rate": 7.600237600237601e-06, + "loss": 0.0572, "step": 12570 }, { - "epoch": 0.9342046635972078, - "grad_norm": 0.956235408782959, - "learning_rate": 2.4394772018416752e-05, - "loss": 0.1299, + "epoch": 3.7362637362637363, + "grad_norm": 0.9258958697319031, + "learning_rate": 7.582417582417582e-06, + "loss": 0.0467, "step": 12580 }, { - "epoch": 0.9349472746175553, - "grad_norm": 1.3673025369644165, - "learning_rate": 2.439031635229467e-05, - "loss": 0.0704, + "epoch": 3.739233739233739, + "grad_norm": 0.4420478045940399, + "learning_rate": 7.564597564597564e-06, + "loss": 0.0633, "step": 12590 }, { - "epoch": 0.9356898856379029, - "grad_norm": 0.966641366481781, - "learning_rate": 2.4385860686172582e-05, - "loss": 0.0806, + "epoch": 3.742203742203742, + "grad_norm": 0.642549455165863, + "learning_rate": 7.546777546777547e-06, + "loss": 0.059, "step": 12600 }, { - "epoch": 0.9364324966582505, - "grad_norm": 1.7886812686920166, - "learning_rate": 2.4381405020050497e-05, - "loss": 0.0621, + "epoch": 3.7451737451737452, + "grad_norm": 0.9195041060447693, + "learning_rate": 7.528957528957529e-06, + "loss": 0.059, "step": 12610 }, { - "epoch": 0.9371751076785979, - "grad_norm": 1.5795032978057861, - "learning_rate": 2.4376949353928415e-05, - "loss": 0.0645, + "epoch": 3.748143748143748, + "grad_norm": 0.7634923458099365, + "learning_rate": 7.511137511137511e-06, + "loss": 0.0476, "step": 12620 }, { - "epoch": 0.9379177186989455, - "grad_norm": 0.8962666392326355, - "learning_rate": 2.4372493687806327e-05, - "loss": 0.0825, + "epoch": 3.751113751113751, + "grad_norm": 0.7769595980644226, + "learning_rate": 7.493317493317493e-06, + "loss": 0.0573, "step": 12630 }, { - "epoch": 0.938660329719293, - "grad_norm": 1.669140100479126, - "learning_rate": 2.4368038021684242e-05, - "loss": 0.1027, + "epoch": 3.754083754083754, + "grad_norm": 0.8632429242134094, + "learning_rate": 7.475497475497476e-06, + "loss": 0.0769, "step": 12640 }, { - "epoch": 0.9394029407396406, - "grad_norm": 0.6645317077636719, - "learning_rate": 2.4363582355562157e-05, - "loss": 0.0672, + "epoch": 3.757053757053757, + "grad_norm": 0.7088262438774109, + "learning_rate": 7.457677457677457e-06, + "loss": 0.0542, "step": 12650 }, { - "epoch": 0.9401455517599882, - "grad_norm": 1.4275974035263062, - "learning_rate": 2.4359126689440072e-05, - "loss": 0.094, + "epoch": 3.76002376002376, + "grad_norm": 0.7802964448928833, + "learning_rate": 7.4398574398574404e-06, + "loss": 0.0541, "step": 12660 }, { - "epoch": 0.9408881627803357, - "grad_norm": 1.8758856058120728, - "learning_rate": 2.4354671023317987e-05, - "loss": 0.0911, + "epoch": 3.762993762993763, + "grad_norm": 0.8181250095367432, + "learning_rate": 7.422037422037423e-06, + "loss": 0.0555, "step": 12670 }, { - "epoch": 0.9416307738006832, - "grad_norm": 1.4419819116592407, - "learning_rate": 2.4350215357195902e-05, - "loss": 0.0613, + "epoch": 3.765963765963766, + "grad_norm": 0.48934659361839294, + "learning_rate": 7.404217404217404e-06, + "loss": 0.0552, "step": 12680 }, { - "epoch": 0.9423733848210307, - "grad_norm": 1.668791651725769, - "learning_rate": 2.4345759691073817e-05, - "loss": 0.067, + "epoch": 3.7689337689337687, + "grad_norm": 0.7606164216995239, + "learning_rate": 7.386397386397387e-06, + "loss": 0.052, "step": 12690 }, { - "epoch": 0.9431159958413783, - "grad_norm": 1.9676769971847534, - "learning_rate": 2.4341304024951732e-05, - "loss": 0.0881, + "epoch": 3.771903771903772, + "grad_norm": 0.5713620185852051, + "learning_rate": 7.368577368577368e-06, + "loss": 0.0497, "step": 12700 }, { - "epoch": 0.9438586068617258, - "grad_norm": 0.5292758941650391, - "learning_rate": 2.4336848358829643e-05, - "loss": 0.0722, + "epoch": 3.774873774873775, + "grad_norm": 0.4329434037208557, + "learning_rate": 7.350757350757351e-06, + "loss": 0.0508, "step": 12710 }, { - "epoch": 0.9446012178820734, - "grad_norm": 2.253980875015259, - "learning_rate": 2.433239269270756e-05, - "loss": 0.0689, + "epoch": 3.7778437778437777, + "grad_norm": 0.5861251950263977, + "learning_rate": 7.332937332937333e-06, + "loss": 0.0574, "step": 12720 }, { - "epoch": 0.9453438289024209, - "grad_norm": 2.0630314350128174, - "learning_rate": 2.4327937026585477e-05, - "loss": 0.0867, + "epoch": 3.780813780813781, + "grad_norm": 0.9137320518493652, + "learning_rate": 7.315117315117316e-06, + "loss": 0.0473, "step": 12730 }, { - "epoch": 0.9460864399227684, - "grad_norm": 0.9975630640983582, - "learning_rate": 2.4323481360463388e-05, - "loss": 0.1088, + "epoch": 3.7837837837837838, + "grad_norm": 0.4197285771369934, + "learning_rate": 7.297297297297298e-06, + "loss": 0.0557, "step": 12740 }, { - "epoch": 0.946829050943116, - "grad_norm": 1.6637675762176514, - "learning_rate": 2.4319025694341303e-05, - "loss": 0.103, + "epoch": 3.7867537867537866, + "grad_norm": 0.8672818541526794, + "learning_rate": 7.27947727947728e-06, + "loss": 0.0547, "step": 12750 }, { - "epoch": 0.9475716619634635, - "grad_norm": 1.508355975151062, - "learning_rate": 2.431457002821922e-05, - "loss": 0.1022, + "epoch": 3.78972378972379, + "grad_norm": 1.1099815368652344, + "learning_rate": 7.261657261657262e-06, + "loss": 0.0547, "step": 12760 }, { - "epoch": 0.9483142729838111, - "grad_norm": 1.989896297454834, - "learning_rate": 2.4310114362097133e-05, - "loss": 0.0647, + "epoch": 3.7926937926937927, + "grad_norm": 0.6481065154075623, + "learning_rate": 7.2438372438372435e-06, + "loss": 0.0552, "step": 12770 }, { - "epoch": 0.9490568840041587, - "grad_norm": 0.7862587571144104, - "learning_rate": 2.4305658695975048e-05, - "loss": 0.0754, + "epoch": 3.7956637956637955, + "grad_norm": 0.6549276113510132, + "learning_rate": 7.2260172260172265e-06, + "loss": 0.052, "step": 12780 }, { - "epoch": 0.9497994950245061, - "grad_norm": 0.7664479613304138, - "learning_rate": 2.4301203029852967e-05, - "loss": 0.0759, + "epoch": 3.798633798633799, + "grad_norm": 1.2700341939926147, + "learning_rate": 7.208197208197208e-06, + "loss": 0.052, "step": 12790 }, { - "epoch": 0.9505421060448537, - "grad_norm": 1.1246778964996338, - "learning_rate": 2.4296747363730878e-05, - "loss": 0.0671, + "epoch": 3.8016038016038016, + "grad_norm": 0.7553129196166992, + "learning_rate": 7.190377190377191e-06, + "loss": 0.0506, "step": 12800 }, { - "epoch": 0.9512847170652012, - "grad_norm": 1.6385598182678223, - "learning_rate": 2.4292291697608793e-05, - "loss": 0.1062, + "epoch": 3.8045738045738045, + "grad_norm": 0.6122118830680847, + "learning_rate": 7.172557172557173e-06, + "loss": 0.0588, "step": 12810 }, { - "epoch": 0.9520273280855488, - "grad_norm": 1.8573966026306152, - "learning_rate": 2.4287836031486705e-05, - "loss": 0.0691, + "epoch": 3.8075438075438077, + "grad_norm": 1.0293030738830566, + "learning_rate": 7.154737154737155e-06, + "loss": 0.0626, "step": 12820 }, { - "epoch": 0.9527699391058964, - "grad_norm": 1.2876501083374023, - "learning_rate": 2.4283380365364623e-05, - "loss": 0.0699, + "epoch": 3.8105138105138106, + "grad_norm": 0.8084139227867126, + "learning_rate": 7.136917136917137e-06, + "loss": 0.0523, "step": 12830 }, { - "epoch": 0.9535125501262439, - "grad_norm": 3.32975435256958, - "learning_rate": 2.4278924699242538e-05, - "loss": 0.0984, + "epoch": 3.8134838134838134, + "grad_norm": 0.83390212059021, + "learning_rate": 7.119097119097119e-06, + "loss": 0.0561, "step": 12840 }, { - "epoch": 0.9542551611465914, - "grad_norm": 2.0247581005096436, - "learning_rate": 2.427446903312045e-05, - "loss": 0.0784, + "epoch": 3.8164538164538166, + "grad_norm": 0.8345874547958374, + "learning_rate": 7.101277101277102e-06, + "loss": 0.0578, "step": 12850 }, { - "epoch": 0.9549977721669389, - "grad_norm": 3.556913375854492, - "learning_rate": 2.4270013366998368e-05, - "loss": 0.0756, + "epoch": 3.8194238194238195, + "grad_norm": 0.9062905311584473, + "learning_rate": 7.083457083457083e-06, + "loss": 0.0488, "step": 12860 }, { - "epoch": 0.9557403831872865, - "grad_norm": 1.9502661228179932, - "learning_rate": 2.4265557700876283e-05, - "loss": 0.1019, + "epoch": 3.8223938223938223, + "grad_norm": 0.855656623840332, + "learning_rate": 7.065637065637066e-06, + "loss": 0.0469, "step": 12870 }, { - "epoch": 0.956482994207634, - "grad_norm": 1.5548827648162842, - "learning_rate": 2.4261102034754195e-05, - "loss": 0.077, + "epoch": 3.8253638253638256, + "grad_norm": 0.7304750084877014, + "learning_rate": 7.047817047817048e-06, + "loss": 0.0612, "step": 12880 }, { - "epoch": 0.9572256052279816, - "grad_norm": 2.959385871887207, - "learning_rate": 2.4256646368632113e-05, - "loss": 0.0799, + "epoch": 3.8283338283338284, + "grad_norm": 1.3436956405639648, + "learning_rate": 7.02999702999703e-06, + "loss": 0.0692, "step": 12890 }, { - "epoch": 0.9579682162483292, - "grad_norm": 0.9673056602478027, - "learning_rate": 2.4252190702510025e-05, - "loss": 0.0768, + "epoch": 3.8313038313038312, + "grad_norm": 0.6037256121635437, + "learning_rate": 7.0121770121770125e-06, + "loss": 0.0527, "step": 12900 }, { - "epoch": 0.9587108272686766, - "grad_norm": 1.747621774673462, - "learning_rate": 2.424773503638794e-05, - "loss": 0.0604, + "epoch": 3.8342738342738345, + "grad_norm": 0.9017062187194824, + "learning_rate": 6.994356994356995e-06, + "loss": 0.0544, "step": 12910 }, { - "epoch": 0.9594534382890242, - "grad_norm": 1.6321135759353638, - "learning_rate": 2.4243279370265855e-05, - "loss": 0.0706, + "epoch": 3.8372438372438373, + "grad_norm": 0.5321645736694336, + "learning_rate": 6.976536976536977e-06, + "loss": 0.0616, "step": 12920 }, { - "epoch": 0.9601960493093717, - "grad_norm": 2.2727482318878174, - "learning_rate": 2.423882370414377e-05, - "loss": 0.0939, + "epoch": 3.84021384021384, + "grad_norm": 0.7365061044692993, + "learning_rate": 6.958716958716958e-06, + "loss": 0.0544, "step": 12930 }, { - "epoch": 0.9609386603297193, - "grad_norm": 0.6536130309104919, - "learning_rate": 2.4234368038021685e-05, - "loss": 0.0881, + "epoch": 3.8431838431838434, + "grad_norm": 0.6113480925559998, + "learning_rate": 6.940896940896941e-06, + "loss": 0.0547, "step": 12940 }, { - "epoch": 0.9616812713500669, - "grad_norm": 2.0956132411956787, - "learning_rate": 2.42299123718996e-05, - "loss": 0.0837, + "epoch": 3.8461538461538463, + "grad_norm": 0.6009132266044617, + "learning_rate": 6.923076923076923e-06, + "loss": 0.0585, "step": 12950 }, { - "epoch": 0.9624238823704144, - "grad_norm": 1.1719980239868164, - "learning_rate": 2.4225456705777515e-05, - "loss": 0.0789, + "epoch": 3.849123849123849, + "grad_norm": 0.9174481630325317, + "learning_rate": 6.9052569052569056e-06, + "loss": 0.0622, "step": 12960 }, { - "epoch": 0.9631664933907619, - "grad_norm": 3.233799934387207, - "learning_rate": 2.422100103965543e-05, - "loss": 0.0597, + "epoch": 3.8520938520938524, + "grad_norm": 0.3774741590023041, + "learning_rate": 6.887436887436888e-06, + "loss": 0.0545, "step": 12970 }, { - "epoch": 0.9639091044111094, - "grad_norm": 3.5093204975128174, - "learning_rate": 2.4216545373533345e-05, - "loss": 0.1026, + "epoch": 3.855063855063855, + "grad_norm": 1.134770154953003, + "learning_rate": 6.86961686961687e-06, + "loss": 0.0471, "step": 12980 }, { - "epoch": 0.964651715431457, - "grad_norm": 1.597965121269226, - "learning_rate": 2.4212089707411256e-05, - "loss": 0.0852, + "epoch": 3.858033858033858, + "grad_norm": 0.8583316802978516, + "learning_rate": 6.851796851796852e-06, + "loss": 0.0535, "step": 12990 }, { - "epoch": 0.9653943264518046, - "grad_norm": 1.1015452146530151, - "learning_rate": 2.4207634041289175e-05, - "loss": 0.0651, + "epoch": 3.861003861003861, + "grad_norm": 0.6052166223526001, + "learning_rate": 6.833976833976834e-06, + "loss": 0.0557, "step": 13000 }, { - "epoch": 0.9661369374721521, - "grad_norm": 0.7174215316772461, - "learning_rate": 2.4203178375167086e-05, - "loss": 0.0693, + "epoch": 3.863973863973864, + "grad_norm": 1.0991127490997314, + "learning_rate": 6.816156816156816e-06, + "loss": 0.0528, "step": 13010 }, { - "epoch": 0.9668795484924996, - "grad_norm": 1.493241786956787, - "learning_rate": 2.4198722709045e-05, - "loss": 0.0757, + "epoch": 3.866943866943867, + "grad_norm": 0.7694152593612671, + "learning_rate": 6.7983367983367986e-06, + "loss": 0.0689, "step": 13020 }, { - "epoch": 0.9676221595128471, - "grad_norm": 2.9904990196228027, - "learning_rate": 2.419426704292292e-05, - "loss": 0.0782, + "epoch": 3.8699138699138698, + "grad_norm": 0.9624953866004944, + "learning_rate": 6.780516780516781e-06, + "loss": 0.0585, "step": 13030 }, { - "epoch": 0.9683647705331947, - "grad_norm": 1.8118109703063965, - "learning_rate": 2.418981137680083e-05, - "loss": 0.0718, + "epoch": 3.872883872883873, + "grad_norm": 0.613135814666748, + "learning_rate": 6.762696762696763e-06, + "loss": 0.0636, "step": 13040 }, { - "epoch": 0.9691073815535423, - "grad_norm": 3.275655508041382, - "learning_rate": 2.4185355710678746e-05, - "loss": 0.0977, + "epoch": 3.875853875853876, + "grad_norm": 0.6408237814903259, + "learning_rate": 6.744876744876745e-06, + "loss": 0.0525, "step": 13050 }, { - "epoch": 0.9698499925738898, - "grad_norm": 1.704103708267212, - "learning_rate": 2.4180900044556665e-05, - "loss": 0.0578, + "epoch": 3.8788238788238787, + "grad_norm": 0.5397794842720032, + "learning_rate": 6.727056727056727e-06, + "loss": 0.0538, "step": 13060 }, { - "epoch": 0.9705926035942374, - "grad_norm": 1.3468433618545532, - "learning_rate": 2.4176444378434576e-05, - "loss": 0.0595, + "epoch": 3.8817938817938815, + "grad_norm": 0.569254457950592, + "learning_rate": 6.7092367092367094e-06, + "loss": 0.0494, "step": 13070 }, { - "epoch": 0.9713352146145848, - "grad_norm": 0.6979770064353943, - "learning_rate": 2.417198871231249e-05, - "loss": 0.0875, + "epoch": 3.884763884763885, + "grad_norm": 0.7283187508583069, + "learning_rate": 6.691416691416692e-06, + "loss": 0.0428, "step": 13080 }, { - "epoch": 0.9720778256349324, - "grad_norm": 3.1347808837890625, - "learning_rate": 2.4167533046190406e-05, - "loss": 0.1095, + "epoch": 3.8877338877338876, + "grad_norm": 0.5344458818435669, + "learning_rate": 6.673596673596674e-06, + "loss": 0.0487, "step": 13090 }, { - "epoch": 0.97282043665528, - "grad_norm": 2.341815710067749, - "learning_rate": 2.416307738006832e-05, - "loss": 0.0909, + "epoch": 3.8907038907038904, + "grad_norm": 0.8869258165359497, + "learning_rate": 6.655776655776656e-06, + "loss": 0.0517, "step": 13100 }, { - "epoch": 0.9735630476756275, - "grad_norm": 1.4380031824111938, - "learning_rate": 2.4158621713946236e-05, - "loss": 0.0855, + "epoch": 3.8936738936738937, + "grad_norm": 0.6164690256118774, + "learning_rate": 6.637956637956638e-06, + "loss": 0.056, "step": 13110 }, { - "epoch": 0.9743056586959751, - "grad_norm": 2.384162425994873, - "learning_rate": 2.4154166047824148e-05, - "loss": 0.0851, + "epoch": 3.8966438966438965, + "grad_norm": 1.2430282831192017, + "learning_rate": 6.62013662013662e-06, + "loss": 0.0546, "step": 13120 }, { - "epoch": 0.9750482697163226, - "grad_norm": 3.7416675090789795, - "learning_rate": 2.4149710381702066e-05, - "loss": 0.0855, + "epoch": 3.8996138996138994, + "grad_norm": 0.5532137751579285, + "learning_rate": 6.6023166023166025e-06, + "loss": 0.0545, "step": 13130 }, { - "epoch": 0.9757908807366701, - "grad_norm": 1.3692728281021118, - "learning_rate": 2.414525471557998e-05, - "loss": 0.0492, + "epoch": 3.9025839025839026, + "grad_norm": 0.9797311425209045, + "learning_rate": 6.584496584496585e-06, + "loss": 0.0532, "step": 13140 }, { - "epoch": 0.9765334917570176, - "grad_norm": 2.30066180229187, - "learning_rate": 2.4140799049457893e-05, - "loss": 0.0818, + "epoch": 3.9055539055539055, + "grad_norm": 1.118308186531067, + "learning_rate": 6.566676566676567e-06, + "loss": 0.0609, "step": 13150 }, { - "epoch": 0.9772761027773652, - "grad_norm": 0.9965130686759949, - "learning_rate": 2.4136343383335808e-05, - "loss": 0.08, + "epoch": 3.9085239085239083, + "grad_norm": 0.8075883388519287, + "learning_rate": 6.548856548856549e-06, + "loss": 0.0623, "step": 13160 }, { - "epoch": 0.9780187137977128, - "grad_norm": 3.0066161155700684, - "learning_rate": 2.4131887717213726e-05, - "loss": 0.0932, + "epoch": 3.9114939114939116, + "grad_norm": 0.5950072407722473, + "learning_rate": 6.531036531036531e-06, + "loss": 0.0556, "step": 13170 }, { - "epoch": 0.9787613248180603, - "grad_norm": 2.1586639881134033, - "learning_rate": 2.4127432051091638e-05, - "loss": 0.0582, + "epoch": 3.9144639144639144, + "grad_norm": 0.7014681696891785, + "learning_rate": 6.513216513216513e-06, + "loss": 0.0685, "step": 13180 }, { - "epoch": 0.9795039358384079, - "grad_norm": 1.0595694780349731, - "learning_rate": 2.4122976384969553e-05, - "loss": 0.0731, + "epoch": 3.9174339174339172, + "grad_norm": 0.8518680334091187, + "learning_rate": 6.4953964953964955e-06, + "loss": 0.0561, "step": 13190 }, { - "epoch": 0.9802465468587553, - "grad_norm": 1.56610906124115, - "learning_rate": 2.411852071884747e-05, - "loss": 0.054, + "epoch": 3.9204039204039205, + "grad_norm": 0.61479651927948, + "learning_rate": 6.477576477576478e-06, + "loss": 0.0512, "step": 13200 }, { - "epoch": 0.9809891578791029, - "grad_norm": 0.9875765442848206, - "learning_rate": 2.4114065052725383e-05, - "loss": 0.0748, + "epoch": 3.9233739233739233, + "grad_norm": 0.8155126571655273, + "learning_rate": 6.45975645975646e-06, + "loss": 0.0469, "step": 13210 }, { - "epoch": 0.9817317688994505, - "grad_norm": 0.9391055703163147, - "learning_rate": 2.4109609386603298e-05, - "loss": 0.0635, + "epoch": 3.926343926343926, + "grad_norm": 0.8962894082069397, + "learning_rate": 6.441936441936442e-06, + "loss": 0.0517, "step": 13220 }, { - "epoch": 0.982474379919798, - "grad_norm": 2.4511687755584717, - "learning_rate": 2.410515372048121e-05, - "loss": 0.0709, + "epoch": 3.9293139293139294, + "grad_norm": 0.8040223717689514, + "learning_rate": 6.424116424116425e-06, + "loss": 0.0546, "step": 13230 }, { - "epoch": 0.9832169909401456, - "grad_norm": 1.185548186302185, - "learning_rate": 2.4100698054359128e-05, - "loss": 0.0477, + "epoch": 3.9322839322839322, + "grad_norm": 0.6404465436935425, + "learning_rate": 6.406296406296406e-06, + "loss": 0.057, "step": 13240 }, { - "epoch": 0.9839596019604931, - "grad_norm": 2.511913299560547, - "learning_rate": 2.4096242388237043e-05, - "loss": 0.055, + "epoch": 3.935253935253935, + "grad_norm": 0.6395756602287292, + "learning_rate": 6.3884763884763885e-06, + "loss": 0.0552, "step": 13250 }, { - "epoch": 0.9847022129808406, - "grad_norm": 3.263899326324463, - "learning_rate": 2.4091786722114954e-05, - "loss": 0.0808, + "epoch": 3.9382239382239383, + "grad_norm": 0.8880889415740967, + "learning_rate": 6.370656370656371e-06, + "loss": 0.0529, "step": 13260 }, { - "epoch": 0.9854448240011882, - "grad_norm": 1.0159684419631958, - "learning_rate": 2.4087331055992873e-05, - "loss": 0.0897, + "epoch": 3.941193941193941, + "grad_norm": 0.8830543160438538, + "learning_rate": 6.352836352836353e-06, + "loss": 0.0532, "step": 13270 }, { - "epoch": 0.9861874350215357, - "grad_norm": 1.7687329053878784, - "learning_rate": 2.4082875389870788e-05, - "loss": 0.0789, + "epoch": 3.944163944163944, + "grad_norm": 1.0157254934310913, + "learning_rate": 6.335016335016335e-06, + "loss": 0.0504, "step": 13280 }, { - "epoch": 0.9869300460418833, - "grad_norm": 3.7545151710510254, - "learning_rate": 2.40784197237487e-05, - "loss": 0.0785, + "epoch": 3.9471339471339473, + "grad_norm": 0.43136101961135864, + "learning_rate": 6.317196317196317e-06, + "loss": 0.0422, "step": 13290 }, { - "epoch": 0.9876726570622308, - "grad_norm": 4.160385608673096, - "learning_rate": 2.4073964057626618e-05, - "loss": 0.0784, + "epoch": 3.95010395010395, + "grad_norm": 0.990959107875824, + "learning_rate": 6.2993762993763e-06, + "loss": 0.0685, "step": 13300 }, { - "epoch": 0.9884152680825783, - "grad_norm": 2.456456184387207, - "learning_rate": 2.4069508391504533e-05, - "loss": 0.0863, + "epoch": 3.953073953073953, + "grad_norm": 0.7046756744384766, + "learning_rate": 6.2815562815562815e-06, + "loss": 0.0482, "step": 13310 }, { - "epoch": 0.9891578791029259, - "grad_norm": 1.3430705070495605, - "learning_rate": 2.4065052725382444e-05, - "loss": 0.0961, + "epoch": 3.956043956043956, + "grad_norm": 0.8372275829315186, + "learning_rate": 6.2637362637362645e-06, + "loss": 0.0458, "step": 13320 }, { - "epoch": 0.9899004901232734, - "grad_norm": 1.3270246982574463, - "learning_rate": 2.406059705926036e-05, - "loss": 0.0721, + "epoch": 3.959013959013959, + "grad_norm": 0.7267201542854309, + "learning_rate": 6.245916245916246e-06, + "loss": 0.0461, "step": 13330 }, { - "epoch": 0.990643101143621, - "grad_norm": 1.4000052213668823, - "learning_rate": 2.4056141393138274e-05, - "loss": 0.0679, + "epoch": 3.961983961983962, + "grad_norm": 1.0572090148925781, + "learning_rate": 6.228096228096228e-06, + "loss": 0.0502, "step": 13340 }, { - "epoch": 0.9913857121639685, - "grad_norm": 3.377154588699341, - "learning_rate": 2.405168572701619e-05, - "loss": 0.0651, + "epoch": 3.964953964953965, + "grad_norm": 1.4934335947036743, + "learning_rate": 6.21027621027621e-06, + "loss": 0.0622, "step": 13350 }, { - "epoch": 0.9921283231843161, - "grad_norm": 1.1014114618301392, - "learning_rate": 2.4047230060894104e-05, - "loss": 0.1156, + "epoch": 3.967923967923968, + "grad_norm": 0.9395650029182434, + "learning_rate": 6.192456192456192e-06, + "loss": 0.0438, "step": 13360 }, { - "epoch": 0.9928709342046635, - "grad_norm": 2.2198503017425537, - "learning_rate": 2.404277439477202e-05, - "loss": 0.0821, + "epoch": 3.970893970893971, + "grad_norm": 0.7308318018913269, + "learning_rate": 6.174636174636175e-06, + "loss": 0.0547, "step": 13370 }, { - "epoch": 0.9936135452250111, - "grad_norm": 2.7557029724121094, - "learning_rate": 2.4038318728649934e-05, - "loss": 0.0833, + "epoch": 3.973863973863974, + "grad_norm": 1.219499111175537, + "learning_rate": 6.156816156816157e-06, + "loss": 0.0698, "step": 13380 }, { - "epoch": 0.9943561562453587, - "grad_norm": 3.052049160003662, - "learning_rate": 2.403386306252785e-05, - "loss": 0.0832, + "epoch": 3.976833976833977, + "grad_norm": 1.007973074913025, + "learning_rate": 6.13899613899614e-06, + "loss": 0.0598, "step": 13390 }, { - "epoch": 0.9950987672657062, - "grad_norm": 1.0992413759231567, - "learning_rate": 2.402940739640576e-05, - "loss": 0.1037, + "epoch": 3.9798039798039797, + "grad_norm": 0.569019615650177, + "learning_rate": 6.121176121176121e-06, + "loss": 0.0567, "step": 13400 }, { - "epoch": 0.9958413782860538, - "grad_norm": 0.9476717114448547, - "learning_rate": 2.402495173028368e-05, - "loss": 0.0732, + "epoch": 3.982773982773983, + "grad_norm": 1.0321439504623413, + "learning_rate": 6.103356103356103e-06, + "loss": 0.0512, "step": 13410 }, { - "epoch": 0.9965839893064014, - "grad_norm": 0.403255820274353, - "learning_rate": 2.402049606416159e-05, - "loss": 0.0565, + "epoch": 3.985743985743986, + "grad_norm": 0.6399171352386475, + "learning_rate": 6.085536085536085e-06, + "loss": 0.0551, "step": 13420 }, { - "epoch": 0.9973266003267488, - "grad_norm": 0.33830782771110535, - "learning_rate": 2.4016040398039506e-05, - "loss": 0.0614, + "epoch": 3.9887139887139886, + "grad_norm": 0.7629494071006775, + "learning_rate": 6.0677160677160676e-06, + "loss": 0.0577, "step": 13430 }, { - "epoch": 0.9980692113470964, - "grad_norm": 1.7131567001342773, - "learning_rate": 2.4011584731917424e-05, - "loss": 0.1047, + "epoch": 3.991683991683992, + "grad_norm": 0.564344584941864, + "learning_rate": 6.049896049896051e-06, + "loss": 0.0512, "step": 13440 }, { - "epoch": 0.9988118223674439, - "grad_norm": 1.484653115272522, - "learning_rate": 2.4007129065795336e-05, - "loss": 0.089, + "epoch": 3.9946539946539947, + "grad_norm": 0.5358871817588806, + "learning_rate": 6.032076032076032e-06, + "loss": 0.0552, "step": 13450 }, { - "epoch": 0.9995544333877915, - "grad_norm": 1.4303562641143799, - "learning_rate": 2.400267339967325e-05, - "loss": 0.0672, + "epoch": 3.9976239976239976, + "grad_norm": 0.9634397625923157, + "learning_rate": 6.014256014256015e-06, + "loss": 0.0539, "step": 13460 }, { - "epoch": 1.0, - "eval_f1": 0.0, - "eval_loss": 0.06958512961864471, - "eval_runtime": 835.1475, - "eval_samples_per_second": 45.524, - "eval_steps_per_second": 2.846, - "step": 13466 + "epoch": 4.0, + "eval_f1": 0.33031292965957215, + "eval_loss": 0.043538980185985565, + "eval_runtime": 164.5887, + "eval_samples_per_second": 230.994, + "eval_steps_per_second": 3.615, + "step": 13468 }, { - "epoch": 1.000297044408139, - "grad_norm": 1.3870664834976196, - "learning_rate": 2.399821773355117e-05, - "loss": 0.0639, + "epoch": 4.000594000594001, + "grad_norm": 1.27556574344635, + "learning_rate": 5.996435996435996e-06, + "loss": 0.0485, "step": 13470 }, { - "epoch": 1.0010396554284866, - "grad_norm": 1.3017977476119995, - "learning_rate": 2.399376206742908e-05, - "loss": 0.0541, + "epoch": 4.003564003564003, + "grad_norm": 1.009068489074707, + "learning_rate": 5.978615978615979e-06, + "loss": 0.0567, "step": 13480 }, { - "epoch": 1.001782266448834, - "grad_norm": 3.119060516357422, - "learning_rate": 2.3989306401306996e-05, - "loss": 0.0774, + "epoch": 4.0065340065340065, + "grad_norm": 0.7276560068130493, + "learning_rate": 5.960795960795961e-06, + "loss": 0.0475, "step": 13490 }, { - "epoch": 1.0025248774691817, - "grad_norm": 1.463212013244629, - "learning_rate": 2.398485073518491e-05, - "loss": 0.0869, + "epoch": 4.00950400950401, + "grad_norm": 1.1445473432540894, + "learning_rate": 5.942975942975943e-06, + "loss": 0.0555, "step": 13500 }, { - "epoch": 1.0032674884895292, - "grad_norm": 1.4256367683410645, - "learning_rate": 2.3980395069062826e-05, - "loss": 0.0856, + "epoch": 4.012474012474012, + "grad_norm": 0.8062208294868469, + "learning_rate": 5.925155925155926e-06, + "loss": 0.0482, "step": 13510 }, { - "epoch": 1.0040100995098766, - "grad_norm": 2.062420606613159, - "learning_rate": 2.397593940294074e-05, - "loss": 0.0575, + "epoch": 4.015444015444015, + "grad_norm": 0.8470934629440308, + "learning_rate": 5.907335907335907e-06, + "loss": 0.0553, "step": 13520 }, { - "epoch": 1.0047527105302243, - "grad_norm": 1.5311766862869263, - "learning_rate": 2.3971483736818653e-05, - "loss": 0.1091, + "epoch": 4.018414018414019, + "grad_norm": 0.5474221706390381, + "learning_rate": 5.88951588951589e-06, + "loss": 0.0534, "step": 13530 }, { - "epoch": 1.0054953215505718, - "grad_norm": 1.6785446405410767, - "learning_rate": 2.396702807069657e-05, - "loss": 0.0978, + "epoch": 4.021384021384021, + "grad_norm": 0.6959888339042664, + "learning_rate": 5.8716958716958714e-06, + "loss": 0.0538, "step": 13540 }, { - "epoch": 1.0062379325709194, - "grad_norm": 2.021958827972412, - "learning_rate": 2.3962572404574486e-05, - "loss": 0.1013, + "epoch": 4.024354024354024, + "grad_norm": 0.9099267721176147, + "learning_rate": 5.8538758538758545e-06, + "loss": 0.055, "step": 13550 }, { - "epoch": 1.0069805435912669, - "grad_norm": 1.236924409866333, - "learning_rate": 2.3958116738452397e-05, - "loss": 0.0621, + "epoch": 4.027324027324028, + "grad_norm": 1.2985259294509888, + "learning_rate": 5.836055836055836e-06, + "loss": 0.0594, "step": 13560 }, { - "epoch": 1.0077231546116145, - "grad_norm": 0.7201011180877686, - "learning_rate": 2.3953661072330312e-05, - "loss": 0.0832, + "epoch": 4.03029403029403, + "grad_norm": 0.9521999359130859, + "learning_rate": 5.818235818235818e-06, + "loss": 0.0588, "step": 13570 }, { - "epoch": 1.008465765631962, - "grad_norm": 2.1270737648010254, - "learning_rate": 2.394920540620823e-05, - "loss": 0.0553, + "epoch": 4.033264033264033, + "grad_norm": 0.5641045570373535, + "learning_rate": 5.800415800415801e-06, + "loss": 0.0638, "step": 13580 }, { - "epoch": 1.0092083766523094, - "grad_norm": 2.2044267654418945, - "learning_rate": 2.3944749740086142e-05, - "loss": 0.091, + "epoch": 4.0362340362340365, + "grad_norm": 1.3589891195297241, + "learning_rate": 5.782595782595782e-06, + "loss": 0.0558, "step": 13590 }, { - "epoch": 1.0099509876726571, - "grad_norm": 1.7166577577590942, - "learning_rate": 2.3940294073964057e-05, - "loss": 0.0774, + "epoch": 4.039204039204039, + "grad_norm": 1.1346710920333862, + "learning_rate": 5.764775764775765e-06, + "loss": 0.0548, "step": 13600 }, { - "epoch": 1.0106935986930046, - "grad_norm": 1.7393256425857544, - "learning_rate": 2.3935838407841976e-05, - "loss": 0.0649, + "epoch": 4.042174042174042, + "grad_norm": 1.2047168016433716, + "learning_rate": 5.746955746955747e-06, + "loss": 0.0485, "step": 13610 }, { - "epoch": 1.0114362097133522, - "grad_norm": 1.1891884803771973, - "learning_rate": 2.3931382741719887e-05, - "loss": 0.0873, + "epoch": 4.0451440451440455, + "grad_norm": 0.673353910446167, + "learning_rate": 5.72913572913573e-06, + "loss": 0.0518, "step": 13620 }, { - "epoch": 1.0121788207336997, - "grad_norm": 0.8433098793029785, - "learning_rate": 2.3926927075597802e-05, - "loss": 0.0742, + "epoch": 4.048114048114048, + "grad_norm": 0.6137884855270386, + "learning_rate": 5.711315711315711e-06, + "loss": 0.0594, "step": 13630 }, { - "epoch": 1.0129214317540471, - "grad_norm": 1.3263208866119385, - "learning_rate": 2.3922471409475717e-05, - "loss": 0.0663, + "epoch": 4.051084051084051, + "grad_norm": 0.7243009805679321, + "learning_rate": 5.693495693495694e-06, + "loss": 0.0528, "step": 13640 }, { - "epoch": 1.0136640427743948, - "grad_norm": 2.3335938453674316, - "learning_rate": 2.3918015743353632e-05, - "loss": 0.0824, + "epoch": 4.054054054054054, + "grad_norm": 0.5329965353012085, + "learning_rate": 5.675675675675676e-06, + "loss": 0.049, "step": 13650 }, { - "epoch": 1.0144066537947423, - "grad_norm": 0.9380444884300232, - "learning_rate": 2.3913560077231547e-05, - "loss": 0.0787, + "epoch": 4.057024057024057, + "grad_norm": 0.4849202334880829, + "learning_rate": 5.6578556578556575e-06, + "loss": 0.0541, "step": 13660 }, { - "epoch": 1.01514926481509, - "grad_norm": 1.832839846611023, - "learning_rate": 2.390910441110946e-05, - "loss": 0.0656, + "epoch": 4.05999405999406, + "grad_norm": 0.6682773232460022, + "learning_rate": 5.6400356400356405e-06, + "loss": 0.0571, "step": 13670 }, { - "epoch": 1.0158918758354374, - "grad_norm": 1.5540494918823242, - "learning_rate": 2.3904648744987377e-05, - "loss": 0.0403, + "epoch": 4.062964062964063, + "grad_norm": 0.7048366069793701, + "learning_rate": 5.622215622215622e-06, + "loss": 0.0582, "step": 13680 }, { - "epoch": 1.0166344868557848, - "grad_norm": 3.9998695850372314, - "learning_rate": 2.3900193078865292e-05, - "loss": 0.1071, + "epoch": 4.065934065934066, + "grad_norm": 0.7056940793991089, + "learning_rate": 5.604395604395605e-06, + "loss": 0.0437, "step": 13690 }, { - "epoch": 1.0173770978761325, - "grad_norm": 2.738377571105957, - "learning_rate": 2.3895737412743204e-05, - "loss": 0.0826, + "epoch": 4.068904068904069, + "grad_norm": 0.6416358351707458, + "learning_rate": 5.586575586575586e-06, + "loss": 0.0458, "step": 13700 }, { - "epoch": 1.01811970889648, - "grad_norm": 5.063292503356934, - "learning_rate": 2.3891281746621122e-05, - "loss": 0.0653, + "epoch": 4.071874071874072, + "grad_norm": 0.7514241933822632, + "learning_rate": 5.568755568755569e-06, + "loss": 0.0589, "step": 13710 }, { - "epoch": 1.0188623199168276, - "grad_norm": 0.8923290967941284, - "learning_rate": 2.3886826080499037e-05, - "loss": 0.089, + "epoch": 4.074844074844075, + "grad_norm": 0.6604739427566528, + "learning_rate": 5.550935550935551e-06, + "loss": 0.0562, "step": 13720 }, { - "epoch": 1.019604930937175, - "grad_norm": 0.7146623134613037, - "learning_rate": 2.388237041437695e-05, - "loss": 0.0731, + "epoch": 4.077814077814078, + "grad_norm": 0.4404093325138092, + "learning_rate": 5.533115533115533e-06, + "loss": 0.0561, "step": 13730 }, { - "epoch": 1.0203475419575228, - "grad_norm": 5.034801006317139, - "learning_rate": 2.3877914748254864e-05, - "loss": 0.0837, + "epoch": 4.080784080784081, + "grad_norm": 0.5778560042381287, + "learning_rate": 5.515295515295516e-06, + "loss": 0.0664, "step": 13740 }, { - "epoch": 1.0210901529778702, - "grad_norm": 2.207108736038208, - "learning_rate": 2.387345908213278e-05, - "loss": 0.0966, + "epoch": 4.0837540837540836, + "grad_norm": 0.8316758275032043, + "learning_rate": 5.497475497475497e-06, + "loss": 0.0623, "step": 13750 }, { - "epoch": 1.0218327639982177, - "grad_norm": 4.187524795532227, - "learning_rate": 2.3869003416010694e-05, - "loss": 0.0873, + "epoch": 4.086724086724087, + "grad_norm": 1.186763882637024, + "learning_rate": 5.47965547965548e-06, + "loss": 0.0524, "step": 13760 }, { - "epoch": 1.0225753750185653, - "grad_norm": 2.0011796951293945, - "learning_rate": 2.386454774988861e-05, - "loss": 0.1067, + "epoch": 4.08969408969409, + "grad_norm": 0.4923029839992523, + "learning_rate": 5.461835461835461e-06, + "loss": 0.0601, "step": 13770 }, { - "epoch": 1.0233179860389128, - "grad_norm": 1.419082760810852, - "learning_rate": 2.3860092083766524e-05, - "loss": 0.1013, + "epoch": 4.0926640926640925, + "grad_norm": 0.8885074257850647, + "learning_rate": 5.444015444015444e-06, + "loss": 0.0549, "step": 13780 }, { - "epoch": 1.0240605970592604, - "grad_norm": 1.378090500831604, - "learning_rate": 2.385563641764444e-05, - "loss": 0.0844, + "epoch": 4.095634095634096, + "grad_norm": 1.2533655166625977, + "learning_rate": 5.4261954261954265e-06, + "loss": 0.0662, "step": 13790 }, { - "epoch": 1.024803208079608, - "grad_norm": 1.8442440032958984, - "learning_rate": 2.3851180751522354e-05, - "loss": 0.0922, + "epoch": 4.098604098604099, + "grad_norm": 0.7636746168136597, + "learning_rate": 5.408375408375409e-06, + "loss": 0.056, "step": 13800 }, { - "epoch": 1.0255458190999553, - "grad_norm": 1.9411593675613403, - "learning_rate": 2.3846725085400266e-05, - "loss": 0.0699, + "epoch": 4.101574101574101, + "grad_norm": 0.5362575054168701, + "learning_rate": 5.390555390555391e-06, + "loss": 0.0492, "step": 13810 }, { - "epoch": 1.026288430120303, - "grad_norm": 1.1153783798217773, - "learning_rate": 2.3842269419278184e-05, - "loss": 0.0986, + "epoch": 4.104544104544105, + "grad_norm": 0.8624958992004395, + "learning_rate": 5.372735372735372e-06, + "loss": 0.0472, "step": 13820 }, { - "epoch": 1.0270310411406505, - "grad_norm": 2.119503974914551, - "learning_rate": 2.3837813753156096e-05, - "loss": 0.0851, + "epoch": 4.107514107514108, + "grad_norm": 0.8159658908843994, + "learning_rate": 5.354915354915355e-06, + "loss": 0.052, "step": 13830 }, { - "epoch": 1.0277736521609981, - "grad_norm": 2.0890700817108154, - "learning_rate": 2.383335808703401e-05, - "loss": 0.0687, + "epoch": 4.11048411048411, + "grad_norm": 1.0133655071258545, + "learning_rate": 5.337095337095337e-06, + "loss": 0.0675, "step": 13840 }, { - "epoch": 1.0285162631813456, - "grad_norm": 3.1581473350524902, - "learning_rate": 2.382890242091193e-05, - "loss": 0.0774, + "epoch": 4.113454113454114, + "grad_norm": 0.7075998783111572, + "learning_rate": 5.3192753192753196e-06, + "loss": 0.0521, "step": 13850 }, { - "epoch": 1.0292588742016933, - "grad_norm": 1.0169066190719604, - "learning_rate": 2.382444675478984e-05, - "loss": 0.0636, + "epoch": 4.116424116424117, + "grad_norm": 0.7996563911437988, + "learning_rate": 5.301455301455302e-06, + "loss": 0.0569, "step": 13860 }, { - "epoch": 1.0300014852220407, - "grad_norm": 1.5723227262496948, - "learning_rate": 2.3819991088667756e-05, - "loss": 0.0878, + "epoch": 4.119394119394119, + "grad_norm": 0.7214605808258057, + "learning_rate": 5.283635283635284e-06, + "loss": 0.0444, "step": 13870 }, { - "epoch": 1.0307440962423882, - "grad_norm": 1.652784824371338, - "learning_rate": 2.3815535422545674e-05, - "loss": 0.0862, + "epoch": 4.1223641223641225, + "grad_norm": 0.5179925560951233, + "learning_rate": 5.265815265815266e-06, + "loss": 0.0473, "step": 13880 }, { - "epoch": 1.0314867072627358, - "grad_norm": 1.3980462551116943, - "learning_rate": 2.3811079756423586e-05, - "loss": 0.0937, + "epoch": 4.125334125334125, + "grad_norm": 0.7396313548088074, + "learning_rate": 5.247995247995247e-06, + "loss": 0.0549, "step": 13890 }, { - "epoch": 1.0322293182830833, - "grad_norm": 0.9966709613800049, - "learning_rate": 2.38066240903015e-05, - "loss": 0.0551, + "epoch": 4.128304128304128, + "grad_norm": 0.8080036640167236, + "learning_rate": 5.2301752301752304e-06, + "loss": 0.0415, "step": 13900 }, { - "epoch": 1.032971929303431, - "grad_norm": 1.0643346309661865, - "learning_rate": 2.3802168424179416e-05, - "loss": 0.0647, + "epoch": 4.1312741312741315, + "grad_norm": 0.6321327090263367, + "learning_rate": 5.212355212355213e-06, + "loss": 0.0503, "step": 13910 }, { - "epoch": 1.0337145403237784, - "grad_norm": 1.1754989624023438, - "learning_rate": 2.379771275805733e-05, - "loss": 0.0859, + "epoch": 4.134244134244134, + "grad_norm": 0.8221378922462463, + "learning_rate": 5.194535194535195e-06, + "loss": 0.0456, "step": 13920 }, { - "epoch": 1.0344571513441259, - "grad_norm": 1.513095736503601, - "learning_rate": 2.3793257091935245e-05, - "loss": 0.1229, + "epoch": 4.137214137214137, + "grad_norm": 1.2352089881896973, + "learning_rate": 5.176715176715177e-06, + "loss": 0.0631, "step": 13930 }, { - "epoch": 1.0351997623644735, - "grad_norm": 2.310269832611084, - "learning_rate": 2.3788801425813157e-05, - "loss": 0.0641, + "epoch": 4.14018414018414, + "grad_norm": 0.7337819933891296, + "learning_rate": 5.158895158895159e-06, + "loss": 0.0568, "step": 13940 }, { - "epoch": 1.035942373384821, - "grad_norm": 1.0776907205581665, - "learning_rate": 2.3784345759691075e-05, - "loss": 0.0738, + "epoch": 4.143154143154143, + "grad_norm": 0.5924921035766602, + "learning_rate": 5.141075141075141e-06, + "loss": 0.0591, "step": 13950 }, { - "epoch": 1.0366849844051687, - "grad_norm": 1.2684451341629028, - "learning_rate": 2.377989009356899e-05, - "loss": 0.1101, + "epoch": 4.146124146124146, + "grad_norm": 0.6949560642242432, + "learning_rate": 5.1232551232551234e-06, + "loss": 0.0448, "step": 13960 }, { - "epoch": 1.037427595425516, - "grad_norm": 2.617946147918701, - "learning_rate": 2.3775434427446902e-05, + "epoch": 4.149094149094149, + "grad_norm": 0.686150074005127, + "learning_rate": 5.105435105435106e-06, "loss": 0.0603, "step": 13970 }, { - "epoch": 1.0381702064458636, - "grad_norm": 2.604550361633301, - "learning_rate": 2.3770978761324817e-05, - "loss": 0.1068, + "epoch": 4.152064152064152, + "grad_norm": 0.9512919783592224, + "learning_rate": 5.087615087615088e-06, + "loss": 0.0757, "step": 13980 }, { - "epoch": 1.0389128174662112, - "grad_norm": 2.7444822788238525, - "learning_rate": 2.3766523095202735e-05, - "loss": 0.081, + "epoch": 4.155034155034155, + "grad_norm": 1.0221737623214722, + "learning_rate": 5.06979506979507e-06, + "loss": 0.0431, "step": 13990 }, { - "epoch": 1.0396554284865587, - "grad_norm": 2.1153526306152344, - "learning_rate": 2.3762067429080647e-05, - "loss": 0.0752, + "epoch": 4.158004158004158, + "grad_norm": 0.5151529312133789, + "learning_rate": 5.051975051975052e-06, + "loss": 0.0446, "step": 14000 }, { - "epoch": 1.0403980395069063, - "grad_norm": 2.4944326877593994, - "learning_rate": 2.3757611762958562e-05, - "loss": 0.1094, + "epoch": 4.160974160974161, + "grad_norm": 0.5607339143753052, + "learning_rate": 5.034155034155034e-06, + "loss": 0.0582, "step": 14010 }, { - "epoch": 1.0411406505272538, - "grad_norm": 2.197497606277466, - "learning_rate": 2.375315609683648e-05, - "loss": 0.0758, + "epoch": 4.163944163944164, + "grad_norm": 0.5247116088867188, + "learning_rate": 5.0163350163350165e-06, + "loss": 0.0601, "step": 14020 }, { - "epoch": 1.0418832615476015, - "grad_norm": 1.1447632312774658, - "learning_rate": 2.3748700430714392e-05, - "loss": 0.0637, + "epoch": 4.166914166914167, + "grad_norm": 0.5927850604057312, + "learning_rate": 4.998514998514999e-06, + "loss": 0.0519, "step": 14030 }, { - "epoch": 1.042625872567949, - "grad_norm": 2.18037486076355, - "learning_rate": 2.3744244764592307e-05, - "loss": 0.0899, + "epoch": 4.1698841698841695, + "grad_norm": 1.483964443206787, + "learning_rate": 4.980694980694981e-06, + "loss": 0.0551, "step": 14040 }, { - "epoch": 1.0433684835882964, - "grad_norm": 1.8311865329742432, - "learning_rate": 2.3739789098470222e-05, - "loss": 0.0924, + "epoch": 4.172854172854173, + "grad_norm": 0.7660327553749084, + "learning_rate": 4.962874962874963e-06, + "loss": 0.0511, "step": 14050 }, { - "epoch": 1.044111094608644, - "grad_norm": 1.3843854665756226, - "learning_rate": 2.3735333432348137e-05, - "loss": 0.0737, + "epoch": 4.175824175824176, + "grad_norm": 0.7715831995010376, + "learning_rate": 4.945054945054945e-06, + "loss": 0.069, "step": 14060 }, { - "epoch": 1.0448537056289915, - "grad_norm": 2.358302116394043, - "learning_rate": 2.3730877766226052e-05, - "loss": 0.1003, + "epoch": 4.1787941787941785, + "grad_norm": 0.8233699798583984, + "learning_rate": 4.927234927234927e-06, + "loss": 0.0532, "step": 14070 }, { - "epoch": 1.0455963166493392, - "grad_norm": 0.5193414688110352, - "learning_rate": 2.3726422100103964e-05, - "loss": 0.0753, + "epoch": 4.181764181764182, + "grad_norm": 1.1859806776046753, + "learning_rate": 4.9094149094149095e-06, + "loss": 0.0636, "step": 14080 }, { - "epoch": 1.0463389276696866, - "grad_norm": 2.2045769691467285, - "learning_rate": 2.3721966433981882e-05, - "loss": 0.0484, + "epoch": 4.184734184734185, + "grad_norm": 0.6633349061012268, + "learning_rate": 4.891594891594892e-06, + "loss": 0.0535, "step": 14090 }, { - "epoch": 1.047081538690034, - "grad_norm": 1.636837124824524, - "learning_rate": 2.3717510767859797e-05, - "loss": 0.0846, + "epoch": 4.187704187704187, + "grad_norm": 0.6353622674942017, + "learning_rate": 4.873774873774874e-06, + "loss": 0.0511, "step": 14100 }, { - "epoch": 1.0478241497103817, - "grad_norm": 2.898538112640381, - "learning_rate": 2.371305510173771e-05, - "loss": 0.1106, + "epoch": 4.190674190674191, + "grad_norm": 0.7265152335166931, + "learning_rate": 4.855954855954856e-06, + "loss": 0.0562, "step": 14110 }, { - "epoch": 1.0485667607307292, - "grad_norm": 1.6339201927185059, - "learning_rate": 2.3708599435615627e-05, - "loss": 0.0969, + "epoch": 4.193644193644194, + "grad_norm": 0.8203964233398438, + "learning_rate": 4.838134838134839e-06, + "loss": 0.062, "step": 14120 }, { - "epoch": 1.0493093717510769, - "grad_norm": 4.053697109222412, - "learning_rate": 2.3704143769493542e-05, - "loss": 0.0898, + "epoch": 4.196614196614196, + "grad_norm": 0.6999956369400024, + "learning_rate": 4.82031482031482e-06, + "loss": 0.0468, "step": 14130 }, { - "epoch": 1.0500519827714243, - "grad_norm": 2.793971300125122, - "learning_rate": 2.3699688103371454e-05, - "loss": 0.0996, + "epoch": 4.1995841995842, + "grad_norm": 0.7191495299339294, + "learning_rate": 4.8024948024948025e-06, + "loss": 0.0521, "step": 14140 }, { - "epoch": 1.050794593791772, - "grad_norm": 1.560320258140564, - "learning_rate": 2.369523243724937e-05, - "loss": 0.1084, + "epoch": 4.202554202554203, + "grad_norm": 1.0970999002456665, + "learning_rate": 4.784674784674785e-06, + "loss": 0.0538, "step": 14150 }, { - "epoch": 1.0515372048121194, - "grad_norm": 2.7155508995056152, - "learning_rate": 2.3690776771127284e-05, - "loss": 0.0503, + "epoch": 4.205524205524205, + "grad_norm": 0.8598058223724365, + "learning_rate": 4.766854766854767e-06, + "loss": 0.0607, "step": 14160 }, { - "epoch": 1.0522798158324669, - "grad_norm": 1.7181273698806763, - "learning_rate": 2.36863211050052e-05, - "loss": 0.0793, + "epoch": 4.2084942084942085, + "grad_norm": 0.6349924206733704, + "learning_rate": 4.749034749034749e-06, + "loss": 0.0684, "step": 14170 }, { - "epoch": 1.0530224268528146, - "grad_norm": 3.6145498752593994, - "learning_rate": 2.3681865438883114e-05, - "loss": 0.0727, + "epoch": 4.211464211464212, + "grad_norm": 0.836054265499115, + "learning_rate": 4.731214731214731e-06, + "loss": 0.0429, "step": 14180 }, { - "epoch": 1.053765037873162, - "grad_norm": 1.1024489402770996, - "learning_rate": 2.367740977276103e-05, - "loss": 0.0675, + "epoch": 4.214434214434214, + "grad_norm": 1.0408257246017456, + "learning_rate": 4.713394713394714e-06, + "loss": 0.0579, "step": 14190 }, { - "epoch": 1.0545076488935097, - "grad_norm": 1.619287371635437, - "learning_rate": 2.3672954106638944e-05, - "loss": 0.0636, + "epoch": 4.2174042174042174, + "grad_norm": 0.69316166639328, + "learning_rate": 4.6955746955746955e-06, + "loss": 0.0506, "step": 14200 }, { - "epoch": 1.0552502599138571, - "grad_norm": 2.327834367752075, - "learning_rate": 2.366849844051686e-05, - "loss": 0.0938, + "epoch": 4.220374220374221, + "grad_norm": 0.9772447347640991, + "learning_rate": 4.677754677754678e-06, + "loss": 0.0597, "step": 14210 }, { - "epoch": 1.0559928709342046, - "grad_norm": 2.3670992851257324, - "learning_rate": 2.366404277439477e-05, - "loss": 0.0786, + "epoch": 4.223344223344223, + "grad_norm": 0.7558261156082153, + "learning_rate": 4.65993465993466e-06, + "loss": 0.0596, "step": 14220 }, { - "epoch": 1.0567354819545522, - "grad_norm": 2.3187897205352783, - "learning_rate": 2.365958710827269e-05, - "loss": 0.1029, + "epoch": 4.226314226314226, + "grad_norm": 1.0452693700790405, + "learning_rate": 4.642114642114642e-06, + "loss": 0.0593, "step": 14230 }, { - "epoch": 1.0574780929748997, - "grad_norm": 2.0554943084716797, - "learning_rate": 2.3655131442150604e-05, - "loss": 0.0771, + "epoch": 4.22928422928423, + "grad_norm": 0.8529985547065735, + "learning_rate": 4.624294624294624e-06, + "loss": 0.0565, "step": 14240 }, { - "epoch": 1.0582207039952474, - "grad_norm": 1.3860465288162231, - "learning_rate": 2.3650675776028515e-05, - "loss": 0.0645, + "epoch": 4.232254232254232, + "grad_norm": 0.7685134410858154, + "learning_rate": 4.606474606474606e-06, + "loss": 0.0399, "step": 14250 }, { - "epoch": 1.0589633150155948, - "grad_norm": 0.33183351159095764, - "learning_rate": 2.3646220109906434e-05, - "loss": 0.0551, + "epoch": 4.235224235224235, + "grad_norm": 0.6068404912948608, + "learning_rate": 4.588654588654589e-06, + "loss": 0.0574, "step": 14260 }, { - "epoch": 1.0597059260359423, - "grad_norm": 0.7407335042953491, - "learning_rate": 2.3641764443784345e-05, - "loss": 0.1163, + "epoch": 4.238194238194239, + "grad_norm": 0.9283270835876465, + "learning_rate": 4.570834570834571e-06, + "loss": 0.0578, "step": 14270 }, { - "epoch": 1.06044853705629, - "grad_norm": 1.9329426288604736, - "learning_rate": 2.363730877766226e-05, - "loss": 0.0972, + "epoch": 4.241164241164241, + "grad_norm": 1.1383968591690063, + "learning_rate": 4.553014553014554e-06, + "loss": 0.0581, "step": 14280 }, { - "epoch": 1.0611911480766374, - "grad_norm": 0.46645310521125793, - "learning_rate": 2.363285311154018e-05, - "loss": 0.0547, + "epoch": 4.244134244134244, + "grad_norm": 1.4468823671340942, + "learning_rate": 4.535194535194535e-06, + "loss": 0.0472, "step": 14290 }, { - "epoch": 1.061933759096985, - "grad_norm": 0.9803817272186279, - "learning_rate": 2.362839744541809e-05, - "loss": 0.0556, + "epoch": 4.2471042471042475, + "grad_norm": 0.7596200108528137, + "learning_rate": 4.517374517374517e-06, + "loss": 0.0529, "step": 14300 }, { - "epoch": 1.0626763701173325, - "grad_norm": 2.7999866008758545, - "learning_rate": 2.3623941779296005e-05, - "loss": 0.0965, + "epoch": 4.25007425007425, + "grad_norm": 0.31641915440559387, + "learning_rate": 4.499554499554499e-06, + "loss": 0.0515, "step": 14310 }, { - "epoch": 1.0634189811376802, - "grad_norm": 0.8801766037940979, - "learning_rate": 2.361948611317392e-05, - "loss": 0.0768, + "epoch": 4.253044253044253, + "grad_norm": 0.7532891035079956, + "learning_rate": 4.481734481734482e-06, + "loss": 0.0618, "step": 14320 }, { - "epoch": 1.0641615921580276, - "grad_norm": 1.0280200242996216, - "learning_rate": 2.3615030447051835e-05, - "loss": 0.0472, + "epoch": 4.256014256014256, + "grad_norm": 0.5696304440498352, + "learning_rate": 4.463914463914465e-06, + "loss": 0.0508, "step": 14330 }, { - "epoch": 1.064904203178375, - "grad_norm": 1.479634165763855, - "learning_rate": 2.361057478092975e-05, - "loss": 0.0671, + "epoch": 4.258984258984259, + "grad_norm": 0.7461338639259338, + "learning_rate": 4.446094446094446e-06, + "loss": 0.0619, "step": 14340 }, { - "epoch": 1.0656468141987228, - "grad_norm": 0.7711525559425354, - "learning_rate": 2.3606119114807662e-05, - "loss": 0.0725, + "epoch": 4.261954261954262, + "grad_norm": 0.6547114849090576, + "learning_rate": 4.428274428274429e-06, + "loss": 0.054, "step": 14350 }, { - "epoch": 1.0663894252190702, - "grad_norm": 2.1043448448181152, - "learning_rate": 2.360166344868558e-05, - "loss": 0.0788, + "epoch": 4.2649242649242645, + "grad_norm": 0.47888076305389404, + "learning_rate": 4.41045441045441e-06, + "loss": 0.0528, "step": 14360 }, { - "epoch": 1.0671320362394179, - "grad_norm": 1.8969895839691162, - "learning_rate": 2.3597207782563495e-05, - "loss": 0.0519, + "epoch": 4.267894267894268, + "grad_norm": 0.500554084777832, + "learning_rate": 4.392634392634393e-06, + "loss": 0.0508, "step": 14370 }, { - "epoch": 1.0678746472597653, - "grad_norm": 3.172367811203003, - "learning_rate": 2.3592752116441407e-05, - "loss": 0.1093, + "epoch": 4.270864270864271, + "grad_norm": 0.8352609276771545, + "learning_rate": 4.374814374814375e-06, + "loss": 0.0517, "step": 14380 }, { - "epoch": 1.0686172582801128, - "grad_norm": 2.328660726547241, - "learning_rate": 2.358829645031932e-05, - "loss": 0.0743, + "epoch": 4.273834273834273, + "grad_norm": 0.8386610150337219, + "learning_rate": 4.356994356994357e-06, + "loss": 0.0537, "step": 14390 }, { - "epoch": 1.0693598693004605, - "grad_norm": 1.0215742588043213, - "learning_rate": 2.358384078419724e-05, - "loss": 0.0577, + "epoch": 4.276804276804277, + "grad_norm": 0.6090155243873596, + "learning_rate": 4.33917433917434e-06, + "loss": 0.0624, "step": 14400 }, { - "epoch": 1.070102480320808, - "grad_norm": 2.7807462215423584, - "learning_rate": 2.357938511807515e-05, - "loss": 0.0538, + "epoch": 4.27977427977428, + "grad_norm": 0.5954572558403015, + "learning_rate": 4.321354321354321e-06, + "loss": 0.0562, "step": 14410 }, { - "epoch": 1.0708450913411556, - "grad_norm": 2.544025421142578, - "learning_rate": 2.3574929451953067e-05, - "loss": 0.0984, + "epoch": 4.282744282744282, + "grad_norm": 1.196431279182434, + "learning_rate": 4.303534303534304e-06, + "loss": 0.0625, "step": 14420 }, { - "epoch": 1.071587702361503, - "grad_norm": 1.8840546607971191, - "learning_rate": 2.3570473785830985e-05, - "loss": 0.0891, + "epoch": 4.285714285714286, + "grad_norm": 0.8351851105690002, + "learning_rate": 4.2857142857142855e-06, + "loss": 0.0473, "step": 14430 }, { - "epoch": 1.0723303133818507, - "grad_norm": 1.2693723440170288, - "learning_rate": 2.3566018119708897e-05, - "loss": 0.098, + "epoch": 4.288684288684289, + "grad_norm": 1.5514023303985596, + "learning_rate": 4.2678942678942685e-06, + "loss": 0.0521, "step": 14440 }, { - "epoch": 1.0730729244021981, - "grad_norm": 2.465930938720703, - "learning_rate": 2.356156245358681e-05, - "loss": 0.1154, + "epoch": 4.291654291654291, + "grad_norm": 0.5265604257583618, + "learning_rate": 4.25007425007425e-06, + "loss": 0.0527, "step": 14450 }, { - "epoch": 1.0738155354225456, - "grad_norm": 3.5287487506866455, - "learning_rate": 2.3557106787464727e-05, - "loss": 0.0794, + "epoch": 4.2946242946242945, + "grad_norm": 0.746711015701294, + "learning_rate": 4.232254232254232e-06, + "loss": 0.0539, "step": 14460 }, { - "epoch": 1.0745581464428933, - "grad_norm": 2.1311392784118652, - "learning_rate": 2.355265112134264e-05, - "loss": 0.0911, + "epoch": 4.297594297594298, + "grad_norm": 1.2712050676345825, + "learning_rate": 4.214434214434215e-06, + "loss": 0.061, "step": 14470 }, { - "epoch": 1.0753007574632407, - "grad_norm": 1.7194443941116333, - "learning_rate": 2.3548195455220557e-05, - "loss": 0.0689, + "epoch": 4.3005643005643, + "grad_norm": 0.7480892539024353, + "learning_rate": 4.196614196614196e-06, + "loss": 0.0546, "step": 14480 }, { - "epoch": 1.0760433684835884, - "grad_norm": 1.9258701801300049, - "learning_rate": 2.3543739789098468e-05, - "loss": 0.0488, + "epoch": 4.303534303534303, + "grad_norm": 0.9045110940933228, + "learning_rate": 4.178794178794179e-06, + "loss": 0.0528, "step": 14490 }, { - "epoch": 1.0767859795039358, - "grad_norm": 1.2407386302947998, - "learning_rate": 2.3539284122976387e-05, - "loss": 0.066, + "epoch": 4.306504306504307, + "grad_norm": 0.7714174389839172, + "learning_rate": 4.160974160974161e-06, + "loss": 0.0498, "step": 14500 }, { - "epoch": 1.0775285905242833, - "grad_norm": 0.2325424998998642, - "learning_rate": 2.35348284568543e-05, - "loss": 0.0667, + "epoch": 4.309474309474309, + "grad_norm": 0.9806069731712341, + "learning_rate": 4.143154143154144e-06, + "loss": 0.0642, "step": 14510 }, { - "epoch": 1.078271201544631, - "grad_norm": 0.7023411989212036, - "learning_rate": 2.3530372790732213e-05, - "loss": 0.0774, + "epoch": 4.312444312444312, + "grad_norm": 0.8302409052848816, + "learning_rate": 4.125334125334125e-06, + "loss": 0.0682, "step": 14520 }, { - "epoch": 1.0790138125649784, - "grad_norm": 0.42877697944641113, - "learning_rate": 2.352591712461013e-05, - "loss": 0.0441, + "epoch": 4.315414315414316, + "grad_norm": 1.0357773303985596, + "learning_rate": 4.107514107514108e-06, + "loss": 0.0543, "step": 14530 }, { - "epoch": 1.079756423585326, - "grad_norm": 0.8632937669754028, - "learning_rate": 2.3521461458488047e-05, - "loss": 0.0564, + "epoch": 4.318384318384318, + "grad_norm": 0.711145281791687, + "learning_rate": 4.08969408969409e-06, + "loss": 0.0546, "step": 14540 }, { - "epoch": 1.0804990346056735, - "grad_norm": 7.092894077301025, - "learning_rate": 2.3517005792365958e-05, - "loss": 0.0802, + "epoch": 4.321354321354321, + "grad_norm": 0.4952409863471985, + "learning_rate": 4.0718740718740715e-06, + "loss": 0.0483, "step": 14550 }, { - "epoch": 1.081241645626021, - "grad_norm": 2.007236957550049, - "learning_rate": 2.3512550126243873e-05, - "loss": 0.1011, + "epoch": 4.324324324324325, + "grad_norm": 0.7028197050094604, + "learning_rate": 4.0540540540540545e-06, + "loss": 0.0449, "step": 14560 }, { - "epoch": 1.0819842566463687, - "grad_norm": 2.4065308570861816, - "learning_rate": 2.3508094460121788e-05, - "loss": 0.0985, + "epoch": 4.327294327294327, + "grad_norm": 0.7618030905723572, + "learning_rate": 4.036234036234036e-06, + "loss": 0.0533, "step": 14570 }, { - "epoch": 1.082726867666716, - "grad_norm": 4.827093601226807, - "learning_rate": 2.3503638793999703e-05, - "loss": 0.1154, + "epoch": 4.33026433026433, + "grad_norm": 0.4399206340312958, + "learning_rate": 4.018414018414019e-06, + "loss": 0.0504, "step": 14580 }, { - "epoch": 1.0834694786870638, - "grad_norm": 2.402787208557129, - "learning_rate": 2.3499183127877618e-05, - "loss": 0.0893, + "epoch": 4.3332343332343335, + "grad_norm": 0.4202236533164978, + "learning_rate": 4.000594000594e-06, + "loss": 0.0553, "step": 14590 }, { - "epoch": 1.0842120897074112, - "grad_norm": 1.0034596920013428, - "learning_rate": 2.3494727461755533e-05, - "loss": 0.1013, + "epoch": 4.336204336204336, + "grad_norm": 0.4028279185295105, + "learning_rate": 3.982773982773983e-06, + "loss": 0.0548, "step": 14600 }, { - "epoch": 1.084954700727759, - "grad_norm": 1.7055171728134155, - "learning_rate": 2.3490271795633448e-05, - "loss": 0.0918, + "epoch": 4.339174339174339, + "grad_norm": 0.7991787195205688, + "learning_rate": 3.964953964953965e-06, + "loss": 0.0462, "step": 14610 }, { - "epoch": 1.0856973117481064, - "grad_norm": 2.0638813972473145, - "learning_rate": 2.3485816129511363e-05, - "loss": 0.0888, + "epoch": 4.342144342144342, + "grad_norm": 0.8941323757171631, + "learning_rate": 3.947133947133947e-06, + "loss": 0.0396, "step": 14620 }, { - "epoch": 1.0864399227684538, - "grad_norm": 1.2625740766525269, - "learning_rate": 2.3481360463389275e-05, - "loss": 0.0901, + "epoch": 4.345114345114345, + "grad_norm": 0.6208845973014832, + "learning_rate": 3.92931392931393e-06, + "loss": 0.0491, "step": 14630 }, { - "epoch": 1.0871825337888015, - "grad_norm": 1.9347448348999023, - "learning_rate": 2.3476904797267193e-05, - "loss": 0.0508, + "epoch": 4.348084348084348, + "grad_norm": 0.7600147724151611, + "learning_rate": 3.911493911493911e-06, + "loss": 0.0625, "step": 14640 }, { - "epoch": 1.087925144809149, - "grad_norm": 1.2374624013900757, - "learning_rate": 2.3472449131145108e-05, - "loss": 0.0544, + "epoch": 4.351054351054351, + "grad_norm": 0.4878835678100586, + "learning_rate": 3.893673893673894e-06, + "loss": 0.0517, "step": 14650 }, { - "epoch": 1.0886677558294966, - "grad_norm": 1.0130029916763306, - "learning_rate": 2.346799346502302e-05, - "loss": 0.0371, + "epoch": 4.354024354024354, + "grad_norm": 0.7598239183425903, + "learning_rate": 3.875853875853875e-06, + "loss": 0.0534, "step": 14660 }, { - "epoch": 1.089410366849844, - "grad_norm": 2.272545576095581, - "learning_rate": 2.3463537798900938e-05, - "loss": 0.0668, + "epoch": 4.356994356994357, + "grad_norm": 1.0485094785690308, + "learning_rate": 3.858033858033858e-06, + "loss": 0.0429, "step": 14670 }, { - "epoch": 1.0901529778701915, - "grad_norm": 1.1492127180099487, - "learning_rate": 2.345908213277885e-05, - "loss": 0.0909, + "epoch": 4.35996435996436, + "grad_norm": 1.0490139722824097, + "learning_rate": 3.8402138402138406e-06, + "loss": 0.0608, "step": 14680 }, { - "epoch": 1.0908955888905392, - "grad_norm": 0.4936734735965729, - "learning_rate": 2.3454626466656765e-05, - "loss": 0.0579, + "epoch": 4.362934362934363, + "grad_norm": 0.6799785494804382, + "learning_rate": 3.822393822393823e-06, + "loss": 0.0582, "step": 14690 }, { - "epoch": 1.0916381999108866, - "grad_norm": 1.2032221555709839, - "learning_rate": 2.3450170800534683e-05, - "loss": 0.0953, + "epoch": 4.365904365904366, + "grad_norm": 0.6751995086669922, + "learning_rate": 3.804573804573805e-06, + "loss": 0.0436, "step": 14700 }, { - "epoch": 1.0923808109312343, - "grad_norm": 1.325255036354065, - "learning_rate": 2.3445715134412595e-05, - "loss": 0.0653, + "epoch": 4.368874368874369, + "grad_norm": 0.8852332830429077, + "learning_rate": 3.7867537867537867e-06, + "loss": 0.0505, "step": 14710 }, { - "epoch": 1.0931234219515817, - "grad_norm": 2.2962052822113037, - "learning_rate": 2.344125946829051e-05, - "loss": 0.0618, + "epoch": 4.371844371844372, + "grad_norm": 1.6373037099838257, + "learning_rate": 3.7689337689337693e-06, + "loss": 0.0592, "step": 14720 }, { - "epoch": 1.0938660329719294, - "grad_norm": 0.9786393642425537, - "learning_rate": 2.3436803802168425e-05, - "loss": 0.0784, + "epoch": 4.374814374814375, + "grad_norm": 0.8025867342948914, + "learning_rate": 3.751113751113751e-06, + "loss": 0.0429, "step": 14730 }, { - "epoch": 1.0946086439922769, - "grad_norm": 1.7134709358215332, - "learning_rate": 2.343234813604634e-05, - "loss": 0.0592, + "epoch": 4.377784377784378, + "grad_norm": 1.4527711868286133, + "learning_rate": 3.733293733293733e-06, + "loss": 0.0479, "step": 14740 }, { - "epoch": 1.0953512550126243, - "grad_norm": 0.9471766948699951, - "learning_rate": 2.3427892469924255e-05, - "loss": 0.112, + "epoch": 4.3807543807543805, + "grad_norm": 0.726428210735321, + "learning_rate": 3.7154737154737153e-06, + "loss": 0.0496, "step": 14750 }, { - "epoch": 1.096093866032972, - "grad_norm": 0.9687842130661011, - "learning_rate": 2.342343680380217e-05, - "loss": 0.0799, + "epoch": 4.383724383724384, + "grad_norm": 0.6071370840072632, + "learning_rate": 3.6976536976536975e-06, + "loss": 0.0461, "step": 14760 }, { - "epoch": 1.0968364770533194, - "grad_norm": 1.6770538091659546, - "learning_rate": 2.3418981137680085e-05, - "loss": 0.0894, + "epoch": 4.386694386694387, + "grad_norm": 0.7492642998695374, + "learning_rate": 3.67983367983368e-06, + "loss": 0.0476, "step": 14770 }, { - "epoch": 1.097579088073667, - "grad_norm": 2.252725124359131, - "learning_rate": 2.3414525471558e-05, - "loss": 0.0903, + "epoch": 4.389664389664389, + "grad_norm": 0.6537764072418213, + "learning_rate": 3.6620136620136623e-06, + "loss": 0.0514, "step": 14780 }, { - "epoch": 1.0983216990940146, - "grad_norm": 1.531714677810669, - "learning_rate": 2.341006980543591e-05, - "loss": 0.0644, + "epoch": 4.392634392634393, + "grad_norm": 0.9229190349578857, + "learning_rate": 3.6441936441936444e-06, + "loss": 0.0522, "step": 14790 }, { - "epoch": 1.099064310114362, - "grad_norm": 2.1551620960235596, - "learning_rate": 2.3405614139313826e-05, - "loss": 0.073, + "epoch": 4.395604395604396, + "grad_norm": 0.7735339999198914, + "learning_rate": 3.6263736263736266e-06, + "loss": 0.0492, "step": 14800 }, { - "epoch": 1.0998069211347097, - "grad_norm": 1.436985969543457, - "learning_rate": 2.3401158473191745e-05, - "loss": 0.0695, + "epoch": 4.398574398574398, + "grad_norm": 0.7553290128707886, + "learning_rate": 3.6085536085536088e-06, + "loss": 0.0399, "step": 14810 }, { - "epoch": 1.1005495321550571, - "grad_norm": 2.5473999977111816, - "learning_rate": 2.3396702807069656e-05, - "loss": 0.037, + "epoch": 4.401544401544402, + "grad_norm": 0.988293468952179, + "learning_rate": 3.5907335907335905e-06, + "loss": 0.0616, "step": 14820 }, { - "epoch": 1.1012921431754048, - "grad_norm": 2.2128050327301025, - "learning_rate": 2.339224714094757e-05, - "loss": 0.0767, + "epoch": 4.404514404514405, + "grad_norm": 0.6976864337921143, + "learning_rate": 3.5729135729135727e-06, + "loss": 0.045, "step": 14830 }, { - "epoch": 1.1020347541957523, - "grad_norm": 0.8083871006965637, - "learning_rate": 2.338779147482549e-05, - "loss": 0.0557, + "epoch": 4.407484407484407, + "grad_norm": 1.1107743978500366, + "learning_rate": 3.5550935550935553e-06, + "loss": 0.0592, "step": 14840 }, { - "epoch": 1.1027773652160997, - "grad_norm": 2.2617440223693848, - "learning_rate": 2.33833358087034e-05, - "loss": 0.0922, + "epoch": 4.410454410454411, + "grad_norm": 0.5012500286102295, + "learning_rate": 3.5372735372735375e-06, + "loss": 0.0691, "step": 14850 }, { - "epoch": 1.1035199762364474, - "grad_norm": 0.7747202515602112, - "learning_rate": 2.3378880142581316e-05, - "loss": 0.0625, + "epoch": 4.413424413424414, + "grad_norm": 0.6141711473464966, + "learning_rate": 3.5194535194535196e-06, + "loss": 0.0504, "step": 14860 }, { - "epoch": 1.1042625872567948, - "grad_norm": 1.1605490446090698, - "learning_rate": 2.337442447645923e-05, - "loss": 0.0792, + "epoch": 4.416394416394416, + "grad_norm": 0.9581981897354126, + "learning_rate": 3.501633501633502e-06, + "loss": 0.0506, "step": 14870 }, { - "epoch": 1.1050051982771425, - "grad_norm": 1.8753223419189453, - "learning_rate": 2.3369968810337146e-05, - "loss": 0.0943, + "epoch": 4.4193644193644195, + "grad_norm": 0.5101330280303955, + "learning_rate": 3.483813483813484e-06, + "loss": 0.051, "step": 14880 }, { - "epoch": 1.10574780929749, - "grad_norm": 0.9175904393196106, - "learning_rate": 2.336551314421506e-05, - "loss": 0.0921, + "epoch": 4.422334422334423, + "grad_norm": 1.3231449127197266, + "learning_rate": 3.465993465993466e-06, + "loss": 0.0683, "step": 14890 }, { - "epoch": 1.1064904203178376, - "grad_norm": 0.9853323101997375, - "learning_rate": 2.3361057478092973e-05, - "loss": 0.0689, + "epoch": 4.425304425304425, + "grad_norm": 0.7032347917556763, + "learning_rate": 3.448173448173448e-06, + "loss": 0.0544, "step": 14900 }, { - "epoch": 1.107233031338185, - "grad_norm": 1.734784722328186, - "learning_rate": 2.335660181197089e-05, - "loss": 0.0748, + "epoch": 4.428274428274428, + "grad_norm": 1.0103317499160767, + "learning_rate": 3.4303534303534305e-06, + "loss": 0.0686, "step": 14910 }, { - "epoch": 1.1079756423585325, - "grad_norm": 3.9313881397247314, - "learning_rate": 2.3352146145848806e-05, - "loss": 0.0829, + "epoch": 4.431244431244432, + "grad_norm": 0.8349617719650269, + "learning_rate": 3.4125334125334127e-06, + "loss": 0.0532, "step": 14920 }, { - "epoch": 1.1087182533788802, - "grad_norm": 0.6658304929733276, - "learning_rate": 2.3347690479726718e-05, - "loss": 0.0597, + "epoch": 4.434214434214434, + "grad_norm": 0.7502866983413696, + "learning_rate": 3.394713394713395e-06, + "loss": 0.0585, "step": 14930 }, { - "epoch": 1.1094608643992276, - "grad_norm": 1.8570702075958252, - "learning_rate": 2.3343234813604636e-05, - "loss": 0.0969, + "epoch": 4.437184437184437, + "grad_norm": 0.6169337630271912, + "learning_rate": 3.376893376893377e-06, + "loss": 0.0643, "step": 14940 }, { - "epoch": 1.1102034754195753, - "grad_norm": 2.4052164554595947, - "learning_rate": 2.333877914748255e-05, - "loss": 0.0937, + "epoch": 4.440154440154441, + "grad_norm": 1.1160919666290283, + "learning_rate": 3.359073359073359e-06, + "loss": 0.0545, "step": 14950 }, { - "epoch": 1.1109460864399228, - "grad_norm": 2.2822628021240234, - "learning_rate": 2.3334323481360463e-05, - "loss": 0.086, + "epoch": 4.443124443124443, + "grad_norm": 1.2266151905059814, + "learning_rate": 3.3412533412533413e-06, + "loss": 0.0514, "step": 14960 }, { - "epoch": 1.1116886974602702, - "grad_norm": 4.604571342468262, - "learning_rate": 2.3329867815238378e-05, - "loss": 0.1085, + "epoch": 4.446094446094446, + "grad_norm": 0.4717876613140106, + "learning_rate": 3.3234333234333235e-06, + "loss": 0.0492, "step": 14970 }, { - "epoch": 1.1124313084806179, - "grad_norm": 2.1120212078094482, - "learning_rate": 2.3325412149116293e-05, - "loss": 0.0738, + "epoch": 4.4490644490644495, + "grad_norm": 0.7887519598007202, + "learning_rate": 3.3056133056133057e-06, + "loss": 0.053, "step": 14980 }, { - "epoch": 1.1131739195009653, - "grad_norm": 2.652463912963867, - "learning_rate": 2.3320956482994208e-05, - "loss": 0.0612, + "epoch": 4.452034452034452, + "grad_norm": 0.6707944273948669, + "learning_rate": 3.287793287793288e-06, + "loss": 0.0636, "step": 14990 }, { - "epoch": 1.113916530521313, - "grad_norm": 1.0508460998535156, - "learning_rate": 2.3316500816872123e-05, - "loss": 0.0624, + "epoch": 4.455004455004455, + "grad_norm": 0.632908821105957, + "learning_rate": 3.26997326997327e-06, + "loss": 0.0563, "step": 15000 }, { - "epoch": 1.1146591415416605, - "grad_norm": 2.2255547046661377, - "learning_rate": 2.3312045150750038e-05, - "loss": 0.0836, + "epoch": 4.457974457974458, + "grad_norm": 0.597366213798523, + "learning_rate": 3.252153252153252e-06, + "loss": 0.0605, "step": 15010 }, { - "epoch": 1.1154017525620081, - "grad_norm": 1.7754733562469482, - "learning_rate": 2.3307589484627953e-05, - "loss": 0.1167, + "epoch": 4.460944460944461, + "grad_norm": 0.9385949969291687, + "learning_rate": 3.2343332343332344e-06, + "loss": 0.0562, "step": 15020 }, { - "epoch": 1.1161443635823556, - "grad_norm": 1.857144832611084, - "learning_rate": 2.3303133818505868e-05, - "loss": 0.0596, + "epoch": 4.463914463914464, + "grad_norm": 0.7558770179748535, + "learning_rate": 3.2165132165132165e-06, + "loss": 0.0629, "step": 15030 }, { - "epoch": 1.116886974602703, - "grad_norm": 3.7554049491882324, - "learning_rate": 2.3298678152383783e-05, - "loss": 0.0647, + "epoch": 4.4668844668844665, + "grad_norm": 0.5903274416923523, + "learning_rate": 3.1986931986931987e-06, + "loss": 0.0642, "step": 15040 }, { - "epoch": 1.1176295856230507, - "grad_norm": 3.861762762069702, - "learning_rate": 2.3294222486261698e-05, - "loss": 0.083, + "epoch": 4.46985446985447, + "grad_norm": 0.564150869846344, + "learning_rate": 3.1808731808731813e-06, + "loss": 0.0541, "step": 15050 }, { - "epoch": 1.1183721966433982, - "grad_norm": 0.333187997341156, - "learning_rate": 2.3289766820139613e-05, - "loss": 0.0567, + "epoch": 4.472824472824473, + "grad_norm": 0.41265928745269775, + "learning_rate": 3.163053163053163e-06, + "loss": 0.0532, "step": 15060 }, { - "epoch": 1.1191148076637458, - "grad_norm": 3.2010586261749268, - "learning_rate": 2.3285311154017524e-05, - "loss": 0.0925, + "epoch": 4.475794475794475, + "grad_norm": 0.43676692247390747, + "learning_rate": 3.1452331452331452e-06, + "loss": 0.0448, "step": 15070 }, { - "epoch": 1.1198574186840933, - "grad_norm": 3.66748046875, - "learning_rate": 2.3280855487895443e-05, - "loss": 0.0884, + "epoch": 4.478764478764479, + "grad_norm": 0.9410478472709656, + "learning_rate": 3.1274131274131274e-06, + "loss": 0.0521, "step": 15080 }, { - "epoch": 1.1206000297044407, - "grad_norm": 0.8364987969398499, - "learning_rate": 2.3276399821773354e-05, - "loss": 0.0644, + "epoch": 4.481734481734482, + "grad_norm": 0.7335745096206665, + "learning_rate": 3.1095931095931096e-06, + "loss": 0.0431, "step": 15090 }, { - "epoch": 1.1213426407247884, - "grad_norm": 2.9807636737823486, - "learning_rate": 2.327194415565127e-05, - "loss": 0.0687, + "epoch": 4.484704484704484, + "grad_norm": 0.8039221167564392, + "learning_rate": 3.0917730917730917e-06, + "loss": 0.0561, "step": 15100 }, { - "epoch": 1.1220852517451358, - "grad_norm": 0.977165937423706, - "learning_rate": 2.3267488489529188e-05, - "loss": 0.0714, + "epoch": 4.487674487674488, + "grad_norm": 0.7728500366210938, + "learning_rate": 3.073953073953074e-06, + "loss": 0.0487, "step": 15110 }, { - "epoch": 1.1228278627654835, - "grad_norm": 1.4593790769577026, - "learning_rate": 2.32630328234071e-05, - "loss": 0.0698, + "epoch": 4.490644490644491, + "grad_norm": 0.8309100270271301, + "learning_rate": 3.0561330561330565e-06, + "loss": 0.0506, "step": 15120 }, { - "epoch": 1.123570473785831, - "grad_norm": 0.9217209219932556, - "learning_rate": 2.3258577157285014e-05, - "loss": 0.0764, + "epoch": 4.493614493614493, + "grad_norm": 0.8935027718544006, + "learning_rate": 3.0383130383130387e-06, + "loss": 0.0479, "step": 15130 }, { - "epoch": 1.1243130848061784, - "grad_norm": 1.4398505687713623, - "learning_rate": 2.325412149116293e-05, - "loss": 0.1199, + "epoch": 4.4965844965844965, + "grad_norm": 1.1319783926010132, + "learning_rate": 3.0204930204930204e-06, + "loss": 0.0542, "step": 15140 }, { - "epoch": 1.125055695826526, - "grad_norm": 1.7291533946990967, - "learning_rate": 2.3249665825040844e-05, - "loss": 0.0899, + "epoch": 4.4995544995545, + "grad_norm": 0.41596898436546326, + "learning_rate": 3.0026730026730026e-06, + "loss": 0.0375, "step": 15150 }, { - "epoch": 1.1257983068468735, - "grad_norm": 2.176523208618164, - "learning_rate": 2.324521015891876e-05, - "loss": 0.0746, + "epoch": 4.502524502524502, + "grad_norm": 1.0437852144241333, + "learning_rate": 2.9848529848529848e-06, + "loss": 0.0566, "step": 15160 }, { - "epoch": 1.1265409178672212, - "grad_norm": 2.276003122329712, - "learning_rate": 2.3240754492796674e-05, - "loss": 0.1026, + "epoch": 4.5054945054945055, + "grad_norm": 0.41360408067703247, + "learning_rate": 2.967032967032967e-06, + "loss": 0.0503, "step": 15170 }, { - "epoch": 1.1272835288875687, - "grad_norm": 2.0524089336395264, - "learning_rate": 2.323629882667459e-05, - "loss": 0.0692, + "epoch": 4.508464508464509, + "grad_norm": 0.5282860994338989, + "learning_rate": 2.949212949212949e-06, + "loss": 0.0495, "step": 15180 }, { - "epoch": 1.1280261399079161, - "grad_norm": 0.9038380980491638, - "learning_rate": 2.3231843160552504e-05, - "loss": 0.0775, + "epoch": 4.511434511434511, + "grad_norm": 0.764944851398468, + "learning_rate": 2.9313929313929317e-06, + "loss": 0.0546, "step": 15190 }, { - "epoch": 1.1287687509282638, - "grad_norm": 2.3695902824401855, - "learning_rate": 2.3227387494430416e-05, - "loss": 0.1008, + "epoch": 4.514404514404514, + "grad_norm": 0.7760726809501648, + "learning_rate": 2.913572913572914e-06, + "loss": 0.0552, "step": 15200 }, { - "epoch": 1.1295113619486112, - "grad_norm": 1.6643588542938232, - "learning_rate": 2.322293182830833e-05, - "loss": 0.0637, + "epoch": 4.517374517374518, + "grad_norm": 0.6407870650291443, + "learning_rate": 2.895752895752896e-06, + "loss": 0.0624, "step": 15210 }, { - "epoch": 1.130253972968959, - "grad_norm": 0.4091331660747528, - "learning_rate": 2.321847616218625e-05, - "loss": 0.0768, + "epoch": 4.52034452034452, + "grad_norm": 0.7598323822021484, + "learning_rate": 2.877932877932878e-06, + "loss": 0.0547, "step": 15220 }, { - "epoch": 1.1309965839893064, - "grad_norm": 2.0745925903320312, - "learning_rate": 2.321402049606416e-05, - "loss": 0.082, + "epoch": 4.523314523314523, + "grad_norm": 0.6085183024406433, + "learning_rate": 2.86011286011286e-06, + "loss": 0.047, "step": 15230 }, { - "epoch": 1.131739195009654, - "grad_norm": 3.2917673587799072, - "learning_rate": 2.3209564829942076e-05, - "loss": 0.1074, + "epoch": 4.526284526284527, + "grad_norm": 0.5744462609291077, + "learning_rate": 2.842292842292842e-06, + "loss": 0.0476, "step": 15240 }, { - "epoch": 1.1324818060300015, - "grad_norm": 2.7250659465789795, - "learning_rate": 2.3205109163819994e-05, - "loss": 0.0742, + "epoch": 4.529254529254529, + "grad_norm": 0.6744916439056396, + "learning_rate": 2.8244728244728243e-06, + "loss": 0.0503, "step": 15250 }, { - "epoch": 1.133224417050349, - "grad_norm": 0.4812146723270416, - "learning_rate": 2.3200653497697906e-05, - "loss": 0.0644, + "epoch": 4.532224532224532, + "grad_norm": 0.939346194267273, + "learning_rate": 2.806652806652807e-06, + "loss": 0.0602, "step": 15260 }, { - "epoch": 1.1339670280706966, - "grad_norm": 1.7859218120574951, - "learning_rate": 2.319619783157582e-05, - "loss": 0.1025, + "epoch": 4.5351945351945355, + "grad_norm": 1.4114429950714111, + "learning_rate": 2.788832788832789e-06, + "loss": 0.0659, "step": 15270 }, { - "epoch": 1.134709639091044, - "grad_norm": 1.6448826789855957, - "learning_rate": 2.3191742165453736e-05, - "loss": 0.0694, + "epoch": 4.538164538164538, + "grad_norm": 0.6457445025444031, + "learning_rate": 2.7710127710127712e-06, + "loss": 0.0554, "step": 15280 }, { - "epoch": 1.1354522501113917, - "grad_norm": 1.5270838737487793, - "learning_rate": 2.318728649933165e-05, - "loss": 0.084, + "epoch": 4.541134541134541, + "grad_norm": 0.6651527881622314, + "learning_rate": 2.7531927531927534e-06, + "loss": 0.0468, "step": 15290 }, { - "epoch": 1.1361948611317392, - "grad_norm": 2.1386685371398926, - "learning_rate": 2.3182830833209566e-05, - "loss": 0.0864, + "epoch": 4.5441045441045445, + "grad_norm": 0.45064395666122437, + "learning_rate": 2.7353727353727356e-06, + "loss": 0.0649, "step": 15300 }, { - "epoch": 1.1369374721520868, - "grad_norm": 2.1466033458709717, - "learning_rate": 2.3178375167087477e-05, - "loss": 0.0792, + "epoch": 4.547074547074547, + "grad_norm": 0.4725130498409271, + "learning_rate": 2.7175527175527173e-06, + "loss": 0.051, "step": 15310 }, { - "epoch": 1.1376800831724343, - "grad_norm": 1.733211874961853, - "learning_rate": 2.3173919500965396e-05, - "loss": 0.0966, + "epoch": 4.55004455004455, + "grad_norm": 0.7358287572860718, + "learning_rate": 2.6997326997326995e-06, + "loss": 0.0481, "step": 15320 }, { - "epoch": 1.1384226941927817, - "grad_norm": 0.9286133646965027, - "learning_rate": 2.316946383484331e-05, - "loss": 0.0736, + "epoch": 4.553014553014553, + "grad_norm": 0.6313060522079468, + "learning_rate": 2.681912681912682e-06, + "loss": 0.0463, "step": 15330 }, { - "epoch": 1.1391653052131294, - "grad_norm": 3.003708839416504, - "learning_rate": 2.3165008168721222e-05, - "loss": 0.1012, + "epoch": 4.555984555984556, + "grad_norm": 1.0503184795379639, + "learning_rate": 2.6640926640926642e-06, + "loss": 0.0597, "step": 15340 }, { - "epoch": 1.1399079162334769, - "grad_norm": 0.6814678311347961, - "learning_rate": 2.316055250259914e-05, - "loss": 0.0611, + "epoch": 4.558954558954559, + "grad_norm": 1.3355445861816406, + "learning_rate": 2.6462726462726464e-06, + "loss": 0.0545, "step": 15350 }, { - "epoch": 1.1406505272538245, - "grad_norm": 1.6721028089523315, - "learning_rate": 2.3156096836477056e-05, - "loss": 0.0972, + "epoch": 4.561924561924562, + "grad_norm": 0.9509602785110474, + "learning_rate": 2.6284526284526286e-06, + "loss": 0.0629, "step": 15360 }, { - "epoch": 1.141393138274172, - "grad_norm": 1.2787104845046997, - "learning_rate": 2.3151641170354967e-05, - "loss": 0.0652, + "epoch": 4.564894564894565, + "grad_norm": 1.1099562644958496, + "learning_rate": 2.6106326106326108e-06, + "loss": 0.0707, "step": 15370 }, { - "epoch": 1.1421357492945194, - "grad_norm": 1.4805560111999512, - "learning_rate": 2.3147185504232882e-05, - "loss": 0.0678, + "epoch": 4.567864567864568, + "grad_norm": 1.1157077550888062, + "learning_rate": 2.592812592812593e-06, + "loss": 0.0609, "step": 15380 }, { - "epoch": 1.1428783603148671, - "grad_norm": 0.9559769034385681, - "learning_rate": 2.3142729838110797e-05, - "loss": 0.0718, + "epoch": 4.57083457083457, + "grad_norm": 0.5642300844192505, + "learning_rate": 2.574992574992575e-06, + "loss": 0.0572, "step": 15390 }, { - "epoch": 1.1436209713352146, - "grad_norm": 2.3336753845214844, - "learning_rate": 2.3138274171988712e-05, - "loss": 0.0716, + "epoch": 4.573804573804574, + "grad_norm": 1.1523653268814087, + "learning_rate": 2.5571725571725573e-06, + "loss": 0.0585, "step": 15400 }, { - "epoch": 1.1443635823555622, - "grad_norm": 1.7206687927246094, - "learning_rate": 2.3133818505866627e-05, - "loss": 0.0532, + "epoch": 4.576774576774577, + "grad_norm": 0.6620275378227234, + "learning_rate": 2.5393525393525394e-06, + "loss": 0.0428, "step": 15410 }, { - "epoch": 1.1451061933759097, - "grad_norm": 4.165546894073486, - "learning_rate": 2.3129362839744542e-05, - "loss": 0.0606, + "epoch": 4.579744579744579, + "grad_norm": 0.8893596529960632, + "learning_rate": 2.5215325215325216e-06, + "loss": 0.0529, "step": 15420 }, { - "epoch": 1.1458488043962571, - "grad_norm": 3.124039888381958, - "learning_rate": 2.3124907173622457e-05, - "loss": 0.111, + "epoch": 4.5827145827145825, + "grad_norm": 0.3965216875076294, + "learning_rate": 2.5037125037125038e-06, + "loss": 0.0501, "step": 15430 }, { - "epoch": 1.1465914154166048, - "grad_norm": 0.9052489995956421, - "learning_rate": 2.3120451507500372e-05, - "loss": 0.0805, + "epoch": 4.585684585684586, + "grad_norm": 0.888308584690094, + "learning_rate": 2.485892485892486e-06, + "loss": 0.0411, "step": 15440 }, { - "epoch": 1.1473340264369523, - "grad_norm": 1.5409538745880127, - "learning_rate": 2.3115995841378287e-05, - "loss": 0.0552, + "epoch": 4.588654588654588, + "grad_norm": 0.7801947593688965, + "learning_rate": 2.468072468072468e-06, + "loss": 0.0465, "step": 15450 }, { - "epoch": 1.1480766374573, - "grad_norm": 0.6143955588340759, - "learning_rate": 2.3111540175256202e-05, - "loss": 0.0527, + "epoch": 4.5916245916245915, + "grad_norm": 0.5786198377609253, + "learning_rate": 2.4502524502524507e-06, + "loss": 0.0546, "step": 15460 }, { - "epoch": 1.1488192484776474, - "grad_norm": 2.081989288330078, - "learning_rate": 2.3107084509134117e-05, - "loss": 0.1173, + "epoch": 4.594594594594595, + "grad_norm": 0.537132978439331, + "learning_rate": 2.4324324324324325e-06, + "loss": 0.0602, "step": 15470 }, { - "epoch": 1.1495618594979948, - "grad_norm": 1.4942225217819214, - "learning_rate": 2.310262884301203e-05, - "loss": 0.0951, + "epoch": 4.597564597564597, + "grad_norm": 0.9715235829353333, + "learning_rate": 2.4146124146124146e-06, + "loss": 0.058, "step": 15480 }, { - "epoch": 1.1503044705183425, - "grad_norm": 3.5311174392700195, - "learning_rate": 2.3098173176889947e-05, - "loss": 0.1277, + "epoch": 4.6005346005346, + "grad_norm": 1.2125133275985718, + "learning_rate": 2.396792396792397e-06, + "loss": 0.0663, "step": 15490 }, { - "epoch": 1.15104708153869, - "grad_norm": 1.772064447402954, - "learning_rate": 2.309371751076786e-05, - "loss": 0.0568, + "epoch": 4.603504603504604, + "grad_norm": 0.955084502696991, + "learning_rate": 2.378972378972379e-06, + "loss": 0.058, "step": 15500 }, { - "epoch": 1.1517896925590376, - "grad_norm": 1.9447821378707886, - "learning_rate": 2.3089261844645774e-05, - "loss": 0.0542, + "epoch": 4.606474606474606, + "grad_norm": 1.2165902853012085, + "learning_rate": 2.361152361152361e-06, + "loss": 0.0537, "step": 15510 }, { - "epoch": 1.152532303579385, - "grad_norm": 2.5192413330078125, - "learning_rate": 2.3084806178523692e-05, - "loss": 0.0636, + "epoch": 4.609444609444609, + "grad_norm": 0.5308700203895569, + "learning_rate": 2.3433323433323433e-06, + "loss": 0.0489, "step": 15520 }, { - "epoch": 1.1532749145997327, - "grad_norm": 0.6667538285255432, - "learning_rate": 2.3080350512401604e-05, - "loss": 0.0897, + "epoch": 4.612414612414613, + "grad_norm": 1.492642879486084, + "learning_rate": 2.325512325512326e-06, + "loss": 0.0535, "step": 15530 }, { - "epoch": 1.1540175256200802, - "grad_norm": 1.988601803779602, - "learning_rate": 2.307589484627952e-05, - "loss": 0.1259, + "epoch": 4.615384615384615, + "grad_norm": 0.8670569062232971, + "learning_rate": 2.307692307692308e-06, + "loss": 0.0635, "step": 15540 }, { - "epoch": 1.1547601366404276, - "grad_norm": 1.6338027715682983, - "learning_rate": 2.3071439180157434e-05, - "loss": 0.0578, + "epoch": 4.618354618354618, + "grad_norm": 0.5621850490570068, + "learning_rate": 2.28987228987229e-06, + "loss": 0.0563, "step": 15550 }, { - "epoch": 1.1555027476607753, - "grad_norm": 3.52496600151062, - "learning_rate": 2.306698351403535e-05, - "loss": 0.074, + "epoch": 4.6213246213246215, + "grad_norm": 0.7570291757583618, + "learning_rate": 2.272052272052272e-06, + "loss": 0.0554, "step": 15560 }, { - "epoch": 1.1562453586811228, - "grad_norm": 1.9696495532989502, - "learning_rate": 2.3062527847913264e-05, - "loss": 0.1064, + "epoch": 4.624294624294624, + "grad_norm": 0.7704951763153076, + "learning_rate": 2.254232254232254e-06, + "loss": 0.0504, "step": 15570 }, { - "epoch": 1.1569879697014704, - "grad_norm": 1.104413390159607, - "learning_rate": 2.305807218179118e-05, - "loss": 0.0936, + "epoch": 4.627264627264627, + "grad_norm": 1.1347087621688843, + "learning_rate": 2.2364122364122363e-06, + "loss": 0.0586, "step": 15580 }, { - "epoch": 1.157730580721818, - "grad_norm": 2.5523598194122314, - "learning_rate": 2.3053616515669094e-05, - "loss": 0.0714, + "epoch": 4.63023463023463, + "grad_norm": 0.9112380743026733, + "learning_rate": 2.2185922185922185e-06, + "loss": 0.0489, "step": 15590 }, { - "epoch": 1.1584731917421656, - "grad_norm": 1.7481622695922852, - "learning_rate": 2.304916084954701e-05, - "loss": 0.1121, + "epoch": 4.633204633204633, + "grad_norm": 0.9339880347251892, + "learning_rate": 2.200772200772201e-06, + "loss": 0.0487, "step": 15600 }, { - "epoch": 1.159215802762513, - "grad_norm": 3.4101874828338623, - "learning_rate": 2.304470518342492e-05, - "loss": 0.0916, + "epoch": 4.636174636174636, + "grad_norm": 0.5124456286430359, + "learning_rate": 2.1829521829521833e-06, + "loss": 0.0682, "step": 15610 }, { - "epoch": 1.1599584137828605, - "grad_norm": 1.432702660560608, - "learning_rate": 2.3040249517302835e-05, - "loss": 0.1003, + "epoch": 4.639144639144639, + "grad_norm": 0.742302417755127, + "learning_rate": 2.1651321651321654e-06, + "loss": 0.0579, "step": 15620 }, { - "epoch": 1.1607010248032081, - "grad_norm": 0.6182481646537781, - "learning_rate": 2.3035793851180754e-05, - "loss": 0.0504, + "epoch": 4.642114642114642, + "grad_norm": 0.7987191677093506, + "learning_rate": 2.147312147312147e-06, + "loss": 0.0501, "step": 15630 }, { - "epoch": 1.1614436358235556, - "grad_norm": 2.7727530002593994, - "learning_rate": 2.3031338185058665e-05, - "loss": 0.0671, + "epoch": 4.645084645084645, + "grad_norm": 1.0972774028778076, + "learning_rate": 2.1294921294921294e-06, + "loss": 0.0657, "step": 15640 }, { - "epoch": 1.1621862468439033, - "grad_norm": 0.7968599200248718, - "learning_rate": 2.302688251893658e-05, - "loss": 0.0742, + "epoch": 4.648054648054648, + "grad_norm": 0.855499804019928, + "learning_rate": 2.1116721116721115e-06, + "loss": 0.0623, "step": 15650 }, { - "epoch": 1.1629288578642507, - "grad_norm": 1.9094624519348145, - "learning_rate": 2.30224268528145e-05, - "loss": 0.0778, + "epoch": 4.651024651024651, + "grad_norm": 0.6510183215141296, + "learning_rate": 2.0938520938520937e-06, + "loss": 0.0556, "step": 15660 }, { - "epoch": 1.1636714688845982, - "grad_norm": 2.032755136489868, - "learning_rate": 2.301797118669241e-05, - "loss": 0.0674, + "epoch": 4.653994653994654, + "grad_norm": 0.7300544381141663, + "learning_rate": 2.0760320760320763e-06, + "loss": 0.047, "step": 15670 }, { - "epoch": 1.1644140799049458, - "grad_norm": 2.188685417175293, - "learning_rate": 2.3013515520570325e-05, - "loss": 0.1031, + "epoch": 4.656964656964657, + "grad_norm": 1.2278099060058594, + "learning_rate": 2.0582120582120585e-06, + "loss": 0.0551, "step": 15680 }, { - "epoch": 1.1651566909252933, - "grad_norm": 0.8258926868438721, - "learning_rate": 2.3009059854448244e-05, - "loss": 0.0641, + "epoch": 4.65993465993466, + "grad_norm": 0.751706063747406, + "learning_rate": 2.0403920403920406e-06, + "loss": 0.0583, "step": 15690 }, { - "epoch": 1.165899301945641, - "grad_norm": 0.9573965668678284, - "learning_rate": 2.3004604188326155e-05, - "loss": 0.0601, + "epoch": 4.662904662904663, + "grad_norm": 0.32578542828559875, + "learning_rate": 2.022572022572023e-06, + "loss": 0.0566, "step": 15700 }, { - "epoch": 1.1666419129659884, - "grad_norm": 1.2031244039535522, - "learning_rate": 2.300014852220407e-05, - "loss": 0.0518, + "epoch": 4.665874665874666, + "grad_norm": 0.8979377746582031, + "learning_rate": 2.0047520047520046e-06, + "loss": 0.0574, "step": 15710 }, { - "epoch": 1.1673845239863359, - "grad_norm": 3.0771892070770264, - "learning_rate": 2.2995692856081982e-05, - "loss": 0.1124, + "epoch": 4.6688446688446685, + "grad_norm": 0.816284716129303, + "learning_rate": 1.9869319869319867e-06, + "loss": 0.0534, "step": 15720 }, { - "epoch": 1.1681271350066835, - "grad_norm": 1.2991482019424438, - "learning_rate": 2.29912371899599e-05, - "loss": 0.0508, + "epoch": 4.671814671814672, + "grad_norm": 0.9114980101585388, + "learning_rate": 1.969111969111969e-06, + "loss": 0.0599, "step": 15730 }, { - "epoch": 1.168869746027031, - "grad_norm": 0.9079421162605286, - "learning_rate": 2.2986781523837815e-05, - "loss": 0.0638, + "epoch": 4.674784674784675, + "grad_norm": 0.8003864884376526, + "learning_rate": 1.9512919512919515e-06, + "loss": 0.0571, "step": 15740 }, { - "epoch": 1.1696123570473786, - "grad_norm": 1.197899341583252, - "learning_rate": 2.2982325857715727e-05, - "loss": 0.0828, + "epoch": 4.6777546777546775, + "grad_norm": 0.7641117572784424, + "learning_rate": 1.9334719334719337e-06, + "loss": 0.0523, "step": 15750 }, { - "epoch": 1.170354968067726, - "grad_norm": 1.4822005033493042, - "learning_rate": 2.2977870191593645e-05, - "loss": 0.0511, + "epoch": 4.680724680724681, + "grad_norm": 0.795187771320343, + "learning_rate": 1.915651915651916e-06, + "loss": 0.0652, "step": 15760 }, { - "epoch": 1.1710975790880735, - "grad_norm": 2.059238910675049, - "learning_rate": 2.297341452547156e-05, - "loss": 0.0762, + "epoch": 4.683694683694684, + "grad_norm": 0.7843266129493713, + "learning_rate": 1.8978318978318978e-06, + "loss": 0.0541, "step": 15770 }, { - "epoch": 1.1718401901084212, - "grad_norm": 2.548032283782959, - "learning_rate": 2.2968958859349472e-05, - "loss": 0.0711, + "epoch": 4.686664686664686, + "grad_norm": 1.0619968175888062, + "learning_rate": 1.88001188001188e-06, + "loss": 0.061, "step": 15780 }, { - "epoch": 1.1725828011287687, - "grad_norm": 2.6228041648864746, - "learning_rate": 2.2964503193227387e-05, - "loss": 0.0709, + "epoch": 4.68963468963469, + "grad_norm": 0.6766983270645142, + "learning_rate": 1.8621918621918623e-06, + "loss": 0.0479, "step": 15790 }, { - "epoch": 1.1733254121491163, - "grad_norm": 1.9563509225845337, - "learning_rate": 2.2960047527105302e-05, - "loss": 0.0589, + "epoch": 4.692604692604693, + "grad_norm": 1.0215040445327759, + "learning_rate": 1.8443718443718445e-06, + "loss": 0.0479, "step": 15800 }, { - "epoch": 1.1740680231694638, - "grad_norm": 1.4019722938537598, - "learning_rate": 2.2955591860983217e-05, - "loss": 0.0733, + "epoch": 4.695574695574695, + "grad_norm": 1.0211207866668701, + "learning_rate": 1.8265518265518265e-06, + "loss": 0.0595, "step": 15810 }, { - "epoch": 1.1748106341898115, - "grad_norm": 1.6653647422790527, - "learning_rate": 2.2951136194861132e-05, - "loss": 0.0505, + "epoch": 4.698544698544699, + "grad_norm": 0.677951455116272, + "learning_rate": 1.8087318087318088e-06, + "loss": 0.0389, "step": 15820 }, { - "epoch": 1.175553245210159, - "grad_norm": 2.455420732498169, - "learning_rate": 2.2946680528739047e-05, - "loss": 0.1232, + "epoch": 4.701514701514702, + "grad_norm": 0.9057896137237549, + "learning_rate": 1.790911790911791e-06, + "loss": 0.0694, "step": 15830 }, { - "epoch": 1.1762958562305064, - "grad_norm": 1.1181570291519165, - "learning_rate": 2.2942224862616962e-05, - "loss": 0.0626, + "epoch": 4.704484704484704, + "grad_norm": 0.49817752838134766, + "learning_rate": 1.7730917730917732e-06, + "loss": 0.0572, "step": 15840 }, { - "epoch": 1.177038467250854, - "grad_norm": 1.8775357007980347, - "learning_rate": 2.2937769196494877e-05, - "loss": 0.0971, + "epoch": 4.7074547074547075, + "grad_norm": 0.554590106010437, + "learning_rate": 1.7552717552717551e-06, + "loss": 0.0381, "step": 15850 }, { - "epoch": 1.1777810782712015, - "grad_norm": 1.8063764572143555, - "learning_rate": 2.2933313530372792e-05, - "loss": 0.12, + "epoch": 4.710424710424711, + "grad_norm": 0.6778869032859802, + "learning_rate": 1.7374517374517375e-06, + "loss": 0.0488, "step": 15860 }, { - "epoch": 1.1785236892915492, - "grad_norm": 0.9645183682441711, - "learning_rate": 2.2928857864250707e-05, - "loss": 0.0783, + "epoch": 4.713394713394713, + "grad_norm": 0.38889896869659424, + "learning_rate": 1.7196317196317197e-06, + "loss": 0.0454, "step": 15870 }, { - "epoch": 1.1792663003118966, - "grad_norm": 2.029613971710205, - "learning_rate": 2.2924402198128622e-05, - "loss": 0.1101, + "epoch": 4.716364716364716, + "grad_norm": 0.6715492010116577, + "learning_rate": 1.7018117018117019e-06, + "loss": 0.0532, "step": 15880 }, { - "epoch": 1.1800089113322443, - "grad_norm": 0.7178744673728943, - "learning_rate": 2.2919946532006534e-05, - "loss": 0.0689, + "epoch": 4.71933471933472, + "grad_norm": 0.9232339262962341, + "learning_rate": 1.683991683991684e-06, + "loss": 0.0407, "step": 15890 }, { - "epoch": 1.1807515223525917, - "grad_norm": 1.271041750907898, - "learning_rate": 2.2915490865884452e-05, - "loss": 0.0662, + "epoch": 4.722304722304722, + "grad_norm": 0.9645439386367798, + "learning_rate": 1.6661716661716662e-06, + "loss": 0.0553, "step": 15900 }, { - "epoch": 1.1814941333729392, - "grad_norm": 2.572619915008545, - "learning_rate": 2.2911035199762364e-05, - "loss": 0.0677, + "epoch": 4.725274725274725, + "grad_norm": 0.6320471167564392, + "learning_rate": 1.6483516483516484e-06, + "loss": 0.0413, "step": 15910 }, { - "epoch": 1.1822367443932869, - "grad_norm": 0.5422751307487488, - "learning_rate": 2.290657953364028e-05, - "loss": 0.0662, + "epoch": 4.728244728244729, + "grad_norm": 0.7938999533653259, + "learning_rate": 1.6305316305316306e-06, + "loss": 0.0497, "step": 15920 }, { - "epoch": 1.1829793554136343, - "grad_norm": 2.011805295944214, - "learning_rate": 2.2902123867518197e-05, - "loss": 0.078, + "epoch": 4.731214731214731, + "grad_norm": 0.5799199938774109, + "learning_rate": 1.6127116127116127e-06, + "loss": 0.0608, "step": 15930 }, { - "epoch": 1.183721966433982, - "grad_norm": 1.7894953489303589, - "learning_rate": 2.289766820139611e-05, - "loss": 0.0817, + "epoch": 4.734184734184734, + "grad_norm": 0.6402101516723633, + "learning_rate": 1.594891594891595e-06, + "loss": 0.0524, "step": 15940 }, { - "epoch": 1.1844645774543294, - "grad_norm": 0.9145591855049133, - "learning_rate": 2.2893212535274024e-05, - "loss": 0.0867, + "epoch": 4.737154737154738, + "grad_norm": 0.6040619015693665, + "learning_rate": 1.577071577071577e-06, + "loss": 0.0486, "step": 15950 }, { - "epoch": 1.1852071884746769, - "grad_norm": 2.027958631515503, - "learning_rate": 2.288875686915194e-05, - "loss": 0.0579, + "epoch": 4.74012474012474, + "grad_norm": 0.4295766055583954, + "learning_rate": 1.5592515592515594e-06, + "loss": 0.0481, "step": 15960 }, { - "epoch": 1.1859497994950245, - "grad_norm": 0.9415296316146851, - "learning_rate": 2.2884301203029853e-05, - "loss": 0.0854, + "epoch": 4.743094743094743, + "grad_norm": 0.9524640440940857, + "learning_rate": 1.5414315414315414e-06, + "loss": 0.0501, "step": 15970 }, { - "epoch": 1.186692410515372, - "grad_norm": 2.021057367324829, - "learning_rate": 2.287984553690777e-05, - "loss": 0.073, + "epoch": 4.7460647460647465, + "grad_norm": 0.6644571423530579, + "learning_rate": 1.5236115236115236e-06, + "loss": 0.069, "step": 15980 }, { - "epoch": 1.1874350215357197, - "grad_norm": 1.6900122165679932, - "learning_rate": 2.2875389870785683e-05, - "loss": 0.0772, + "epoch": 4.749034749034749, + "grad_norm": 0.9465572237968445, + "learning_rate": 1.5057915057915057e-06, + "loss": 0.0499, "step": 15990 }, { - "epoch": 1.1881776325560671, - "grad_norm": 0.6172839999198914, - "learning_rate": 2.28709342046636e-05, - "loss": 0.0522, + "epoch": 4.752004752004752, + "grad_norm": 0.6358723640441895, + "learning_rate": 1.4879714879714881e-06, + "loss": 0.0516, "step": 16000 }, { - "epoch": 1.1889202435764146, - "grad_norm": 2.918687105178833, - "learning_rate": 2.2866478538541513e-05, - "loss": 0.0566, + "epoch": 4.754974754974755, + "grad_norm": 0.6010512709617615, + "learning_rate": 1.47015147015147e-06, + "loss": 0.0378, "step": 16010 }, { - "epoch": 1.1896628545967622, - "grad_norm": 1.5707764625549316, - "learning_rate": 2.2862022872419425e-05, - "loss": 0.0721, + "epoch": 4.757944757944758, + "grad_norm": 0.5566834211349487, + "learning_rate": 1.4523314523314523e-06, + "loss": 0.046, "step": 16020 }, { - "epoch": 1.1904054656171097, - "grad_norm": 1.0280346870422363, - "learning_rate": 2.285756720629734e-05, - "loss": 0.0646, + "epoch": 4.760914760914761, + "grad_norm": 0.8823090195655823, + "learning_rate": 1.4345114345114346e-06, + "loss": 0.0516, "step": 16030 }, { - "epoch": 1.1911480766374574, - "grad_norm": 1.4963786602020264, - "learning_rate": 2.285311154017526e-05, - "loss": 0.0555, + "epoch": 4.763884763884764, + "grad_norm": 0.9315813183784485, + "learning_rate": 1.4166914166914168e-06, + "loss": 0.0585, "step": 16040 }, { - "epoch": 1.1918906876578048, - "grad_norm": 0.9982985258102417, - "learning_rate": 2.284865587405317e-05, - "loss": 0.0647, + "epoch": 4.766854766854767, + "grad_norm": 0.848888635635376, + "learning_rate": 1.3988713988713988e-06, + "loss": 0.0544, "step": 16050 }, { - "epoch": 1.1926332986781523, - "grad_norm": 1.1375582218170166, - "learning_rate": 2.2844200207931085e-05, - "loss": 0.0728, + "epoch": 4.76982476982477, + "grad_norm": 0.7777525782585144, + "learning_rate": 1.381051381051381e-06, + "loss": 0.0606, "step": 16060 }, { - "epoch": 1.1933759096985, - "grad_norm": 4.054408073425293, - "learning_rate": 2.2839744541809003e-05, - "loss": 0.0874, + "epoch": 4.772794772794773, + "grad_norm": 0.7137773036956787, + "learning_rate": 1.3632313632313633e-06, + "loss": 0.0561, "step": 16070 }, { - "epoch": 1.1941185207188474, - "grad_norm": 3.6856000423431396, - "learning_rate": 2.2835288875686915e-05, - "loss": 0.1011, + "epoch": 4.775764775764776, + "grad_norm": 0.7126787900924683, + "learning_rate": 1.3454113454113455e-06, + "loss": 0.0597, "step": 16080 }, { - "epoch": 1.194861131739195, - "grad_norm": 3.7098820209503174, - "learning_rate": 2.283083320956483e-05, - "loss": 0.0638, + "epoch": 4.778734778734779, + "grad_norm": 1.0160871744155884, + "learning_rate": 1.3275913275913275e-06, + "loss": 0.0566, "step": 16090 }, { - "epoch": 1.1956037427595425, - "grad_norm": 0.7960993647575378, - "learning_rate": 2.282637754344275e-05, - "loss": 0.0832, + "epoch": 4.781704781704782, + "grad_norm": 1.4480925798416138, + "learning_rate": 1.3097713097713098e-06, + "loss": 0.072, "step": 16100 }, { - "epoch": 1.1963463537798902, - "grad_norm": 1.0602184534072876, - "learning_rate": 2.282192187732066e-05, - "loss": 0.0824, + "epoch": 4.784674784674785, + "grad_norm": 0.7537574172019958, + "learning_rate": 1.291951291951292e-06, + "loss": 0.0528, "step": 16110 }, { - "epoch": 1.1970889648002376, - "grad_norm": 1.5129131078720093, - "learning_rate": 2.2817466211198575e-05, - "loss": 0.0734, + "epoch": 4.787644787644788, + "grad_norm": 1.0016658306121826, + "learning_rate": 1.2741312741312742e-06, + "loss": 0.0501, "step": 16120 }, { - "epoch": 1.197831575820585, - "grad_norm": 2.3577966690063477, - "learning_rate": 2.2813010545076487e-05, - "loss": 0.0931, + "epoch": 4.79061479061479, + "grad_norm": 0.7509961128234863, + "learning_rate": 1.2563112563112563e-06, + "loss": 0.0573, "step": 16130 }, { - "epoch": 1.1985741868409328, - "grad_norm": 2.6945302486419678, - "learning_rate": 2.2808554878954405e-05, - "loss": 0.1088, + "epoch": 4.7935847935847935, + "grad_norm": 0.7173300981521606, + "learning_rate": 1.2384912384912385e-06, + "loss": 0.0545, "step": 16140 }, { - "epoch": 1.1993167978612802, - "grad_norm": 1.0213791131973267, - "learning_rate": 2.280409921283232e-05, - "loss": 0.0683, + "epoch": 4.796554796554797, + "grad_norm": 0.6532518267631531, + "learning_rate": 1.2206712206712207e-06, + "loss": 0.0711, "step": 16150 }, { - "epoch": 1.2000594088816279, - "grad_norm": 0.7909874320030212, - "learning_rate": 2.279964354671023e-05, - "loss": 0.0453, + "epoch": 4.799524799524799, + "grad_norm": 0.7350220084190369, + "learning_rate": 1.2028512028512029e-06, + "loss": 0.0511, "step": 16160 }, { - "epoch": 1.2008020199019753, - "grad_norm": 1.2978131771087646, - "learning_rate": 2.279518788058815e-05, - "loss": 0.0787, + "epoch": 4.802494802494802, + "grad_norm": 0.8470587730407715, + "learning_rate": 1.185031185031185e-06, + "loss": 0.045, "step": 16170 }, { - "epoch": 1.201544630922323, - "grad_norm": 0.5896059274673462, - "learning_rate": 2.2790732214466065e-05, - "loss": 0.0719, + "epoch": 4.805464805464806, + "grad_norm": 0.7634150981903076, + "learning_rate": 1.1672111672111672e-06, + "loss": 0.0529, "step": 16180 }, { - "epoch": 1.2022872419426704, - "grad_norm": 2.9565205574035645, - "learning_rate": 2.2786276548343977e-05, - "loss": 0.1046, + "epoch": 4.808434808434808, + "grad_norm": 0.8620032668113708, + "learning_rate": 1.1493911493911494e-06, + "loss": 0.0527, "step": 16190 }, { - "epoch": 1.203029852963018, - "grad_norm": 4.079509258270264, - "learning_rate": 2.278182088222189e-05, - "loss": 0.0686, + "epoch": 4.811404811404811, + "grad_norm": 0.770086944103241, + "learning_rate": 1.1315711315711318e-06, + "loss": 0.0521, "step": 16200 }, { - "epoch": 1.2037724639833656, - "grad_norm": 2.044127941131592, - "learning_rate": 2.277736521609981e-05, - "loss": 0.0618, + "epoch": 4.814374814374815, + "grad_norm": 0.6775233149528503, + "learning_rate": 1.1137511137511137e-06, + "loss": 0.0465, "step": 16210 }, { - "epoch": 1.204515075003713, - "grad_norm": 2.2022972106933594, - "learning_rate": 2.277290954997772e-05, - "loss": 0.0973, + "epoch": 4.817344817344817, + "grad_norm": 0.7453165054321289, + "learning_rate": 1.0959310959310959e-06, + "loss": 0.0547, "step": 16220 }, { - "epoch": 1.2052576860240607, - "grad_norm": 1.957979440689087, - "learning_rate": 2.2768453883855637e-05, - "loss": 0.0765, + "epoch": 4.82031482031482, + "grad_norm": 1.0375672578811646, + "learning_rate": 1.078111078111078e-06, + "loss": 0.0583, "step": 16230 }, { - "epoch": 1.2060002970444081, - "grad_norm": 0.7121636271476746, - "learning_rate": 2.276399821773355e-05, - "loss": 0.0732, + "epoch": 4.8232848232848236, + "grad_norm": 0.8512599468231201, + "learning_rate": 1.0602910602910604e-06, + "loss": 0.0587, "step": 16240 }, { - "epoch": 1.2067429080647556, - "grad_norm": 2.143155574798584, - "learning_rate": 2.2759542551611467e-05, - "loss": 0.0564, + "epoch": 4.826254826254826, + "grad_norm": 0.7857792377471924, + "learning_rate": 1.0424710424710424e-06, + "loss": 0.0596, "step": 16250 }, { - "epoch": 1.2074855190851033, - "grad_norm": 2.7049849033355713, - "learning_rate": 2.275508688548938e-05, - "loss": 0.0806, + "epoch": 4.829224829224829, + "grad_norm": 0.6470934152603149, + "learning_rate": 1.0246510246510246e-06, + "loss": 0.0538, "step": 16260 }, { - "epoch": 1.2082281301054507, - "grad_norm": 2.114739418029785, - "learning_rate": 2.2750631219367297e-05, - "loss": 0.0569, + "epoch": 4.8321948321948325, + "grad_norm": 0.8165239095687866, + "learning_rate": 1.006831006831007e-06, + "loss": 0.0567, "step": 16270 }, { - "epoch": 1.2089707411257984, - "grad_norm": 2.1519935131073, - "learning_rate": 2.274617555324521e-05, - "loss": 0.0727, + "epoch": 4.835164835164835, + "grad_norm": 0.5124844908714294, + "learning_rate": 9.890109890109891e-07, + "loss": 0.0493, "step": 16280 }, { - "epoch": 1.2097133521461458, - "grad_norm": 0.7403703927993774, - "learning_rate": 2.2741719887123127e-05, - "loss": 0.0468, + "epoch": 4.838134838134838, + "grad_norm": 1.0751707553863525, + "learning_rate": 9.711909711909713e-07, + "loss": 0.0559, "step": 16290 }, { - "epoch": 1.2104559631664933, - "grad_norm": 1.2450178861618042, - "learning_rate": 2.2737264221001038e-05, - "loss": 0.09, + "epoch": 4.841104841104841, + "grad_norm": 0.9530063271522522, + "learning_rate": 9.533709533709534e-07, + "loss": 0.0474, "step": 16300 }, { - "epoch": 1.211198574186841, - "grad_norm": 2.1269819736480713, - "learning_rate": 2.2732808554878957e-05, - "loss": 0.0532, + "epoch": 4.844074844074844, + "grad_norm": 1.0915278196334839, + "learning_rate": 9.355509355509356e-07, + "loss": 0.0565, "step": 16310 }, { - "epoch": 1.2119411852071884, - "grad_norm": 1.752208948135376, - "learning_rate": 2.2728352888756868e-05, - "loss": 0.0908, + "epoch": 4.847044847044847, + "grad_norm": 1.2590452432632446, + "learning_rate": 9.177309177309178e-07, + "loss": 0.043, "step": 16320 }, { - "epoch": 1.212683796227536, - "grad_norm": 2.0299859046936035, - "learning_rate": 2.2723897222634783e-05, - "loss": 0.1128, + "epoch": 4.85001485001485, + "grad_norm": 0.6076927781105042, + "learning_rate": 8.999108999109e-07, + "loss": 0.056, "step": 16330 }, { - "epoch": 1.2134264072478835, - "grad_norm": 0.9291142225265503, - "learning_rate": 2.27194415565127e-05, - "loss": 0.07, + "epoch": 4.852984852984853, + "grad_norm": 0.8309280276298523, + "learning_rate": 8.820908820908821e-07, + "loss": 0.0486, "step": 16340 }, { - "epoch": 1.214169018268231, - "grad_norm": 1.3614659309387207, - "learning_rate": 2.2714985890390613e-05, - "loss": 0.1037, + "epoch": 4.855954855954856, + "grad_norm": 0.8125685453414917, + "learning_rate": 8.642708642708643e-07, + "loss": 0.0499, "step": 16350 }, { - "epoch": 1.2149116292885787, - "grad_norm": 0.6833984851837158, - "learning_rate": 2.2710530224268528e-05, - "loss": 0.0574, + "epoch": 4.858924858924859, + "grad_norm": 0.6055704951286316, + "learning_rate": 8.464508464508465e-07, + "loss": 0.0454, "step": 16360 }, { - "epoch": 1.215654240308926, - "grad_norm": 1.6602541208267212, - "learning_rate": 2.2706074558146443e-05, - "loss": 0.0677, + "epoch": 4.861894861894862, + "grad_norm": 0.6113276481628418, + "learning_rate": 8.286308286308286e-07, + "loss": 0.0446, "step": 16370 }, { - "epoch": 1.2163968513292738, - "grad_norm": 3.005326509475708, - "learning_rate": 2.2701618892024358e-05, - "loss": 0.0806, + "epoch": 4.864864864864865, + "grad_norm": 0.6283566951751709, + "learning_rate": 8.108108108108109e-07, + "loss": 0.0643, "step": 16380 }, { - "epoch": 1.2171394623496212, - "grad_norm": 0.7319986820220947, - "learning_rate": 2.2697163225902273e-05, - "loss": 0.0621, + "epoch": 4.867834867834868, + "grad_norm": 0.6934186220169067, + "learning_rate": 7.92990792990793e-07, + "loss": 0.0506, "step": 16390 }, { - "epoch": 1.217882073369969, - "grad_norm": 1.5393048524856567, - "learning_rate": 2.2692707559780188e-05, - "loss": 0.0795, + "epoch": 4.870804870804871, + "grad_norm": 0.7844398021697998, + "learning_rate": 7.751707751707753e-07, + "loss": 0.0551, "step": 16400 }, { - "epoch": 1.2186246843903163, - "grad_norm": 1.8766040802001953, - "learning_rate": 2.2688251893658103e-05, - "loss": 0.0525, + "epoch": 4.873774873774874, + "grad_norm": 0.7214897274971008, + "learning_rate": 7.573507573507573e-07, + "loss": 0.0648, "step": 16410 }, { - "epoch": 1.2193672954106638, - "grad_norm": 0.824567437171936, - "learning_rate": 2.2683796227536018e-05, - "loss": 0.0746, + "epoch": 4.876744876744877, + "grad_norm": 1.6125606298446655, + "learning_rate": 7.395307395307396e-07, + "loss": 0.0462, "step": 16420 }, { - "epoch": 1.2201099064310115, - "grad_norm": 0.9083713293075562, - "learning_rate": 2.267934056141393e-05, - "loss": 0.0791, + "epoch": 4.8797148797148795, + "grad_norm": 1.0362389087677002, + "learning_rate": 7.217107217107217e-07, + "loss": 0.0579, "step": 16430 }, { - "epoch": 1.220852517451359, - "grad_norm": 1.9115943908691406, - "learning_rate": 2.2674884895291848e-05, - "loss": 0.076, + "epoch": 4.882684882684883, + "grad_norm": 1.2571192979812622, + "learning_rate": 7.03890703890704e-07, + "loss": 0.0526, "step": 16440 }, { - "epoch": 1.2215951284717066, - "grad_norm": 1.7898435592651367, - "learning_rate": 2.2670429229169763e-05, - "loss": 0.0917, + "epoch": 4.885654885654886, + "grad_norm": 0.7572282552719116, + "learning_rate": 6.860706860706861e-07, + "loss": 0.0551, "step": 16450 }, { - "epoch": 1.222337739492054, - "grad_norm": 3.3459744453430176, - "learning_rate": 2.2665973563047675e-05, - "loss": 0.0693, + "epoch": 4.888624888624888, + "grad_norm": 0.7616530060768127, + "learning_rate": 6.682506682506683e-07, + "loss": 0.0557, "step": 16460 }, { - "epoch": 1.2230803505124017, - "grad_norm": 2.533830404281616, - "learning_rate": 2.266151789692559e-05, - "loss": 0.0948, + "epoch": 4.891594891594892, + "grad_norm": 1.0284298658370972, + "learning_rate": 6.504306504306505e-07, + "loss": 0.0501, "step": 16470 }, { - "epoch": 1.2238229615327492, - "grad_norm": 0.8340369462966919, - "learning_rate": 2.2657062230803508e-05, - "loss": 0.0518, + "epoch": 4.894564894564894, + "grad_norm": 0.9173740148544312, + "learning_rate": 6.326106326106326e-07, + "loss": 0.0554, "step": 16480 }, { - "epoch": 1.2245655725530966, - "grad_norm": 0.5460755825042725, - "learning_rate": 2.265260656468142e-05, - "loss": 0.0776, + "epoch": 4.897534897534897, + "grad_norm": 0.5375432968139648, + "learning_rate": 6.147906147906148e-07, + "loss": 0.0534, "step": 16490 }, { - "epoch": 1.2253081835734443, - "grad_norm": 0.6815189719200134, - "learning_rate": 2.2648150898559335e-05, - "loss": 0.0595, + "epoch": 4.900504900504901, + "grad_norm": 0.7436822056770325, + "learning_rate": 5.969705969705971e-07, + "loss": 0.0528, "step": 16500 }, { - "epoch": 1.2260507945937917, - "grad_norm": 1.1513220071792603, - "learning_rate": 2.2643695232437253e-05, - "loss": 0.146, + "epoch": 4.903474903474903, + "grad_norm": 0.3775230348110199, + "learning_rate": 5.791505791505791e-07, + "loss": 0.0584, "step": 16510 }, { - "epoch": 1.2267934056141394, - "grad_norm": 1.9079151153564453, - "learning_rate": 2.2639239566315165e-05, - "loss": 0.0827, + "epoch": 4.906444906444906, + "grad_norm": 0.792444109916687, + "learning_rate": 5.613305613305614e-07, + "loss": 0.0578, "step": 16520 }, { - "epoch": 1.2275360166344869, - "grad_norm": 1.8923051357269287, - "learning_rate": 2.263478390019308e-05, - "loss": 0.1135, + "epoch": 4.9094149094149095, + "grad_norm": 0.527422308921814, + "learning_rate": 5.435105435105435e-07, + "loss": 0.0581, "step": 16530 }, { - "epoch": 1.2282786276548343, - "grad_norm": 1.8979130983352661, - "learning_rate": 2.263032823407099e-05, - "loss": 0.0895, + "epoch": 4.912384912384912, + "grad_norm": 0.6384781002998352, + "learning_rate": 5.256905256905258e-07, + "loss": 0.0492, "step": 16540 }, { - "epoch": 1.229021238675182, - "grad_norm": 1.6301295757293701, - "learning_rate": 2.262587256794891e-05, - "loss": 0.0891, + "epoch": 4.915354915354915, + "grad_norm": 0.8578511476516724, + "learning_rate": 5.078705078705078e-07, + "loss": 0.069, "step": 16550 }, { - "epoch": 1.2297638496955294, - "grad_norm": 1.7979081869125366, - "learning_rate": 2.2621416901826825e-05, - "loss": 0.0765, + "epoch": 4.9183249183249185, + "grad_norm": 0.7936479449272156, + "learning_rate": 4.900504900504901e-07, + "loss": 0.0533, "step": 16560 }, { - "epoch": 1.230506460715877, - "grad_norm": 0.5826703310012817, - "learning_rate": 2.2616961235704736e-05, - "loss": 0.0668, + "epoch": 4.921294921294921, + "grad_norm": 0.6151940226554871, + "learning_rate": 4.7223047223047227e-07, + "loss": 0.0473, "step": 16570 }, { - "epoch": 1.2312490717362246, - "grad_norm": 1.6885042190551758, - "learning_rate": 2.2612505569582655e-05, - "loss": 0.0646, + "epoch": 4.924264924264924, + "grad_norm": 0.8337469696998596, + "learning_rate": 4.544104544104544e-07, + "loss": 0.0434, "step": 16580 }, { - "epoch": 1.231991682756572, - "grad_norm": 0.9739753603935242, - "learning_rate": 2.260804990346057e-05, - "loss": 0.0566, + "epoch": 4.927234927234927, + "grad_norm": 0.7319076657295227, + "learning_rate": 4.365904365904366e-07, + "loss": 0.057, "step": 16590 }, { - "epoch": 1.2327342937769197, - "grad_norm": 0.9012984037399292, - "learning_rate": 2.260359423733848e-05, - "loss": 0.0785, + "epoch": 4.93020493020493, + "grad_norm": 0.8618406653404236, + "learning_rate": 4.187704187704188e-07, + "loss": 0.0613, "step": 16600 }, { - "epoch": 1.2334769047972671, - "grad_norm": 0.8952996134757996, - "learning_rate": 2.2599138571216396e-05, - "loss": 0.0655, + "epoch": 4.933174933174933, + "grad_norm": 1.1733934879302979, + "learning_rate": 4.0095040095040095e-07, + "loss": 0.0491, "step": 16610 }, { - "epoch": 1.2342195158176148, - "grad_norm": 2.116847515106201, - "learning_rate": 2.2594682905094315e-05, - "loss": 0.077, + "epoch": 4.936144936144936, + "grad_norm": 0.41288742423057556, + "learning_rate": 3.831303831303831e-07, + "loss": 0.0443, "step": 16620 }, { - "epoch": 1.2349621268379622, - "grad_norm": 0.5466039180755615, - "learning_rate": 2.2590227238972226e-05, - "loss": 0.0754, + "epoch": 4.939114939114939, + "grad_norm": 0.854500412940979, + "learning_rate": 3.653103653103653e-07, + "loss": 0.0625, "step": 16630 }, { - "epoch": 1.2357047378583097, - "grad_norm": 0.7208026647567749, - "learning_rate": 2.258577157285014e-05, - "loss": 0.0987, + "epoch": 4.942084942084942, + "grad_norm": 0.610560953617096, + "learning_rate": 3.4749034749034746e-07, + "loss": 0.0492, "step": 16640 }, { - "epoch": 1.2364473488786574, - "grad_norm": 1.444373607635498, - "learning_rate": 2.2581315906728056e-05, - "loss": 0.0711, + "epoch": 4.945054945054945, + "grad_norm": 0.6326998472213745, + "learning_rate": 3.296703296703297e-07, + "loss": 0.0608, "step": 16650 }, { - "epoch": 1.2371899598990048, - "grad_norm": 1.2086124420166016, - "learning_rate": 2.257686024060597e-05, - "loss": 0.0818, + "epoch": 4.948024948024948, + "grad_norm": 0.4535106122493744, + "learning_rate": 3.1185031185031186e-07, + "loss": 0.0415, "step": 16660 }, { - "epoch": 1.2379325709193525, - "grad_norm": 1.3369284868240356, - "learning_rate": 2.2572404574483886e-05, - "loss": 0.0722, + "epoch": 4.950994950994951, + "grad_norm": 0.8340437412261963, + "learning_rate": 2.9403029403029403e-07, + "loss": 0.0505, "step": 16670 }, { - "epoch": 1.2386751819397, - "grad_norm": 0.9845725893974304, - "learning_rate": 2.25679489083618e-05, - "loss": 0.072, + "epoch": 4.953964953964954, + "grad_norm": 0.5445932149887085, + "learning_rate": 2.762102762102762e-07, + "loss": 0.0482, "step": 16680 }, { - "epoch": 1.2394177929600476, - "grad_norm": 0.6263337135314941, - "learning_rate": 2.2563493242239716e-05, - "loss": 0.0455, + "epoch": 4.956934956934957, + "grad_norm": 0.6914054155349731, + "learning_rate": 2.5839025839025837e-07, + "loss": 0.0588, "step": 16690 }, { - "epoch": 1.240160403980395, - "grad_norm": 1.889050841331482, - "learning_rate": 2.255903757611763e-05, - "loss": 0.0967, + "epoch": 4.95990495990496, + "grad_norm": 0.9241282343864441, + "learning_rate": 2.4057024057024054e-07, + "loss": 0.0549, "step": 16700 }, { - "epoch": 1.2409030150007425, - "grad_norm": 0.5217537879943848, - "learning_rate": 2.2554581909995543e-05, - "loss": 0.0857, + "epoch": 4.962874962874963, + "grad_norm": 0.5624737739562988, + "learning_rate": 2.2275022275022276e-07, + "loss": 0.0588, "step": 16710 }, { - "epoch": 1.2416456260210902, - "grad_norm": 1.730975866317749, - "learning_rate": 2.255012624387346e-05, - "loss": 0.0831, + "epoch": 4.9658449658449655, + "grad_norm": 1.0242942571640015, + "learning_rate": 2.0493020493020493e-07, + "loss": 0.0457, "step": 16720 }, { - "epoch": 1.2423882370414376, - "grad_norm": 0.8797131776809692, - "learning_rate": 2.2545670577751373e-05, - "loss": 0.071, + "epoch": 4.968814968814969, + "grad_norm": 1.025058388710022, + "learning_rate": 1.8711018711018713e-07, + "loss": 0.0524, "step": 16730 }, { - "epoch": 1.2431308480617853, - "grad_norm": 2.5811779499053955, - "learning_rate": 2.2541214911629288e-05, - "loss": 0.0956, + "epoch": 4.971784971784972, + "grad_norm": 1.143563985824585, + "learning_rate": 1.692901692901693e-07, + "loss": 0.0391, "step": 16740 }, { - "epoch": 1.2438734590821328, - "grad_norm": 1.4201879501342773, - "learning_rate": 2.2536759245507206e-05, - "loss": 0.0722, + "epoch": 4.974754974754974, + "grad_norm": 0.7179445028305054, + "learning_rate": 1.5147015147015147e-07, + "loss": 0.054, "step": 16750 }, { - "epoch": 1.2446160701024804, - "grad_norm": 2.9858505725860596, - "learning_rate": 2.2532303579385118e-05, - "loss": 0.0867, + "epoch": 4.977724977724978, + "grad_norm": 1.321466088294983, + "learning_rate": 1.3365013365013367e-07, + "loss": 0.0569, "step": 16760 }, { - "epoch": 1.2453586811228279, - "grad_norm": 2.050238609313965, - "learning_rate": 2.2527847913263033e-05, - "loss": 0.0588, + "epoch": 4.980694980694981, + "grad_norm": 0.7663488388061523, + "learning_rate": 1.1583011583011584e-07, + "loss": 0.0542, "step": 16770 }, { - "epoch": 1.2461012921431753, - "grad_norm": 3.033705472946167, - "learning_rate": 2.2523392247140948e-05, - "loss": 0.0687, + "epoch": 4.983664983664983, + "grad_norm": 0.7234964966773987, + "learning_rate": 9.801009801009801e-08, + "loss": 0.0414, "step": 16780 }, { - "epoch": 1.246843903163523, - "grad_norm": 1.8493062257766724, - "learning_rate": 2.2518936581018863e-05, - "loss": 0.0653, + "epoch": 4.986634986634987, + "grad_norm": 0.8341914415359497, + "learning_rate": 8.019008019008019e-08, + "loss": 0.0647, "step": 16790 }, { - "epoch": 1.2475865141838705, - "grad_norm": 2.4249043464660645, - "learning_rate": 2.2514480914896778e-05, - "loss": 0.1034, + "epoch": 4.98960498960499, + "grad_norm": 0.9533319473266602, + "learning_rate": 6.237006237006238e-08, + "loss": 0.0513, "step": 16800 }, { - "epoch": 1.2483291252042181, - "grad_norm": 0.8284013271331787, - "learning_rate": 2.2510025248774693e-05, - "loss": 0.0978, + "epoch": 4.992574992574992, + "grad_norm": 1.162724494934082, + "learning_rate": 4.4550044550044554e-08, + "loss": 0.0543, "step": 16810 }, { - "epoch": 1.2490717362245656, - "grad_norm": 1.3202928304672241, - "learning_rate": 2.2505569582652608e-05, - "loss": 0.0709, + "epoch": 4.9955449955449955, + "grad_norm": 1.0774705410003662, + "learning_rate": 2.673002673002673e-08, + "loss": 0.0559, "step": 16820 }, { - "epoch": 1.249814347244913, - "grad_norm": 1.9100357294082642, - "learning_rate": 2.2501113916530523e-05, - "loss": 0.085, + "epoch": 4.998514998514999, + "grad_norm": 0.7029628157615662, + "learning_rate": 8.91000891000891e-09, + "loss": 0.0568, "step": 16830 }, - { - "epoch": 1.2505569582652607, - "grad_norm": 1.8051328659057617, - "learning_rate": 2.2496658250408434e-05, - "loss": 0.0653, - "step": 16840 - }, - { - "epoch": 1.2512995692856081, - "grad_norm": 0.7372807860374451, - "learning_rate": 2.2492202584286353e-05, - "loss": 0.0798, - "step": 16850 - }, - { - "epoch": 1.2520421803059558, - "grad_norm": 1.493801474571228, - "learning_rate": 2.2487746918164268e-05, - "loss": 0.0862, - "step": 16860 - }, - { - "epoch": 1.2527847913263033, - "grad_norm": 0.8077939748764038, - "learning_rate": 2.248329125204218e-05, - "loss": 0.0651, - "step": 16870 - }, - { - "epoch": 1.2535274023466507, - "grad_norm": 1.4354842901229858, - "learning_rate": 2.2478835585920094e-05, - "loss": 0.0754, - "step": 16880 - }, - { - "epoch": 1.2542700133669984, - "grad_norm": 1.9970204830169678, - "learning_rate": 2.2474379919798013e-05, - "loss": 0.0746, - "step": 16890 - }, - { - "epoch": 1.2550126243873458, - "grad_norm": 0.7201411724090576, - "learning_rate": 2.2469924253675924e-05, - "loss": 0.0706, - "step": 16900 - }, - { - "epoch": 1.2557552354076935, - "grad_norm": 2.7510123252868652, - "learning_rate": 2.246546858755384e-05, - "loss": 0.0799, - "step": 16910 - }, - { - "epoch": 1.256497846428041, - "grad_norm": 1.3006263971328735, - "learning_rate": 2.2461012921431758e-05, - "loss": 0.0689, - "step": 16920 - }, - { - "epoch": 1.2572404574483884, - "grad_norm": 2.1631722450256348, - "learning_rate": 2.245655725530967e-05, - "loss": 0.0484, - "step": 16930 - }, - { - "epoch": 1.257983068468736, - "grad_norm": 0.6136536598205566, - "learning_rate": 2.2452101589187584e-05, - "loss": 0.1071, - "step": 16940 - }, - { - "epoch": 1.2587256794890835, - "grad_norm": 2.494858503341675, - "learning_rate": 2.2447645923065496e-05, - "loss": 0.0693, - "step": 16950 - }, - { - "epoch": 1.2594682905094312, - "grad_norm": 3.487287998199463, - "learning_rate": 2.2443190256943414e-05, - "loss": 0.0533, - "step": 16960 - }, - { - "epoch": 1.2602109015297787, - "grad_norm": 1.1997121572494507, - "learning_rate": 2.243873459082133e-05, - "loss": 0.0713, - "step": 16970 - }, - { - "epoch": 1.260953512550126, - "grad_norm": 1.8079684972763062, - "learning_rate": 2.243427892469924e-05, - "loss": 0.0849, - "step": 16980 - }, - { - "epoch": 1.2616961235704738, - "grad_norm": 1.9124133586883545, - "learning_rate": 2.242982325857716e-05, - "loss": 0.0789, - "step": 16990 - }, - { - "epoch": 1.2624387345908215, - "grad_norm": 1.6641535758972168, - "learning_rate": 2.2425367592455074e-05, - "loss": 0.0694, - "step": 17000 - }, - { - "epoch": 1.263181345611169, - "grad_norm": 1.7782231569290161, - "learning_rate": 2.2420911926332986e-05, - "loss": 0.0773, - "step": 17010 - }, - { - "epoch": 1.2639239566315164, - "grad_norm": 1.2206722497940063, - "learning_rate": 2.24164562602109e-05, - "loss": 0.0739, - "step": 17020 - }, - { - "epoch": 1.264666567651864, - "grad_norm": 0.541761577129364, - "learning_rate": 2.241200059408882e-05, - "loss": 0.0639, - "step": 17030 - }, - { - "epoch": 1.2654091786722115, - "grad_norm": 0.547622561454773, - "learning_rate": 2.240754492796673e-05, - "loss": 0.0702, - "step": 17040 - }, - { - "epoch": 1.2661517896925591, - "grad_norm": 4.135508060455322, - "learning_rate": 2.2403089261844646e-05, - "loss": 0.0927, - "step": 17050 - }, - { - "epoch": 1.2668944007129066, - "grad_norm": 3.19258975982666, - "learning_rate": 2.239863359572256e-05, - "loss": 0.0736, - "step": 17060 - }, - { - "epoch": 1.267637011733254, - "grad_norm": 1.1759032011032104, - "learning_rate": 2.2394177929600476e-05, - "loss": 0.0686, - "step": 17070 - }, - { - "epoch": 1.2683796227536017, - "grad_norm": 0.7790769338607788, - "learning_rate": 2.238972226347839e-05, - "loss": 0.0961, - "step": 17080 - }, - { - "epoch": 1.2691222337739492, - "grad_norm": 0.9479905962944031, - "learning_rate": 2.2385266597356306e-05, - "loss": 0.0555, - "step": 17090 - }, - { - "epoch": 1.2698648447942968, - "grad_norm": 1.7313250303268433, - "learning_rate": 2.238081093123422e-05, - "loss": 0.099, - "step": 17100 - }, - { - "epoch": 1.2706074558146443, - "grad_norm": 0.5795320272445679, - "learning_rate": 2.2376355265112136e-05, - "loss": 0.0669, - "step": 17110 - }, - { - "epoch": 1.2713500668349917, - "grad_norm": 1.634346604347229, - "learning_rate": 2.2371899598990047e-05, - "loss": 0.0771, - "step": 17120 - }, - { - "epoch": 1.2720926778553394, - "grad_norm": 2.006558656692505, - "learning_rate": 2.2367443932867966e-05, - "loss": 0.0762, - "step": 17130 - }, - { - "epoch": 1.2728352888756869, - "grad_norm": 1.0356532335281372, - "learning_rate": 2.236298826674588e-05, - "loss": 0.0903, - "step": 17140 - }, - { - "epoch": 1.2735778998960345, - "grad_norm": 1.004071831703186, - "learning_rate": 2.2358532600623792e-05, - "loss": 0.0676, - "step": 17150 - }, - { - "epoch": 1.274320510916382, - "grad_norm": 1.3854845762252808, - "learning_rate": 2.235407693450171e-05, - "loss": 0.0705, - "step": 17160 - }, - { - "epoch": 1.2750631219367294, - "grad_norm": 2.1504805088043213, - "learning_rate": 2.2349621268379622e-05, - "loss": 0.072, - "step": 17170 - }, - { - "epoch": 1.275805732957077, - "grad_norm": 1.1549479961395264, - "learning_rate": 2.2345165602257537e-05, - "loss": 0.0642, - "step": 17180 - }, - { - "epoch": 1.2765483439774246, - "grad_norm": 2.8275554180145264, - "learning_rate": 2.2340709936135452e-05, - "loss": 0.0646, - "step": 17190 - }, - { - "epoch": 1.2772909549977722, - "grad_norm": 0.6290885806083679, - "learning_rate": 2.2336254270013367e-05, - "loss": 0.0875, - "step": 17200 - }, - { - "epoch": 1.2780335660181197, - "grad_norm": 2.0349278450012207, - "learning_rate": 2.2331798603891282e-05, - "loss": 0.0624, - "step": 17210 - }, - { - "epoch": 1.2787761770384671, - "grad_norm": 1.7029626369476318, - "learning_rate": 2.2327342937769197e-05, - "loss": 0.057, - "step": 17220 - }, - { - "epoch": 1.2795187880588148, - "grad_norm": 0.9866172075271606, - "learning_rate": 2.2322887271647112e-05, - "loss": 0.08, - "step": 17230 - }, - { - "epoch": 1.2802613990791623, - "grad_norm": 1.6005713939666748, - "learning_rate": 2.2318431605525027e-05, - "loss": 0.0767, - "step": 17240 - }, - { - "epoch": 1.28100401009951, - "grad_norm": 0.7228248119354248, - "learning_rate": 2.231397593940294e-05, - "loss": 0.1184, - "step": 17250 - }, - { - "epoch": 1.2817466211198574, - "grad_norm": 2.2156078815460205, - "learning_rate": 2.2309520273280857e-05, - "loss": 0.1104, - "step": 17260 - }, - { - "epoch": 1.2824892321402048, - "grad_norm": 1.3294280767440796, - "learning_rate": 2.2305064607158772e-05, - "loss": 0.0742, - "step": 17270 - }, - { - "epoch": 1.2832318431605525, - "grad_norm": 1.8837758302688599, - "learning_rate": 2.2300608941036684e-05, - "loss": 0.0753, - "step": 17280 - }, - { - "epoch": 1.2839744541809002, - "grad_norm": 1.9538664817810059, - "learning_rate": 2.22961532749146e-05, - "loss": 0.0685, - "step": 17290 - }, - { - "epoch": 1.2847170652012476, - "grad_norm": 1.3086044788360596, - "learning_rate": 2.2291697608792517e-05, - "loss": 0.069, - "step": 17300 - }, - { - "epoch": 1.285459676221595, - "grad_norm": 0.43306243419647217, - "learning_rate": 2.228724194267043e-05, - "loss": 0.0688, - "step": 17310 - }, - { - "epoch": 1.2862022872419427, - "grad_norm": 1.7001709938049316, - "learning_rate": 2.2282786276548344e-05, - "loss": 0.0964, - "step": 17320 - }, - { - "epoch": 1.2869448982622902, - "grad_norm": 0.9813358187675476, - "learning_rate": 2.2278330610426262e-05, - "loss": 0.0748, - "step": 17330 - }, - { - "epoch": 1.2876875092826379, - "grad_norm": 1.8679172992706299, - "learning_rate": 2.2273874944304174e-05, - "loss": 0.0962, - "step": 17340 - }, - { - "epoch": 1.2884301203029853, - "grad_norm": 1.7753219604492188, - "learning_rate": 2.226941927818209e-05, - "loss": 0.0818, - "step": 17350 - }, - { - "epoch": 1.2891727313233328, - "grad_norm": 1.1424388885498047, - "learning_rate": 2.226496361206e-05, - "loss": 0.0718, - "step": 17360 - }, - { - "epoch": 1.2899153423436804, - "grad_norm": 1.6380572319030762, - "learning_rate": 2.226050794593792e-05, - "loss": 0.0815, - "step": 17370 - }, - { - "epoch": 1.2906579533640279, - "grad_norm": 0.8902571201324463, - "learning_rate": 2.2256052279815834e-05, - "loss": 0.075, - "step": 17380 - }, - { - "epoch": 1.2914005643843756, - "grad_norm": 0.8039567470550537, - "learning_rate": 2.2251596613693745e-05, - "loss": 0.0554, - "step": 17390 - }, - { - "epoch": 1.292143175404723, - "grad_norm": 2.0586135387420654, - "learning_rate": 2.2247140947571664e-05, - "loss": 0.0662, - "step": 17400 - }, - { - "epoch": 1.2928857864250705, - "grad_norm": 3.4961864948272705, - "learning_rate": 2.224268528144958e-05, - "loss": 0.1205, - "step": 17410 - }, - { - "epoch": 1.2936283974454181, - "grad_norm": 1.724418044090271, - "learning_rate": 2.223822961532749e-05, - "loss": 0.0581, - "step": 17420 - }, - { - "epoch": 1.2943710084657656, - "grad_norm": 1.78573739528656, - "learning_rate": 2.2233773949205405e-05, - "loss": 0.0827, - "step": 17430 - }, - { - "epoch": 1.2951136194861133, - "grad_norm": 1.7535440921783447, - "learning_rate": 2.2229318283083324e-05, - "loss": 0.0835, - "step": 17440 - }, - { - "epoch": 1.2958562305064607, - "grad_norm": 0.7381752729415894, - "learning_rate": 2.2224862616961235e-05, - "loss": 0.0748, - "step": 17450 - }, - { - "epoch": 1.2965988415268082, - "grad_norm": 2.1226701736450195, - "learning_rate": 2.222040695083915e-05, - "loss": 0.0663, - "step": 17460 - }, - { - "epoch": 1.2973414525471558, - "grad_norm": 1.3175716400146484, - "learning_rate": 2.2215951284717065e-05, - "loss": 0.1009, - "step": 17470 - }, - { - "epoch": 1.2980840635675033, - "grad_norm": 1.1516002416610718, - "learning_rate": 2.221149561859498e-05, - "loss": 0.075, - "step": 17480 - }, - { - "epoch": 1.298826674587851, - "grad_norm": 2.0485615730285645, - "learning_rate": 2.2207039952472895e-05, - "loss": 0.0753, - "step": 17490 - }, - { - "epoch": 1.2995692856081984, - "grad_norm": 1.492017149925232, - "learning_rate": 2.220258428635081e-05, - "loss": 0.0801, - "step": 17500 - }, - { - "epoch": 1.3003118966285458, - "grad_norm": 1.2517192363739014, - "learning_rate": 2.2198128620228725e-05, - "loss": 0.0704, - "step": 17510 - }, - { - "epoch": 1.3010545076488935, - "grad_norm": 1.73708176612854, - "learning_rate": 2.219367295410664e-05, - "loss": 0.0564, - "step": 17520 - }, - { - "epoch": 1.301797118669241, - "grad_norm": 1.0200793743133545, - "learning_rate": 2.2189217287984552e-05, - "loss": 0.0521, - "step": 17530 - }, - { - "epoch": 1.3025397296895886, - "grad_norm": 3.1325795650482178, - "learning_rate": 2.218476162186247e-05, - "loss": 0.0628, - "step": 17540 - }, - { - "epoch": 1.303282340709936, - "grad_norm": 2.2476203441619873, - "learning_rate": 2.2180305955740385e-05, - "loss": 0.0747, - "step": 17550 - }, - { - "epoch": 1.3040249517302835, - "grad_norm": 0.6878722906112671, - "learning_rate": 2.2175850289618297e-05, - "loss": 0.0894, - "step": 17560 - }, - { - "epoch": 1.3047675627506312, - "grad_norm": 2.48412823677063, - "learning_rate": 2.2171394623496215e-05, - "loss": 0.0809, - "step": 17570 - }, - { - "epoch": 1.3055101737709789, - "grad_norm": 1.0366617441177368, - "learning_rate": 2.2166938957374127e-05, - "loss": 0.0724, - "step": 17580 - }, - { - "epoch": 1.3062527847913263, - "grad_norm": 0.9051563739776611, - "learning_rate": 2.2162483291252042e-05, - "loss": 0.1042, - "step": 17590 - }, - { - "epoch": 1.3069953958116738, - "grad_norm": 0.7146435976028442, - "learning_rate": 2.2158027625129957e-05, - "loss": 0.1003, - "step": 17600 - }, - { - "epoch": 1.3077380068320215, - "grad_norm": 1.7611632347106934, - "learning_rate": 2.2153571959007872e-05, - "loss": 0.0837, - "step": 17610 - }, - { - "epoch": 1.308480617852369, - "grad_norm": 1.3389374017715454, - "learning_rate": 2.2149116292885787e-05, - "loss": 0.0652, - "step": 17620 - }, - { - "epoch": 1.3092232288727166, - "grad_norm": 2.716177463531494, - "learning_rate": 2.2144660626763702e-05, - "loss": 0.0718, - "step": 17630 - }, - { - "epoch": 1.309965839893064, - "grad_norm": 0.6163918972015381, - "learning_rate": 2.2140204960641617e-05, - "loss": 0.0744, - "step": 17640 - }, - { - "epoch": 1.3107084509134115, - "grad_norm": 2.4319982528686523, - "learning_rate": 2.2135749294519532e-05, - "loss": 0.097, - "step": 17650 - }, - { - "epoch": 1.3114510619337592, - "grad_norm": 2.8777670860290527, - "learning_rate": 2.2131293628397447e-05, - "loss": 0.0849, - "step": 17660 - }, - { - "epoch": 1.3121936729541066, - "grad_norm": 2.7861387729644775, - "learning_rate": 2.2126837962275362e-05, - "loss": 0.076, - "step": 17670 - }, - { - "epoch": 1.3129362839744543, - "grad_norm": 0.650431752204895, - "learning_rate": 2.2122382296153277e-05, - "loss": 0.0888, - "step": 17680 - }, - { - "epoch": 1.3136788949948017, - "grad_norm": 0.553596019744873, - "learning_rate": 2.211792663003119e-05, - "loss": 0.0619, - "step": 17690 - }, - { - "epoch": 1.3144215060151492, - "grad_norm": 1.0089176893234253, - "learning_rate": 2.2113470963909103e-05, - "loss": 0.0777, - "step": 17700 - }, - { - "epoch": 1.3151641170354968, - "grad_norm": 1.6355758905410767, - "learning_rate": 2.2109015297787022e-05, - "loss": 0.0507, - "step": 17710 - }, - { - "epoch": 1.3159067280558443, - "grad_norm": 1.1922086477279663, - "learning_rate": 2.2104559631664933e-05, - "loss": 0.0612, - "step": 17720 - }, - { - "epoch": 1.316649339076192, - "grad_norm": 3.1001734733581543, - "learning_rate": 2.210010396554285e-05, - "loss": 0.103, - "step": 17730 - }, - { - "epoch": 1.3173919500965394, - "grad_norm": 2.3296868801116943, - "learning_rate": 2.2095648299420767e-05, - "loss": 0.0626, - "step": 17740 - }, - { - "epoch": 1.3181345611168869, - "grad_norm": 1.527961015701294, - "learning_rate": 2.209119263329868e-05, - "loss": 0.0762, - "step": 17750 - }, - { - "epoch": 1.3188771721372345, - "grad_norm": 2.4450912475585938, - "learning_rate": 2.2086736967176593e-05, - "loss": 0.0799, - "step": 17760 - }, - { - "epoch": 1.319619783157582, - "grad_norm": 1.477561354637146, - "learning_rate": 2.2082281301054505e-05, - "loss": 0.0466, - "step": 17770 - }, - { - "epoch": 1.3203623941779297, - "grad_norm": 2.102966070175171, - "learning_rate": 2.2077825634932423e-05, - "loss": 0.0812, - "step": 17780 - }, - { - "epoch": 1.3211050051982771, - "grad_norm": 1.731831669807434, - "learning_rate": 2.207336996881034e-05, - "loss": 0.0636, - "step": 17790 - }, - { - "epoch": 1.3218476162186246, - "grad_norm": 1.5313726663589478, - "learning_rate": 2.206891430268825e-05, - "loss": 0.0619, - "step": 17800 - }, - { - "epoch": 1.3225902272389722, - "grad_norm": 1.2742550373077393, - "learning_rate": 2.206445863656617e-05, - "loss": 0.0647, - "step": 17810 - }, - { - "epoch": 1.3233328382593197, - "grad_norm": 0.7429075241088867, - "learning_rate": 2.2060002970444083e-05, - "loss": 0.0574, - "step": 17820 - }, - { - "epoch": 1.3240754492796674, - "grad_norm": 2.3844103813171387, - "learning_rate": 2.2055547304321995e-05, - "loss": 0.0455, - "step": 17830 - }, - { - "epoch": 1.3248180603000148, - "grad_norm": 3.4696733951568604, - "learning_rate": 2.2051091638199913e-05, - "loss": 0.0856, - "step": 17840 - }, - { - "epoch": 1.3255606713203623, - "grad_norm": 1.2580214738845825, - "learning_rate": 2.204663597207783e-05, - "loss": 0.0525, - "step": 17850 - }, - { - "epoch": 1.32630328234071, - "grad_norm": 2.1355206966400146, - "learning_rate": 2.204218030595574e-05, - "loss": 0.0532, - "step": 17860 - }, - { - "epoch": 1.3270458933610576, - "grad_norm": 3.5435104370117188, - "learning_rate": 2.2037724639833655e-05, - "loss": 0.0589, - "step": 17870 - }, - { - "epoch": 1.327788504381405, - "grad_norm": 0.7375121712684631, - "learning_rate": 2.203326897371157e-05, - "loss": 0.0516, - "step": 17880 - }, - { - "epoch": 1.3285311154017525, - "grad_norm": 1.3405452966690063, - "learning_rate": 2.2028813307589485e-05, - "loss": 0.0609, - "step": 17890 - }, - { - "epoch": 1.3292737264221002, - "grad_norm": 1.448654294013977, - "learning_rate": 2.20243576414674e-05, - "loss": 0.0716, - "step": 17900 - }, - { - "epoch": 1.3300163374424476, - "grad_norm": 1.0215409994125366, - "learning_rate": 2.2019901975345315e-05, - "loss": 0.0627, - "step": 17910 - }, - { - "epoch": 1.3307589484627953, - "grad_norm": 2.50747013092041, - "learning_rate": 2.201544630922323e-05, - "loss": 0.0607, - "step": 17920 - }, - { - "epoch": 1.3315015594831427, - "grad_norm": 0.947949230670929, - "learning_rate": 2.2010990643101145e-05, - "loss": 0.092, - "step": 17930 - }, - { - "epoch": 1.3322441705034902, - "grad_norm": 1.444300889968872, - "learning_rate": 2.2006534976979057e-05, - "loss": 0.052, - "step": 17940 - }, - { - "epoch": 1.3329867815238379, - "grad_norm": 1.492150068283081, - "learning_rate": 2.2002079310856975e-05, - "loss": 0.0644, - "step": 17950 - }, - { - "epoch": 1.3337293925441853, - "grad_norm": 3.0664021968841553, - "learning_rate": 2.199762364473489e-05, - "loss": 0.1176, - "step": 17960 - }, - { - "epoch": 1.334472003564533, - "grad_norm": 2.1446638107299805, - "learning_rate": 2.19931679786128e-05, - "loss": 0.0621, - "step": 17970 - }, - { - "epoch": 1.3352146145848804, - "grad_norm": 2.1840789318084717, - "learning_rate": 2.198871231249072e-05, - "loss": 0.0744, - "step": 17980 - }, - { - "epoch": 1.335957225605228, - "grad_norm": 6.352825164794922, - "learning_rate": 2.198425664636863e-05, - "loss": 0.0636, - "step": 17990 - }, - { - "epoch": 1.3366998366255756, - "grad_norm": 1.7833071947097778, - "learning_rate": 2.1979800980246546e-05, - "loss": 0.0835, - "step": 18000 - }, - { - "epoch": 1.337442447645923, - "grad_norm": 1.596863031387329, - "learning_rate": 2.197534531412446e-05, - "loss": 0.0639, - "step": 18010 - }, - { - "epoch": 1.3381850586662707, - "grad_norm": 1.079342246055603, - "learning_rate": 2.1970889648002376e-05, - "loss": 0.0675, - "step": 18020 - }, - { - "epoch": 1.3389276696866181, - "grad_norm": 2.5016486644744873, - "learning_rate": 2.196643398188029e-05, - "loss": 0.0717, - "step": 18030 - }, - { - "epoch": 1.3396702807069656, - "grad_norm": 1.00153648853302, - "learning_rate": 2.1961978315758206e-05, - "loss": 0.0901, - "step": 18040 - }, - { - "epoch": 1.3404128917273133, - "grad_norm": 0.4505369961261749, - "learning_rate": 2.195752264963612e-05, - "loss": 0.0617, - "step": 18050 - }, - { - "epoch": 1.3411555027476607, - "grad_norm": 0.8627389669418335, - "learning_rate": 2.1953066983514036e-05, - "loss": 0.0915, - "step": 18060 - }, - { - "epoch": 1.3418981137680084, - "grad_norm": 1.8708628416061401, - "learning_rate": 2.194861131739195e-05, - "loss": 0.087, - "step": 18070 - }, - { - "epoch": 1.3426407247883558, - "grad_norm": 1.866942286491394, - "learning_rate": 2.1944155651269866e-05, - "loss": 0.0633, - "step": 18080 - }, - { - "epoch": 1.3433833358087033, - "grad_norm": 3.1067397594451904, - "learning_rate": 2.193969998514778e-05, - "loss": 0.065, - "step": 18090 - }, - { - "epoch": 1.344125946829051, - "grad_norm": 1.4775131940841675, - "learning_rate": 2.1935244319025693e-05, - "loss": 0.0858, - "step": 18100 - }, - { - "epoch": 1.3448685578493984, - "grad_norm": 1.9869881868362427, - "learning_rate": 2.1930788652903608e-05, - "loss": 0.088, - "step": 18110 - }, - { - "epoch": 1.345611168869746, - "grad_norm": 2.1801204681396484, - "learning_rate": 2.1926332986781526e-05, - "loss": 0.0941, - "step": 18120 - }, - { - "epoch": 1.3463537798900935, - "grad_norm": 1.2219593524932861, - "learning_rate": 2.1921877320659438e-05, - "loss": 0.0897, - "step": 18130 - }, - { - "epoch": 1.347096390910441, - "grad_norm": 1.6922121047973633, - "learning_rate": 2.1917421654537353e-05, - "loss": 0.0905, - "step": 18140 - }, - { - "epoch": 1.3478390019307886, - "grad_norm": 4.362298488616943, - "learning_rate": 2.191296598841527e-05, - "loss": 0.0714, - "step": 18150 - }, - { - "epoch": 1.3485816129511363, - "grad_norm": 1.9066132307052612, - "learning_rate": 2.1908510322293183e-05, - "loss": 0.076, - "step": 18160 - }, - { - "epoch": 1.3493242239714838, - "grad_norm": 2.2827999591827393, - "learning_rate": 2.1904054656171098e-05, - "loss": 0.0872, - "step": 18170 - }, - { - "epoch": 1.3500668349918312, - "grad_norm": 0.735640287399292, - "learning_rate": 2.189959899004901e-05, - "loss": 0.0668, - "step": 18180 - }, - { - "epoch": 1.350809446012179, - "grad_norm": 1.7946842908859253, - "learning_rate": 2.1895143323926928e-05, - "loss": 0.0767, - "step": 18190 - }, - { - "epoch": 1.3515520570325263, - "grad_norm": 0.793258547782898, - "learning_rate": 2.1890687657804843e-05, - "loss": 0.0484, - "step": 18200 - }, - { - "epoch": 1.352294668052874, - "grad_norm": 1.9546618461608887, - "learning_rate": 2.1886231991682755e-05, - "loss": 0.1081, - "step": 18210 - }, - { - "epoch": 1.3530372790732215, - "grad_norm": 1.6390115022659302, - "learning_rate": 2.1881776325560673e-05, - "loss": 0.051, - "step": 18220 - }, - { - "epoch": 1.353779890093569, - "grad_norm": 0.6895598769187927, - "learning_rate": 2.1877320659438588e-05, - "loss": 0.0765, - "step": 18230 - }, - { - "epoch": 1.3545225011139166, - "grad_norm": 1.3696023225784302, - "learning_rate": 2.18728649933165e-05, - "loss": 0.1083, - "step": 18240 - }, - { - "epoch": 1.355265112134264, - "grad_norm": 1.8594785928726196, - "learning_rate": 2.1868409327194418e-05, - "loss": 0.0773, - "step": 18250 - }, - { - "epoch": 1.3560077231546117, - "grad_norm": 1.0615592002868652, - "learning_rate": 2.1863953661072333e-05, - "loss": 0.079, - "step": 18260 - }, - { - "epoch": 1.3567503341749592, - "grad_norm": 1.725924015045166, - "learning_rate": 2.1859497994950245e-05, - "loss": 0.0957, - "step": 18270 - }, - { - "epoch": 1.3574929451953066, - "grad_norm": 0.7727744579315186, - "learning_rate": 2.185504232882816e-05, - "loss": 0.0664, - "step": 18280 - }, - { - "epoch": 1.3582355562156543, - "grad_norm": 0.4827175438404083, - "learning_rate": 2.1850586662706075e-05, - "loss": 0.0786, - "step": 18290 - }, - { - "epoch": 1.3589781672360017, - "grad_norm": 2.3868589401245117, - "learning_rate": 2.184613099658399e-05, - "loss": 0.0757, - "step": 18300 - }, - { - "epoch": 1.3597207782563494, - "grad_norm": 1.4127172231674194, - "learning_rate": 2.1841675330461905e-05, - "loss": 0.0805, - "step": 18310 - }, - { - "epoch": 1.3604633892766969, - "grad_norm": 3.6567182540893555, - "learning_rate": 2.183721966433982e-05, - "loss": 0.0501, - "step": 18320 - }, - { - "epoch": 1.3612060002970443, - "grad_norm": 0.46795493364334106, - "learning_rate": 2.1832763998217735e-05, - "loss": 0.0702, - "step": 18330 - }, - { - "epoch": 1.361948611317392, - "grad_norm": 1.1529920101165771, - "learning_rate": 2.182830833209565e-05, - "loss": 0.0891, - "step": 18340 - }, - { - "epoch": 1.3626912223377394, - "grad_norm": 1.0906422138214111, - "learning_rate": 2.182385266597356e-05, - "loss": 0.0789, - "step": 18350 - }, - { - "epoch": 1.363433833358087, - "grad_norm": 4.010624408721924, - "learning_rate": 2.181939699985148e-05, - "loss": 0.1198, - "step": 18360 - }, - { - "epoch": 1.3641764443784345, - "grad_norm": 1.2487231492996216, - "learning_rate": 2.1814941333729395e-05, - "loss": 0.0811, - "step": 18370 - }, - { - "epoch": 1.364919055398782, - "grad_norm": 1.9714354276657104, - "learning_rate": 2.1810485667607306e-05, - "loss": 0.065, - "step": 18380 - }, - { - "epoch": 1.3656616664191297, - "grad_norm": 2.3405370712280273, - "learning_rate": 2.1806030001485224e-05, - "loss": 0.1, - "step": 18390 - }, - { - "epoch": 1.3664042774394771, - "grad_norm": 0.7739295959472656, - "learning_rate": 2.1801574335363136e-05, - "loss": 0.0794, - "step": 18400 - }, - { - "epoch": 1.3671468884598248, - "grad_norm": 1.5467528104782104, - "learning_rate": 2.179711866924105e-05, - "loss": 0.0625, - "step": 18410 - }, - { - "epoch": 1.3678894994801722, - "grad_norm": 2.0062620639801025, - "learning_rate": 2.1792663003118966e-05, - "loss": 0.1144, - "step": 18420 - }, - { - "epoch": 1.3686321105005197, - "grad_norm": 1.0899155139923096, - "learning_rate": 2.178820733699688e-05, - "loss": 0.0832, - "step": 18430 - }, - { - "epoch": 1.3693747215208674, - "grad_norm": 1.5174329280853271, - "learning_rate": 2.1783751670874796e-05, - "loss": 0.0886, - "step": 18440 - }, - { - "epoch": 1.370117332541215, - "grad_norm": 1.060883641242981, - "learning_rate": 2.177929600475271e-05, - "loss": 0.0619, - "step": 18450 - }, - { - "epoch": 1.3708599435615625, - "grad_norm": 2.5630977153778076, - "learning_rate": 2.1774840338630626e-05, - "loss": 0.0697, - "step": 18460 - }, - { - "epoch": 1.37160255458191, - "grad_norm": 1.6093450784683228, - "learning_rate": 2.177038467250854e-05, - "loss": 0.0803, - "step": 18470 - }, - { - "epoch": 1.3723451656022576, - "grad_norm": 1.996664047241211, - "learning_rate": 2.1765929006386456e-05, - "loss": 0.0881, - "step": 18480 - }, - { - "epoch": 1.373087776622605, - "grad_norm": 1.6483838558197021, - "learning_rate": 2.176147334026437e-05, - "loss": 0.0536, - "step": 18490 - }, - { - "epoch": 1.3738303876429527, - "grad_norm": 2.682058572769165, - "learning_rate": 2.1757017674142286e-05, - "loss": 0.0822, - "step": 18500 - }, - { - "epoch": 1.3745729986633002, - "grad_norm": 0.638530969619751, - "learning_rate": 2.1752562008020198e-05, - "loss": 0.0843, - "step": 18510 - }, - { - "epoch": 1.3753156096836476, - "grad_norm": 0.7603070139884949, - "learning_rate": 2.1748106341898113e-05, - "loss": 0.0736, - "step": 18520 - }, - { - "epoch": 1.3760582207039953, - "grad_norm": 1.7410355806350708, - "learning_rate": 2.174365067577603e-05, - "loss": 0.0999, - "step": 18530 - }, - { - "epoch": 1.3768008317243428, - "grad_norm": 1.657575011253357, - "learning_rate": 2.1739195009653943e-05, - "loss": 0.0609, - "step": 18540 - }, - { - "epoch": 1.3775434427446904, - "grad_norm": 0.7977071404457092, - "learning_rate": 2.1734739343531858e-05, - "loss": 0.0636, - "step": 18550 - }, - { - "epoch": 1.3782860537650379, - "grad_norm": 0.5938560962677002, - "learning_rate": 2.1730283677409776e-05, - "loss": 0.0776, - "step": 18560 - }, - { - "epoch": 1.3790286647853853, - "grad_norm": 2.3194963932037354, - "learning_rate": 2.1725828011287688e-05, - "loss": 0.0953, - "step": 18570 - }, - { - "epoch": 1.379771275805733, - "grad_norm": 1.4146885871887207, - "learning_rate": 2.1721372345165603e-05, - "loss": 0.0885, - "step": 18580 - }, - { - "epoch": 1.3805138868260804, - "grad_norm": 2.4094855785369873, - "learning_rate": 2.1716916679043518e-05, - "loss": 0.0629, - "step": 18590 - }, - { - "epoch": 1.3812564978464281, - "grad_norm": 2.476471424102783, - "learning_rate": 2.1712461012921433e-05, - "loss": 0.0522, - "step": 18600 - }, - { - "epoch": 1.3819991088667756, - "grad_norm": 1.0352263450622559, - "learning_rate": 2.1708005346799348e-05, - "loss": 0.0605, - "step": 18610 - }, - { - "epoch": 1.382741719887123, - "grad_norm": 1.2471846342086792, - "learning_rate": 2.170354968067726e-05, - "loss": 0.0606, - "step": 18620 - }, - { - "epoch": 1.3834843309074707, - "grad_norm": 0.748393177986145, - "learning_rate": 2.1699094014555178e-05, - "loss": 0.0825, - "step": 18630 - }, - { - "epoch": 1.3842269419278181, - "grad_norm": 2.0027894973754883, - "learning_rate": 2.1694638348433093e-05, - "loss": 0.1089, - "step": 18640 - }, - { - "epoch": 1.3849695529481658, - "grad_norm": 0.7631524205207825, - "learning_rate": 2.1690182682311004e-05, - "loss": 0.0627, - "step": 18650 - }, - { - "epoch": 1.3857121639685133, - "grad_norm": 1.3359373807907104, - "learning_rate": 2.1685727016188923e-05, - "loss": 0.0581, - "step": 18660 - }, - { - "epoch": 1.3864547749888607, - "grad_norm": 1.8302745819091797, - "learning_rate": 2.1681271350066838e-05, - "loss": 0.0578, - "step": 18670 - }, - { - "epoch": 1.3871973860092084, - "grad_norm": 1.2512954473495483, - "learning_rate": 2.167681568394475e-05, - "loss": 0.0746, - "step": 18680 - }, - { - "epoch": 1.3879399970295558, - "grad_norm": 1.0682908296585083, - "learning_rate": 2.1672360017822664e-05, - "loss": 0.0568, - "step": 18690 - }, - { - "epoch": 1.3886826080499035, - "grad_norm": 0.7478026747703552, - "learning_rate": 2.166790435170058e-05, - "loss": 0.0947, - "step": 18700 - }, - { - "epoch": 1.389425219070251, - "grad_norm": 1.646852731704712, - "learning_rate": 2.1663448685578494e-05, - "loss": 0.0718, - "step": 18710 - }, - { - "epoch": 1.3901678300905984, - "grad_norm": 1.0658780336380005, - "learning_rate": 2.165899301945641e-05, - "loss": 0.0939, - "step": 18720 - }, - { - "epoch": 1.390910441110946, - "grad_norm": 1.9189115762710571, - "learning_rate": 2.1654537353334324e-05, - "loss": 0.0844, - "step": 18730 - }, - { - "epoch": 1.3916530521312938, - "grad_norm": 1.009257197380066, - "learning_rate": 2.165008168721224e-05, - "loss": 0.0549, - "step": 18740 - }, - { - "epoch": 1.3923956631516412, - "grad_norm": 1.1717352867126465, - "learning_rate": 2.1645626021090154e-05, - "loss": 0.0741, - "step": 18750 - }, - { - "epoch": 1.3931382741719887, - "grad_norm": 1.070173740386963, - "learning_rate": 2.1641170354968066e-05, - "loss": 0.099, - "step": 18760 - }, - { - "epoch": 1.3938808851923363, - "grad_norm": 2.932997465133667, - "learning_rate": 2.1636714688845984e-05, - "loss": 0.0674, - "step": 18770 - }, - { - "epoch": 1.3946234962126838, - "grad_norm": 0.7119723558425903, - "learning_rate": 2.16322590227239e-05, - "loss": 0.0768, - "step": 18780 - }, - { - "epoch": 1.3953661072330314, - "grad_norm": 2.5483529567718506, - "learning_rate": 2.162780335660181e-05, - "loss": 0.0427, - "step": 18790 - }, - { - "epoch": 1.396108718253379, - "grad_norm": 1.3585890531539917, - "learning_rate": 2.162334769047973e-05, - "loss": 0.0429, - "step": 18800 - }, - { - "epoch": 1.3968513292737263, - "grad_norm": 1.786074161529541, - "learning_rate": 2.161889202435764e-05, - "loss": 0.0965, - "step": 18810 - }, - { - "epoch": 1.397593940294074, - "grad_norm": 2.01636004447937, - "learning_rate": 2.1614436358235556e-05, - "loss": 0.0469, - "step": 18820 - }, - { - "epoch": 1.3983365513144215, - "grad_norm": 1.713550090789795, - "learning_rate": 2.160998069211347e-05, - "loss": 0.0725, - "step": 18830 - }, - { - "epoch": 1.3990791623347691, - "grad_norm": 4.37731409072876, - "learning_rate": 2.1605525025991386e-05, - "loss": 0.0788, - "step": 18840 - }, - { - "epoch": 1.3998217733551166, - "grad_norm": 2.8098561763763428, - "learning_rate": 2.16010693598693e-05, - "loss": 0.0677, - "step": 18850 - }, - { - "epoch": 1.400564384375464, - "grad_norm": 2.595644235610962, - "learning_rate": 2.1596613693747216e-05, - "loss": 0.088, - "step": 18860 - }, - { - "epoch": 1.4013069953958117, - "grad_norm": 2.6050455570220947, - "learning_rate": 2.159215802762513e-05, - "loss": 0.0683, - "step": 18870 - }, - { - "epoch": 1.4020496064161592, - "grad_norm": 1.2562687397003174, - "learning_rate": 2.1587702361503046e-05, - "loss": 0.0605, - "step": 18880 - }, - { - "epoch": 1.4027922174365068, - "grad_norm": 0.9690125584602356, - "learning_rate": 2.158324669538096e-05, - "loss": 0.0506, - "step": 18890 - }, - { - "epoch": 1.4035348284568543, - "grad_norm": 0.6721828579902649, - "learning_rate": 2.1578791029258876e-05, - "loss": 0.0553, - "step": 18900 - }, - { - "epoch": 1.4042774394772017, - "grad_norm": 0.4955576956272125, - "learning_rate": 2.157433536313679e-05, - "loss": 0.065, - "step": 18910 - }, - { - "epoch": 1.4050200504975494, - "grad_norm": 2.1765050888061523, - "learning_rate": 2.1569879697014702e-05, - "loss": 0.0669, - "step": 18920 - }, - { - "epoch": 1.4057626615178969, - "grad_norm": 1.8637815713882446, - "learning_rate": 2.1565424030892617e-05, - "loss": 0.0629, - "step": 18930 - }, - { - "epoch": 1.4065052725382445, - "grad_norm": 2.6646058559417725, - "learning_rate": 2.1560968364770536e-05, - "loss": 0.0729, - "step": 18940 - }, - { - "epoch": 1.407247883558592, - "grad_norm": 1.4432345628738403, - "learning_rate": 2.1556512698648447e-05, - "loss": 0.0601, - "step": 18950 - }, - { - "epoch": 1.4079904945789394, - "grad_norm": 0.9706814885139465, - "learning_rate": 2.1552057032526362e-05, - "loss": 0.0596, - "step": 18960 - }, - { - "epoch": 1.408733105599287, - "grad_norm": 1.4810203313827515, - "learning_rate": 2.154760136640428e-05, - "loss": 0.0816, - "step": 18970 - }, - { - "epoch": 1.4094757166196346, - "grad_norm": 0.5370448231697083, - "learning_rate": 2.1543145700282192e-05, - "loss": 0.0685, - "step": 18980 - }, - { - "epoch": 1.4102183276399822, - "grad_norm": 1.4338277578353882, - "learning_rate": 2.1538690034160107e-05, - "loss": 0.0821, - "step": 18990 - }, - { - "epoch": 1.4109609386603297, - "grad_norm": 1.7360191345214844, - "learning_rate": 2.1534234368038022e-05, - "loss": 0.0853, - "step": 19000 - }, - { - "epoch": 1.4117035496806771, - "grad_norm": 1.8380330801010132, - "learning_rate": 2.1529778701915937e-05, - "loss": 0.0599, - "step": 19010 - }, - { - "epoch": 1.4124461607010248, - "grad_norm": 0.9603464007377625, - "learning_rate": 2.1525323035793852e-05, - "loss": 0.0783, - "step": 19020 - }, - { - "epoch": 1.4131887717213725, - "grad_norm": 1.198040246963501, - "learning_rate": 2.1520867369671764e-05, - "loss": 0.0801, - "step": 19030 - }, - { - "epoch": 1.41393138274172, - "grad_norm": 2.3278064727783203, - "learning_rate": 2.1516411703549682e-05, - "loss": 0.0785, - "step": 19040 - }, - { - "epoch": 1.4146739937620674, - "grad_norm": 2.149242877960205, - "learning_rate": 2.1511956037427597e-05, - "loss": 0.0699, - "step": 19050 - }, - { - "epoch": 1.415416604782415, - "grad_norm": 1.5481926202774048, - "learning_rate": 2.150750037130551e-05, - "loss": 0.0785, - "step": 19060 - }, - { - "epoch": 1.4161592158027625, - "grad_norm": 1.473336100578308, - "learning_rate": 2.1503044705183427e-05, - "loss": 0.1051, - "step": 19070 - }, - { - "epoch": 1.4169018268231102, - "grad_norm": 1.15213143825531, - "learning_rate": 2.1498589039061342e-05, - "loss": 0.08, - "step": 19080 - }, - { - "epoch": 1.4176444378434576, - "grad_norm": 1.5161607265472412, - "learning_rate": 2.1494133372939254e-05, - "loss": 0.08, - "step": 19090 - }, - { - "epoch": 1.418387048863805, - "grad_norm": 1.6533968448638916, - "learning_rate": 2.148967770681717e-05, - "loss": 0.0633, - "step": 19100 - }, - { - "epoch": 1.4191296598841527, - "grad_norm": 3.5496084690093994, - "learning_rate": 2.1485222040695087e-05, - "loss": 0.0849, - "step": 19110 - }, - { - "epoch": 1.4198722709045002, - "grad_norm": 0.9919148683547974, - "learning_rate": 2.1480766374573e-05, - "loss": 0.0349, - "step": 19120 - }, - { - "epoch": 1.4206148819248479, - "grad_norm": 1.724232792854309, - "learning_rate": 2.1476310708450914e-05, - "loss": 0.0672, - "step": 19130 - }, - { - "epoch": 1.4213574929451953, - "grad_norm": 2.5495779514312744, - "learning_rate": 2.147185504232883e-05, - "loss": 0.0786, - "step": 19140 - }, - { - "epoch": 1.4221001039655428, - "grad_norm": 1.9082344770431519, - "learning_rate": 2.1467399376206744e-05, - "loss": 0.0668, - "step": 19150 - }, - { - "epoch": 1.4228427149858904, - "grad_norm": 0.5212798714637756, - "learning_rate": 2.146294371008466e-05, - "loss": 0.059, - "step": 19160 - }, - { - "epoch": 1.4235853260062379, - "grad_norm": 1.6344729661941528, - "learning_rate": 2.145848804396257e-05, - "loss": 0.0821, - "step": 19170 - }, - { - "epoch": 1.4243279370265856, - "grad_norm": 1.4449615478515625, - "learning_rate": 2.145403237784049e-05, - "loss": 0.0597, - "step": 19180 - }, - { - "epoch": 1.425070548046933, - "grad_norm": 1.3735612630844116, - "learning_rate": 2.1449576711718404e-05, - "loss": 0.049, - "step": 19190 - }, - { - "epoch": 1.4258131590672805, - "grad_norm": 1.045433521270752, - "learning_rate": 2.1445121045596315e-05, - "loss": 0.079, - "step": 19200 - }, - { - "epoch": 1.4265557700876281, - "grad_norm": 0.6579065918922424, - "learning_rate": 2.1440665379474234e-05, - "loss": 0.0651, - "step": 19210 - }, - { - "epoch": 1.4272983811079756, - "grad_norm": 2.254539728164673, - "learning_rate": 2.1436209713352145e-05, - "loss": 0.0883, - "step": 19220 - }, - { - "epoch": 1.4280409921283232, - "grad_norm": 0.9771292209625244, - "learning_rate": 2.143175404723006e-05, - "loss": 0.0561, - "step": 19230 - }, - { - "epoch": 1.4287836031486707, - "grad_norm": 1.2068768739700317, - "learning_rate": 2.142729838110798e-05, - "loss": 0.0761, - "step": 19240 - }, - { - "epoch": 1.4295262141690182, - "grad_norm": 2.0585765838623047, - "learning_rate": 2.142284271498589e-05, - "loss": 0.0978, - "step": 19250 - }, - { - "epoch": 1.4302688251893658, - "grad_norm": 1.229931354522705, - "learning_rate": 2.1418387048863805e-05, - "loss": 0.0664, - "step": 19260 - }, - { - "epoch": 1.4310114362097133, - "grad_norm": 1.6777613162994385, - "learning_rate": 2.141393138274172e-05, - "loss": 0.0979, - "step": 19270 - }, - { - "epoch": 1.431754047230061, - "grad_norm": 1.9495104551315308, - "learning_rate": 2.1409475716619635e-05, - "loss": 0.0684, - "step": 19280 - }, - { - "epoch": 1.4324966582504084, - "grad_norm": 2.273432493209839, - "learning_rate": 2.140502005049755e-05, - "loss": 0.0975, - "step": 19290 - }, - { - "epoch": 1.4332392692707558, - "grad_norm": 1.896386742591858, - "learning_rate": 2.1400564384375465e-05, - "loss": 0.0974, - "step": 19300 - }, - { - "epoch": 1.4339818802911035, - "grad_norm": 0.9202299118041992, - "learning_rate": 2.139610871825338e-05, - "loss": 0.0809, - "step": 19310 - }, - { - "epoch": 1.4347244913114512, - "grad_norm": 1.1943680047988892, - "learning_rate": 2.1391653052131295e-05, - "loss": 0.0808, - "step": 19320 - }, - { - "epoch": 1.4354671023317986, - "grad_norm": 2.0343806743621826, - "learning_rate": 2.1387197386009207e-05, - "loss": 0.072, - "step": 19330 - }, - { - "epoch": 1.436209713352146, - "grad_norm": 3.626370668411255, - "learning_rate": 2.1382741719887122e-05, - "loss": 0.0568, - "step": 19340 - }, - { - "epoch": 1.4369523243724938, - "grad_norm": 5.546724796295166, - "learning_rate": 2.137828605376504e-05, - "loss": 0.0625, - "step": 19350 - }, - { - "epoch": 1.4376949353928412, - "grad_norm": 1.03886878490448, - "learning_rate": 2.1373830387642952e-05, - "loss": 0.0897, - "step": 19360 - }, - { - "epoch": 1.4384375464131889, - "grad_norm": 2.2745583057403564, - "learning_rate": 2.1369374721520867e-05, - "loss": 0.0781, - "step": 19370 - }, - { - "epoch": 1.4391801574335363, - "grad_norm": 2.1215226650238037, - "learning_rate": 2.1364919055398785e-05, - "loss": 0.0764, - "step": 19380 - }, - { - "epoch": 1.4399227684538838, - "grad_norm": 1.7415093183517456, - "learning_rate": 2.1360463389276697e-05, - "loss": 0.0959, - "step": 19390 - }, - { - "epoch": 1.4406653794742315, - "grad_norm": 4.175904750823975, - "learning_rate": 2.1356007723154612e-05, - "loss": 0.0898, - "step": 19400 - }, - { - "epoch": 1.441407990494579, - "grad_norm": 1.0260313749313354, - "learning_rate": 2.1351552057032527e-05, - "loss": 0.1057, - "step": 19410 - }, - { - "epoch": 1.4421506015149266, - "grad_norm": 1.1393053531646729, - "learning_rate": 2.1347096390910442e-05, - "loss": 0.0632, - "step": 19420 - }, - { - "epoch": 1.442893212535274, - "grad_norm": 0.7006543278694153, - "learning_rate": 2.1342640724788357e-05, - "loss": 0.0601, - "step": 19430 - }, - { - "epoch": 1.4436358235556215, - "grad_norm": 1.356958270072937, - "learning_rate": 2.133818505866627e-05, - "loss": 0.0522, - "step": 19440 - }, - { - "epoch": 1.4443784345759692, - "grad_norm": 0.9803171753883362, - "learning_rate": 2.1333729392544187e-05, - "loss": 0.0593, - "step": 19450 - }, - { - "epoch": 1.4451210455963166, - "grad_norm": 0.602212131023407, - "learning_rate": 2.1329273726422102e-05, - "loss": 0.0412, - "step": 19460 - }, - { - "epoch": 1.4458636566166643, - "grad_norm": 2.612487554550171, - "learning_rate": 2.1324818060300013e-05, - "loss": 0.1007, - "step": 19470 - }, - { - "epoch": 1.4466062676370117, - "grad_norm": 2.2010037899017334, - "learning_rate": 2.1320362394177932e-05, - "loss": 0.0845, - "step": 19480 - }, - { - "epoch": 1.4473488786573592, - "grad_norm": 1.3629838228225708, - "learning_rate": 2.1315906728055847e-05, - "loss": 0.0794, - "step": 19490 - }, - { - "epoch": 1.4480914896777068, - "grad_norm": 1.0975171327590942, - "learning_rate": 2.131145106193376e-05, - "loss": 0.0656, - "step": 19500 - }, - { - "epoch": 1.4488341006980543, - "grad_norm": 1.8527283668518066, - "learning_rate": 2.1306995395811673e-05, - "loss": 0.0724, - "step": 19510 - }, - { - "epoch": 1.449576711718402, - "grad_norm": 1.6812669038772583, - "learning_rate": 2.1302539729689592e-05, - "loss": 0.0959, - "step": 19520 - }, - { - "epoch": 1.4503193227387494, - "grad_norm": 1.0494896173477173, - "learning_rate": 2.1298084063567503e-05, - "loss": 0.0544, - "step": 19530 - }, - { - "epoch": 1.4510619337590969, - "grad_norm": 0.5817059874534607, - "learning_rate": 2.129362839744542e-05, - "loss": 0.0808, - "step": 19540 - }, - { - "epoch": 1.4518045447794445, - "grad_norm": 1.512558102607727, - "learning_rate": 2.1289172731323333e-05, - "loss": 0.0952, - "step": 19550 - }, - { - "epoch": 1.452547155799792, - "grad_norm": 4.8917694091796875, - "learning_rate": 2.128471706520125e-05, - "loss": 0.0987, - "step": 19560 - }, - { - "epoch": 1.4532897668201397, - "grad_norm": 2.3295490741729736, - "learning_rate": 2.1280261399079163e-05, - "loss": 0.0872, - "step": 19570 - }, - { - "epoch": 1.4540323778404871, - "grad_norm": 1.3101624250411987, - "learning_rate": 2.1275805732957075e-05, - "loss": 0.0484, - "step": 19580 - }, - { - "epoch": 1.4547749888608346, - "grad_norm": 3.150947332382202, - "learning_rate": 2.1271350066834993e-05, - "loss": 0.0876, - "step": 19590 - }, - { - "epoch": 1.4555175998811822, - "grad_norm": 1.6120469570159912, - "learning_rate": 2.1266894400712908e-05, - "loss": 0.0806, - "step": 19600 - }, - { - "epoch": 1.45626021090153, - "grad_norm": 1.210063099861145, - "learning_rate": 2.126243873459082e-05, - "loss": 0.0542, - "step": 19610 - }, - { - "epoch": 1.4570028219218774, - "grad_norm": 2.553395986557007, - "learning_rate": 2.1257983068468738e-05, - "loss": 0.0743, - "step": 19620 - }, - { - "epoch": 1.4577454329422248, - "grad_norm": 2.5680768489837646, - "learning_rate": 2.125352740234665e-05, - "loss": 0.0764, - "step": 19630 - }, - { - "epoch": 1.4584880439625725, - "grad_norm": 1.1068662405014038, - "learning_rate": 2.1249071736224565e-05, - "loss": 0.0778, - "step": 19640 - }, - { - "epoch": 1.45923065498292, - "grad_norm": 2.1891543865203857, - "learning_rate": 2.1244616070102483e-05, - "loss": 0.0654, - "step": 19650 - }, - { - "epoch": 1.4599732660032676, - "grad_norm": 1.6965099573135376, - "learning_rate": 2.1240160403980395e-05, - "loss": 0.0634, - "step": 19660 - }, - { - "epoch": 1.460715877023615, - "grad_norm": 2.3436694145202637, - "learning_rate": 2.123570473785831e-05, - "loss": 0.098, - "step": 19670 - }, - { - "epoch": 1.4614584880439625, - "grad_norm": 0.8156054615974426, - "learning_rate": 2.1231249071736225e-05, - "loss": 0.0676, - "step": 19680 - }, - { - "epoch": 1.4622010990643102, - "grad_norm": 1.2002021074295044, - "learning_rate": 2.122679340561414e-05, - "loss": 0.082, - "step": 19690 - }, - { - "epoch": 1.4629437100846576, - "grad_norm": 1.8691362142562866, - "learning_rate": 2.1222337739492055e-05, - "loss": 0.0487, - "step": 19700 - }, - { - "epoch": 1.4636863211050053, - "grad_norm": 2.66479229927063, - "learning_rate": 2.121788207336997e-05, - "loss": 0.0784, - "step": 19710 - }, - { - "epoch": 1.4644289321253527, - "grad_norm": 1.1679737567901611, - "learning_rate": 2.1213426407247885e-05, - "loss": 0.0645, - "step": 19720 - }, - { - "epoch": 1.4651715431457002, - "grad_norm": 1.454779028892517, - "learning_rate": 2.12089707411258e-05, - "loss": 0.0739, - "step": 19730 - }, - { - "epoch": 1.4659141541660479, - "grad_norm": 2.86212420463562, - "learning_rate": 2.120451507500371e-05, - "loss": 0.0479, - "step": 19740 - }, - { - "epoch": 1.4666567651863953, - "grad_norm": 1.7495118379592896, - "learning_rate": 2.1200059408881626e-05, - "loss": 0.0796, - "step": 19750 - }, - { - "epoch": 1.467399376206743, - "grad_norm": 1.2656298875808716, - "learning_rate": 2.1195603742759545e-05, - "loss": 0.079, - "step": 19760 - }, - { - "epoch": 1.4681419872270904, - "grad_norm": 0.7113642692565918, - "learning_rate": 2.1191148076637456e-05, - "loss": 0.0707, - "step": 19770 - }, - { - "epoch": 1.468884598247438, - "grad_norm": 0.5482021570205688, - "learning_rate": 2.118669241051537e-05, - "loss": 0.0678, - "step": 19780 - }, - { - "epoch": 1.4696272092677856, - "grad_norm": 2.1381452083587646, - "learning_rate": 2.118223674439329e-05, - "loss": 0.0847, - "step": 19790 - }, - { - "epoch": 1.470369820288133, - "grad_norm": 1.8469703197479248, - "learning_rate": 2.11777810782712e-05, - "loss": 0.0654, - "step": 19800 - }, - { - "epoch": 1.4711124313084807, - "grad_norm": 1.900571346282959, - "learning_rate": 2.1173325412149116e-05, - "loss": 0.05, - "step": 19810 - }, - { - "epoch": 1.4718550423288281, - "grad_norm": 0.9532872438430786, - "learning_rate": 2.116886974602703e-05, - "loss": 0.0535, - "step": 19820 - }, - { - "epoch": 1.4725976533491756, - "grad_norm": 1.2694770097732544, - "learning_rate": 2.1164414079904946e-05, - "loss": 0.0907, - "step": 19830 - }, - { - "epoch": 1.4733402643695233, - "grad_norm": 0.9396808743476868, - "learning_rate": 2.115995841378286e-05, - "loss": 0.0592, - "step": 19840 - }, - { - "epoch": 1.4740828753898707, - "grad_norm": 1.6375855207443237, - "learning_rate": 2.1155502747660773e-05, - "loss": 0.0707, - "step": 19850 - }, - { - "epoch": 1.4748254864102184, - "grad_norm": 2.0587351322174072, - "learning_rate": 2.115104708153869e-05, - "loss": 0.084, - "step": 19860 - }, - { - "epoch": 1.4755680974305658, - "grad_norm": 0.8906083106994629, - "learning_rate": 2.1146591415416606e-05, - "loss": 0.078, - "step": 19870 - }, - { - "epoch": 1.4763107084509133, - "grad_norm": 0.7745434045791626, - "learning_rate": 2.1142135749294518e-05, - "loss": 0.0601, - "step": 19880 - }, - { - "epoch": 1.477053319471261, - "grad_norm": 1.4954042434692383, - "learning_rate": 2.1137680083172436e-05, - "loss": 0.0797, - "step": 19890 - }, - { - "epoch": 1.4777959304916086, - "grad_norm": 1.5005775690078735, - "learning_rate": 2.113322441705035e-05, - "loss": 0.0563, - "step": 19900 - }, - { - "epoch": 1.478538541511956, - "grad_norm": 1.535308837890625, - "learning_rate": 2.1128768750928263e-05, - "loss": 0.0555, - "step": 19910 - }, - { - "epoch": 1.4792811525323035, - "grad_norm": 1.6805989742279053, - "learning_rate": 2.1124313084806178e-05, - "loss": 0.0413, - "step": 19920 - }, - { - "epoch": 1.4800237635526512, - "grad_norm": 1.622406244277954, - "learning_rate": 2.1119857418684096e-05, - "loss": 0.0634, - "step": 19930 - }, - { - "epoch": 1.4807663745729986, - "grad_norm": 1.6346774101257324, - "learning_rate": 2.1115401752562008e-05, - "loss": 0.0793, - "step": 19940 - }, - { - "epoch": 1.4815089855933463, - "grad_norm": 2.6986865997314453, - "learning_rate": 2.1110946086439923e-05, - "loss": 0.0689, - "step": 19950 - }, - { - "epoch": 1.4822515966136938, - "grad_norm": 2.2464749813079834, - "learning_rate": 2.1106490420317838e-05, - "loss": 0.0736, - "step": 19960 - }, - { - "epoch": 1.4829942076340412, - "grad_norm": 0.5846890211105347, - "learning_rate": 2.1102034754195753e-05, - "loss": 0.055, - "step": 19970 - }, - { - "epoch": 1.483736818654389, - "grad_norm": 2.5977261066436768, - "learning_rate": 2.1097579088073668e-05, - "loss": 0.0637, - "step": 19980 - }, - { - "epoch": 1.4844794296747363, - "grad_norm": 2.5025405883789062, - "learning_rate": 2.109312342195158e-05, - "loss": 0.0687, - "step": 19990 - }, - { - "epoch": 1.485222040695084, - "grad_norm": 0.5519008040428162, - "learning_rate": 2.1088667755829498e-05, - "loss": 0.0369, - "step": 20000 - }, - { - "epoch": 1.4859646517154315, - "grad_norm": 2.253185272216797, - "learning_rate": 2.1084212089707413e-05, - "loss": 0.0943, - "step": 20010 - }, - { - "epoch": 1.486707262735779, - "grad_norm": 1.709266185760498, - "learning_rate": 2.1079756423585325e-05, - "loss": 0.0721, - "step": 20020 - }, - { - "epoch": 1.4874498737561266, - "grad_norm": 1.548275351524353, - "learning_rate": 2.1075300757463243e-05, - "loss": 0.0819, - "step": 20030 - }, - { - "epoch": 1.488192484776474, - "grad_norm": 0.7985262274742126, - "learning_rate": 2.1070845091341158e-05, - "loss": 0.0646, - "step": 20040 - }, - { - "epoch": 1.4889350957968217, - "grad_norm": 2.2757515907287598, - "learning_rate": 2.106638942521907e-05, - "loss": 0.0766, - "step": 20050 - }, - { - "epoch": 1.4896777068171692, - "grad_norm": 1.4162112474441528, - "learning_rate": 2.1061933759096988e-05, - "loss": 0.0765, - "step": 20060 - }, - { - "epoch": 1.4904203178375166, - "grad_norm": 2.37214994430542, - "learning_rate": 2.10574780929749e-05, - "loss": 0.0794, - "step": 20070 - }, - { - "epoch": 1.4911629288578643, - "grad_norm": 2.4197685718536377, - "learning_rate": 2.1053022426852814e-05, - "loss": 0.1291, - "step": 20080 - }, - { - "epoch": 1.4919055398782117, - "grad_norm": 1.205228328704834, - "learning_rate": 2.104856676073073e-05, - "loss": 0.1016, - "step": 20090 - }, - { - "epoch": 1.4926481508985594, - "grad_norm": 0.3539555072784424, - "learning_rate": 2.1044111094608644e-05, - "loss": 0.0769, - "step": 20100 - }, - { - "epoch": 1.4933907619189069, - "grad_norm": 0.9795430302619934, - "learning_rate": 2.103965542848656e-05, - "loss": 0.0789, - "step": 20110 - }, - { - "epoch": 1.4941333729392543, - "grad_norm": 2.009331703186035, - "learning_rate": 2.1035199762364474e-05, - "loss": 0.0557, - "step": 20120 - }, - { - "epoch": 1.494875983959602, - "grad_norm": 2.3191285133361816, - "learning_rate": 2.103074409624239e-05, - "loss": 0.065, - "step": 20130 - }, - { - "epoch": 1.4956185949799494, - "grad_norm": 2.4035158157348633, - "learning_rate": 2.1026288430120304e-05, - "loss": 0.0668, - "step": 20140 - }, - { - "epoch": 1.496361206000297, - "grad_norm": 1.0998272895812988, - "learning_rate": 2.1021832763998216e-05, - "loss": 0.0536, - "step": 20150 - }, - { - "epoch": 1.4971038170206445, - "grad_norm": 3.933292865753174, - "learning_rate": 2.101737709787613e-05, - "loss": 0.1027, - "step": 20160 - }, - { - "epoch": 1.497846428040992, - "grad_norm": 0.8710070848464966, - "learning_rate": 2.101292143175405e-05, - "loss": 0.0521, - "step": 20170 - }, - { - "epoch": 1.4985890390613397, - "grad_norm": 3.407097816467285, - "learning_rate": 2.100846576563196e-05, - "loss": 0.0823, - "step": 20180 - }, - { - "epoch": 1.4993316500816873, - "grad_norm": 2.3524112701416016, - "learning_rate": 2.1004010099509876e-05, - "loss": 0.1098, - "step": 20190 - }, - { - "epoch": 1.5000742611020348, - "grad_norm": 2.8110392093658447, - "learning_rate": 2.0999554433387794e-05, - "loss": 0.0903, - "step": 20200 - }, - { - "epoch": 1.5008168721223822, - "grad_norm": 2.2008228302001953, - "learning_rate": 2.0995098767265706e-05, - "loss": 0.0546, - "step": 20210 - }, - { - "epoch": 1.5015594831427297, - "grad_norm": 0.38597363233566284, - "learning_rate": 2.099064310114362e-05, - "loss": 0.049, - "step": 20220 - }, - { - "epoch": 1.5023020941630774, - "grad_norm": 1.819575309753418, - "learning_rate": 2.098618743502154e-05, - "loss": 0.0668, - "step": 20230 - }, - { - "epoch": 1.503044705183425, - "grad_norm": 1.7330279350280762, - "learning_rate": 2.098173176889945e-05, - "loss": 0.0746, - "step": 20240 - }, - { - "epoch": 1.5037873162037725, - "grad_norm": 1.4571242332458496, - "learning_rate": 2.0977276102777366e-05, - "loss": 0.0804, - "step": 20250 - }, - { - "epoch": 1.50452992722412, - "grad_norm": 0.8865562677383423, - "learning_rate": 2.0972820436655278e-05, - "loss": 0.08, - "step": 20260 - }, - { - "epoch": 1.5052725382444674, - "grad_norm": 1.0869084596633911, - "learning_rate": 2.0968364770533196e-05, - "loss": 0.0526, - "step": 20270 - }, - { - "epoch": 1.506015149264815, - "grad_norm": 1.397093653678894, - "learning_rate": 2.096390910441111e-05, - "loss": 0.0899, - "step": 20280 - }, - { - "epoch": 1.5067577602851627, - "grad_norm": 1.4997838735580444, - "learning_rate": 2.0959453438289023e-05, - "loss": 0.0929, - "step": 20290 - }, - { - "epoch": 1.5075003713055102, - "grad_norm": 2.05659556388855, - "learning_rate": 2.095499777216694e-05, - "loss": 0.0715, - "step": 20300 - }, - { - "epoch": 1.5082429823258576, - "grad_norm": 2.342632293701172, - "learning_rate": 2.0950542106044856e-05, - "loss": 0.0825, - "step": 20310 - }, - { - "epoch": 1.5089855933462053, - "grad_norm": 2.975311040878296, - "learning_rate": 2.0946086439922768e-05, - "loss": 0.0916, - "step": 20320 - }, - { - "epoch": 1.5097282043665528, - "grad_norm": 2.030165672302246, - "learning_rate": 2.0941630773800683e-05, - "loss": 0.0953, - "step": 20330 - }, - { - "epoch": 1.5104708153869004, - "grad_norm": 1.042168378829956, - "learning_rate": 2.09371751076786e-05, - "loss": 0.0659, - "step": 20340 - }, - { - "epoch": 1.5112134264072479, - "grad_norm": 1.342262864112854, - "learning_rate": 2.0932719441556513e-05, - "loss": 0.0727, - "step": 20350 - }, - { - "epoch": 1.5119560374275953, - "grad_norm": 1.2941288948059082, - "learning_rate": 2.0928263775434428e-05, - "loss": 0.0511, - "step": 20360 - }, - { - "epoch": 1.512698648447943, - "grad_norm": 3.6500983238220215, - "learning_rate": 2.0923808109312343e-05, - "loss": 0.1324, - "step": 20370 - }, - { - "epoch": 1.5134412594682907, - "grad_norm": 1.0113955736160278, - "learning_rate": 2.0919352443190258e-05, - "loss": 0.0696, - "step": 20380 - }, - { - "epoch": 1.5141838704886381, - "grad_norm": 1.395719289779663, - "learning_rate": 2.0914896777068173e-05, - "loss": 0.0683, - "step": 20390 - }, - { - "epoch": 1.5149264815089856, - "grad_norm": 1.7119672298431396, - "learning_rate": 2.0910441110946084e-05, - "loss": 0.062, - "step": 20400 - }, - { - "epoch": 1.515669092529333, - "grad_norm": 1.9944767951965332, - "learning_rate": 2.0905985444824003e-05, - "loss": 0.0649, - "step": 20410 - }, - { - "epoch": 1.5164117035496807, - "grad_norm": 1.1254253387451172, - "learning_rate": 2.0901529778701917e-05, - "loss": 0.0545, - "step": 20420 - }, - { - "epoch": 1.5171543145700284, - "grad_norm": 2.864976406097412, - "learning_rate": 2.089707411257983e-05, - "loss": 0.0984, - "step": 20430 - }, - { - "epoch": 1.5178969255903758, - "grad_norm": 2.1159725189208984, - "learning_rate": 2.0892618446457747e-05, - "loss": 0.06, - "step": 20440 - }, - { - "epoch": 1.5186395366107233, - "grad_norm": 0.9338457584381104, - "learning_rate": 2.0888162780335662e-05, - "loss": 0.0659, - "step": 20450 - }, - { - "epoch": 1.5193821476310707, - "grad_norm": 1.4100627899169922, - "learning_rate": 2.0883707114213574e-05, - "loss": 0.0716, - "step": 20460 - }, - { - "epoch": 1.5201247586514184, - "grad_norm": 1.341646432876587, - "learning_rate": 2.0879251448091492e-05, - "loss": 0.0737, - "step": 20470 - }, - { - "epoch": 1.520867369671766, - "grad_norm": 3.197697401046753, - "learning_rate": 2.0874795781969404e-05, - "loss": 0.0897, - "step": 20480 - }, - { - "epoch": 1.5216099806921135, - "grad_norm": 0.8237298727035522, - "learning_rate": 2.087034011584732e-05, - "loss": 0.0553, - "step": 20490 - }, - { - "epoch": 1.522352591712461, - "grad_norm": 1.78400456905365, - "learning_rate": 2.0865884449725234e-05, - "loss": 0.0745, - "step": 20500 - }, - { - "epoch": 1.5230952027328084, - "grad_norm": 2.6086509227752686, - "learning_rate": 2.086142878360315e-05, - "loss": 0.1298, - "step": 20510 - }, - { - "epoch": 1.523837813753156, - "grad_norm": 1.0803875923156738, - "learning_rate": 2.0856973117481064e-05, - "loss": 0.0673, - "step": 20520 - }, - { - "epoch": 1.5245804247735038, - "grad_norm": 0.390265554189682, - "learning_rate": 2.085251745135898e-05, - "loss": 0.0434, - "step": 20530 - }, - { - "epoch": 1.5253230357938512, - "grad_norm": 1.218558669090271, - "learning_rate": 2.0848061785236894e-05, - "loss": 0.0666, - "step": 20540 - }, - { - "epoch": 1.5260656468141987, - "grad_norm": 1.0969456434249878, - "learning_rate": 2.084360611911481e-05, - "loss": 0.095, - "step": 20550 - }, - { - "epoch": 1.526808257834546, - "grad_norm": 1.8818721771240234, - "learning_rate": 2.0839150452992724e-05, - "loss": 0.0808, - "step": 20560 - }, - { - "epoch": 1.5275508688548938, - "grad_norm": 2.201045036315918, - "learning_rate": 2.0834694786870636e-05, - "loss": 0.1017, - "step": 20570 - }, - { - "epoch": 1.5282934798752414, - "grad_norm": 1.147294521331787, - "learning_rate": 2.0830239120748554e-05, - "loss": 0.0744, - "step": 20580 - }, - { - "epoch": 1.529036090895589, - "grad_norm": 0.8210429549217224, - "learning_rate": 2.0825783454626466e-05, - "loss": 0.0528, - "step": 20590 - }, - { - "epoch": 1.5297787019159363, - "grad_norm": 3.2045974731445312, - "learning_rate": 2.082132778850438e-05, - "loss": 0.0918, - "step": 20600 - }, - { - "epoch": 1.530521312936284, - "grad_norm": 3.5814733505249023, - "learning_rate": 2.08168721223823e-05, - "loss": 0.0625, - "step": 20610 - }, - { - "epoch": 1.5312639239566315, - "grad_norm": 0.8140150308609009, - "learning_rate": 2.081241645626021e-05, - "loss": 0.1008, - "step": 20620 - }, - { - "epoch": 1.5320065349769791, - "grad_norm": 2.3892927169799805, - "learning_rate": 2.0807960790138126e-05, - "loss": 0.0747, - "step": 20630 - }, - { - "epoch": 1.5327491459973266, - "grad_norm": 1.1212306022644043, - "learning_rate": 2.0803505124016044e-05, - "loss": 0.0827, - "step": 20640 - }, - { - "epoch": 1.533491757017674, - "grad_norm": 1.2150450944900513, - "learning_rate": 2.0799049457893956e-05, - "loss": 0.056, - "step": 20650 - }, - { - "epoch": 1.5342343680380217, - "grad_norm": 2.157820463180542, - "learning_rate": 2.079459379177187e-05, - "loss": 0.0952, - "step": 20660 - }, - { - "epoch": 1.5349769790583694, - "grad_norm": 2.5032124519348145, - "learning_rate": 2.0790138125649782e-05, - "loss": 0.0591, - "step": 20670 - }, - { - "epoch": 1.5357195900787168, - "grad_norm": 1.2760533094406128, - "learning_rate": 2.07856824595277e-05, - "loss": 0.0746, - "step": 20680 - }, - { - "epoch": 1.5364622010990643, - "grad_norm": 3.50283145904541, - "learning_rate": 2.0781226793405616e-05, - "loss": 0.0743, - "step": 20690 - }, - { - "epoch": 1.5372048121194117, - "grad_norm": 2.669391393661499, - "learning_rate": 2.0776771127283527e-05, - "loss": 0.0721, - "step": 20700 - }, - { - "epoch": 1.5379474231397594, - "grad_norm": 3.1741724014282227, - "learning_rate": 2.0772315461161446e-05, - "loss": 0.0815, - "step": 20710 - }, - { - "epoch": 1.538690034160107, - "grad_norm": 2.0243866443634033, - "learning_rate": 2.076785979503936e-05, - "loss": 0.0678, - "step": 20720 - }, - { - "epoch": 1.5394326451804545, - "grad_norm": 3.2684175968170166, - "learning_rate": 2.0763404128917272e-05, - "loss": 0.0838, - "step": 20730 - }, - { - "epoch": 1.540175256200802, - "grad_norm": 1.0323513746261597, - "learning_rate": 2.0758948462795187e-05, - "loss": 0.0892, - "step": 20740 - }, - { - "epoch": 1.5409178672211494, - "grad_norm": 0.852678656578064, - "learning_rate": 2.0754492796673106e-05, - "loss": 0.0938, - "step": 20750 - }, - { - "epoch": 1.541660478241497, - "grad_norm": 1.296675205230713, - "learning_rate": 2.0750037130551017e-05, - "loss": 0.0761, - "step": 20760 - }, - { - "epoch": 1.5424030892618448, - "grad_norm": 0.6336604952812195, - "learning_rate": 2.0745581464428932e-05, - "loss": 0.0521, - "step": 20770 - }, - { - "epoch": 1.5431457002821922, - "grad_norm": 0.9696953296661377, - "learning_rate": 2.0741125798306847e-05, - "loss": 0.0773, - "step": 20780 - }, - { - "epoch": 1.5438883113025397, - "grad_norm": 2.513984203338623, - "learning_rate": 2.0736670132184762e-05, - "loss": 0.0938, - "step": 20790 - }, - { - "epoch": 1.5446309223228871, - "grad_norm": 1.3726938962936401, - "learning_rate": 2.0732214466062677e-05, - "loss": 0.0799, - "step": 20800 - }, - { - "epoch": 1.5453735333432348, - "grad_norm": 1.0419371128082275, - "learning_rate": 2.0727758799940592e-05, - "loss": 0.0765, - "step": 20810 - }, - { - "epoch": 1.5461161443635825, - "grad_norm": 4.229785442352295, - "learning_rate": 2.0723303133818507e-05, - "loss": 0.0761, - "step": 20820 - }, - { - "epoch": 1.54685875538393, - "grad_norm": 3.3588624000549316, - "learning_rate": 2.0718847467696422e-05, - "loss": 0.0683, - "step": 20830 - }, - { - "epoch": 1.5476013664042774, - "grad_norm": 1.541447401046753, - "learning_rate": 2.0714391801574334e-05, - "loss": 0.0785, - "step": 20840 - }, - { - "epoch": 1.5483439774246248, - "grad_norm": 1.4682326316833496, - "learning_rate": 2.0709936135452252e-05, - "loss": 0.056, - "step": 20850 - }, - { - "epoch": 1.5490865884449725, - "grad_norm": 1.9115557670593262, - "learning_rate": 2.0705480469330167e-05, - "loss": 0.0658, - "step": 20860 - }, - { - "epoch": 1.5498291994653202, - "grad_norm": 2.6742310523986816, - "learning_rate": 2.070102480320808e-05, - "loss": 0.0542, - "step": 20870 - }, - { - "epoch": 1.5505718104856676, - "grad_norm": 1.7176462411880493, - "learning_rate": 2.0696569137085997e-05, - "loss": 0.0604, - "step": 20880 - }, - { - "epoch": 1.551314421506015, - "grad_norm": 2.445446252822876, - "learning_rate": 2.069211347096391e-05, - "loss": 0.1063, - "step": 20890 - }, - { - "epoch": 1.5520570325263627, - "grad_norm": 2.274242401123047, - "learning_rate": 2.0687657804841824e-05, - "loss": 0.0842, - "step": 20900 - }, - { - "epoch": 1.5527996435467102, - "grad_norm": 0.9645692110061646, - "learning_rate": 2.068320213871974e-05, - "loss": 0.0487, - "step": 20910 - }, - { - "epoch": 1.5535422545670579, - "grad_norm": 2.782325506210327, - "learning_rate": 2.0678746472597654e-05, - "loss": 0.0783, - "step": 20920 - }, - { - "epoch": 1.5542848655874053, - "grad_norm": 2.1064939498901367, - "learning_rate": 2.067429080647557e-05, - "loss": 0.0555, - "step": 20930 - }, - { - "epoch": 1.5550274766077528, - "grad_norm": 2.168714761734009, - "learning_rate": 2.0669835140353484e-05, - "loss": 0.0864, - "step": 20940 - }, - { - "epoch": 1.5557700876281004, - "grad_norm": 1.001583456993103, - "learning_rate": 2.06653794742314e-05, - "loss": 0.0761, - "step": 20950 - }, - { - "epoch": 1.556512698648448, - "grad_norm": 1.3306684494018555, - "learning_rate": 2.0660923808109314e-05, - "loss": 0.0596, - "step": 20960 - }, - { - "epoch": 1.5572553096687956, - "grad_norm": 0.9188536405563354, - "learning_rate": 2.065646814198723e-05, - "loss": 0.0653, - "step": 20970 - }, - { - "epoch": 1.557997920689143, - "grad_norm": 1.159742832183838, - "learning_rate": 2.065201247586514e-05, - "loss": 0.0942, - "step": 20980 - }, - { - "epoch": 1.5587405317094905, - "grad_norm": 3.285846471786499, - "learning_rate": 2.064755680974306e-05, - "loss": 0.0673, - "step": 20990 - }, - { - "epoch": 1.5594831427298381, - "grad_norm": 1.9212596416473389, - "learning_rate": 2.064310114362097e-05, - "loss": 0.0477, - "step": 21000 - }, - { - "epoch": 1.5602257537501858, - "grad_norm": 1.4942169189453125, - "learning_rate": 2.0638645477498885e-05, - "loss": 0.0735, - "step": 21010 - }, - { - "epoch": 1.5609683647705332, - "grad_norm": 2.1586103439331055, - "learning_rate": 2.0634189811376804e-05, - "loss": 0.0594, - "step": 21020 - }, - { - "epoch": 1.5617109757908807, - "grad_norm": 0.46512606739997864, - "learning_rate": 2.0629734145254715e-05, - "loss": 0.0662, - "step": 21030 - }, - { - "epoch": 1.5624535868112281, - "grad_norm": 1.9914196729660034, - "learning_rate": 2.062527847913263e-05, - "loss": 0.0415, - "step": 21040 - }, - { - "epoch": 1.5631961978315758, - "grad_norm": 0.8854469060897827, - "learning_rate": 2.062082281301055e-05, - "loss": 0.0776, - "step": 21050 - }, - { - "epoch": 1.5639388088519235, - "grad_norm": 0.9890866875648499, - "learning_rate": 2.061636714688846e-05, - "loss": 0.0666, - "step": 21060 - }, - { - "epoch": 1.564681419872271, - "grad_norm": 1.0312457084655762, - "learning_rate": 2.0611911480766375e-05, - "loss": 0.0777, - "step": 21070 - }, - { - "epoch": 1.5654240308926184, - "grad_norm": 1.1136667728424072, - "learning_rate": 2.0607455814644287e-05, - "loss": 0.0548, - "step": 21080 - }, - { - "epoch": 1.5661666419129658, - "grad_norm": 1.6554890871047974, - "learning_rate": 2.0603000148522205e-05, - "loss": 0.0659, - "step": 21090 - }, - { - "epoch": 1.5669092529333135, - "grad_norm": 1.3619014024734497, - "learning_rate": 2.059854448240012e-05, - "loss": 0.0773, - "step": 21100 - }, - { - "epoch": 1.5676518639536612, - "grad_norm": 2.3610949516296387, - "learning_rate": 2.0594088816278032e-05, - "loss": 0.0513, - "step": 21110 - }, - { - "epoch": 1.5683944749740086, - "grad_norm": 1.5495983362197876, - "learning_rate": 2.058963315015595e-05, - "loss": 0.0767, - "step": 21120 - }, - { - "epoch": 1.569137085994356, - "grad_norm": 0.9158768653869629, - "learning_rate": 2.0585177484033865e-05, - "loss": 0.0831, - "step": 21130 - }, - { - "epoch": 1.5698796970147035, - "grad_norm": 0.8416073322296143, - "learning_rate": 2.0580721817911777e-05, - "loss": 0.0824, - "step": 21140 - }, - { - "epoch": 1.5706223080350512, - "grad_norm": 4.620311737060547, - "learning_rate": 2.0576266151789692e-05, - "loss": 0.0638, - "step": 21150 - }, - { - "epoch": 1.5713649190553989, - "grad_norm": 1.3405332565307617, - "learning_rate": 2.057181048566761e-05, - "loss": 0.0829, - "step": 21160 - }, - { - "epoch": 1.5721075300757463, - "grad_norm": 1.7006781101226807, - "learning_rate": 2.0567354819545522e-05, - "loss": 0.0619, - "step": 21170 - }, - { - "epoch": 1.5728501410960938, - "grad_norm": 2.3096494674682617, - "learning_rate": 2.0562899153423437e-05, - "loss": 0.069, - "step": 21180 - }, - { - "epoch": 1.5735927521164415, - "grad_norm": 1.4846018552780151, - "learning_rate": 2.0558443487301352e-05, - "loss": 0.0623, - "step": 21190 - }, - { - "epoch": 1.574335363136789, - "grad_norm": 1.648088812828064, - "learning_rate": 2.0553987821179267e-05, - "loss": 0.0544, - "step": 21200 - }, - { - "epoch": 1.5750779741571366, - "grad_norm": 1.0111830234527588, - "learning_rate": 2.0549532155057182e-05, - "loss": 0.0544, - "step": 21210 - }, - { - "epoch": 1.575820585177484, - "grad_norm": 3.3681936264038086, - "learning_rate": 2.0545076488935097e-05, - "loss": 0.0648, - "step": 21220 - }, - { - "epoch": 1.5765631961978315, - "grad_norm": 2.3970768451690674, - "learning_rate": 2.0540620822813012e-05, - "loss": 0.0545, - "step": 21230 - }, - { - "epoch": 1.5773058072181791, - "grad_norm": 3.242074489593506, - "learning_rate": 2.0536165156690927e-05, - "loss": 0.0729, - "step": 21240 - }, - { - "epoch": 1.5780484182385268, - "grad_norm": 0.41246843338012695, - "learning_rate": 2.0531709490568838e-05, - "loss": 0.0549, - "step": 21250 - }, - { - "epoch": 1.5787910292588743, - "grad_norm": 1.8131650686264038, - "learning_rate": 2.0527253824446757e-05, - "loss": 0.0633, - "step": 21260 - }, - { - "epoch": 1.5795336402792217, - "grad_norm": 1.4795677661895752, - "learning_rate": 2.052279815832467e-05, - "loss": 0.0668, - "step": 21270 - }, - { - "epoch": 1.5802762512995692, - "grad_norm": 0.927021324634552, - "learning_rate": 2.0518342492202583e-05, - "loss": 0.0673, - "step": 21280 - }, - { - "epoch": 1.5810188623199168, - "grad_norm": 3.456859827041626, - "learning_rate": 2.05138868260805e-05, - "loss": 0.0764, - "step": 21290 - }, - { - "epoch": 1.5817614733402645, - "grad_norm": 2.829115390777588, - "learning_rate": 2.0509431159958413e-05, - "loss": 0.0865, - "step": 21300 - }, - { - "epoch": 1.582504084360612, - "grad_norm": 3.3823864459991455, - "learning_rate": 2.0504975493836328e-05, - "loss": 0.0578, - "step": 21310 - }, - { - "epoch": 1.5832466953809594, - "grad_norm": 1.5947513580322266, - "learning_rate": 2.0500519827714243e-05, - "loss": 0.0997, - "step": 21320 - }, - { - "epoch": 1.5839893064013069, - "grad_norm": 2.1573078632354736, - "learning_rate": 2.0496064161592158e-05, - "loss": 0.0788, - "step": 21330 - }, - { - "epoch": 1.5847319174216545, - "grad_norm": 0.3694283962249756, - "learning_rate": 2.0491608495470073e-05, - "loss": 0.0275, - "step": 21340 - }, - { - "epoch": 1.5854745284420022, - "grad_norm": 1.0279000997543335, - "learning_rate": 2.0487152829347988e-05, - "loss": 0.0967, - "step": 21350 - }, - { - "epoch": 1.5862171394623497, - "grad_norm": 1.637056827545166, - "learning_rate": 2.0482697163225903e-05, - "loss": 0.0605, - "step": 21360 - }, - { - "epoch": 1.586959750482697, - "grad_norm": 1.0048965215682983, - "learning_rate": 2.0478241497103818e-05, - "loss": 0.0656, - "step": 21370 - }, - { - "epoch": 1.5877023615030446, - "grad_norm": 1.906299114227295, - "learning_rate": 2.0473785830981733e-05, - "loss": 0.0811, - "step": 21380 - }, - { - "epoch": 1.5884449725233922, - "grad_norm": 3.761151075363159, - "learning_rate": 2.0469330164859645e-05, - "loss": 0.0829, - "step": 21390 - }, - { - "epoch": 1.58918758354374, - "grad_norm": 3.385910749435425, - "learning_rate": 2.0464874498737563e-05, - "loss": 0.0476, - "step": 21400 - }, - { - "epoch": 1.5899301945640874, - "grad_norm": 1.3071726560592651, - "learning_rate": 2.0460418832615475e-05, - "loss": 0.0931, - "step": 21410 - }, - { - "epoch": 1.5906728055844348, - "grad_norm": 1.4268453121185303, - "learning_rate": 2.045596316649339e-05, - "loss": 0.1017, - "step": 21420 - }, - { - "epoch": 1.5914154166047823, - "grad_norm": 0.96445232629776, - "learning_rate": 2.0451507500371308e-05, - "loss": 0.0711, - "step": 21430 - }, - { - "epoch": 1.59215802762513, - "grad_norm": 1.8206923007965088, - "learning_rate": 2.044705183424922e-05, - "loss": 0.0682, - "step": 21440 - }, - { - "epoch": 1.5929006386454776, - "grad_norm": 1.7625178098678589, - "learning_rate": 2.0442596168127135e-05, - "loss": 0.0502, - "step": 21450 - }, - { - "epoch": 1.593643249665825, - "grad_norm": 1.0990653038024902, - "learning_rate": 2.0438140502005053e-05, - "loss": 0.0705, - "step": 21460 - }, - { - "epoch": 1.5943858606861725, - "grad_norm": 0.8799698352813721, - "learning_rate": 2.0433684835882965e-05, - "loss": 0.0513, - "step": 21470 - }, - { - "epoch": 1.5951284717065202, - "grad_norm": 1.2714344263076782, - "learning_rate": 2.042922916976088e-05, - "loss": 0.0877, - "step": 21480 - }, - { - "epoch": 1.5958710827268676, - "grad_norm": 0.9506982564926147, - "learning_rate": 2.0424773503638795e-05, - "loss": 0.1113, - "step": 21490 - }, - { - "epoch": 1.5966136937472153, - "grad_norm": 1.751642107963562, - "learning_rate": 2.042031783751671e-05, - "loss": 0.08, - "step": 21500 - }, - { - "epoch": 1.5973563047675627, - "grad_norm": 3.1982038021087646, - "learning_rate": 2.0415862171394625e-05, - "loss": 0.074, - "step": 21510 - }, - { - "epoch": 1.5980989157879102, - "grad_norm": 1.0957239866256714, - "learning_rate": 2.0411406505272536e-05, - "loss": 0.059, - "step": 21520 - }, - { - "epoch": 1.5988415268082579, - "grad_norm": 0.893408477306366, - "learning_rate": 2.0406950839150455e-05, - "loss": 0.0789, - "step": 21530 - }, - { - "epoch": 1.5995841378286055, - "grad_norm": 1.416986346244812, - "learning_rate": 2.040249517302837e-05, - "loss": 0.0941, - "step": 21540 - }, - { - "epoch": 1.600326748848953, - "grad_norm": 0.7409796118736267, - "learning_rate": 2.039803950690628e-05, - "loss": 0.0687, - "step": 21550 - }, - { - "epoch": 1.6010693598693004, - "grad_norm": 1.517598271369934, - "learning_rate": 2.0393583840784196e-05, - "loss": 0.0638, - "step": 21560 - }, - { - "epoch": 1.6018119708896479, - "grad_norm": 1.0811076164245605, - "learning_rate": 2.0389128174662115e-05, - "loss": 0.0784, - "step": 21570 - }, - { - "epoch": 1.6025545819099956, - "grad_norm": 3.014960765838623, - "learning_rate": 2.0384672508540026e-05, - "loss": 0.0529, - "step": 21580 - }, - { - "epoch": 1.6032971929303432, - "grad_norm": 4.6855669021606445, - "learning_rate": 2.038021684241794e-05, - "loss": 0.0614, - "step": 21590 - }, - { - "epoch": 1.6040398039506907, - "grad_norm": 2.0930423736572266, - "learning_rate": 2.0375761176295856e-05, - "loss": 0.0794, - "step": 21600 - }, - { - "epoch": 1.6047824149710381, - "grad_norm": 2.3684639930725098, - "learning_rate": 2.037130551017377e-05, - "loss": 0.0806, - "step": 21610 - }, - { - "epoch": 1.6055250259913856, - "grad_norm": 2.803929090499878, - "learning_rate": 2.0366849844051686e-05, - "loss": 0.1052, - "step": 21620 - }, - { - "epoch": 1.6062676370117333, - "grad_norm": 1.4284909963607788, - "learning_rate": 2.03623941779296e-05, - "loss": 0.078, - "step": 21630 - }, - { - "epoch": 1.607010248032081, - "grad_norm": 1.5730488300323486, - "learning_rate": 2.0357938511807516e-05, - "loss": 0.0623, - "step": 21640 - }, - { - "epoch": 1.6077528590524284, - "grad_norm": 1.4216201305389404, - "learning_rate": 2.035348284568543e-05, - "loss": 0.0399, - "step": 21650 - }, - { - "epoch": 1.6084954700727758, - "grad_norm": 1.4788250923156738, - "learning_rate": 2.0349027179563343e-05, - "loss": 0.0619, - "step": 21660 - }, - { - "epoch": 1.6092380810931233, - "grad_norm": 1.5001707077026367, - "learning_rate": 2.034457151344126e-05, - "loss": 0.0683, - "step": 21670 - }, - { - "epoch": 1.609980692113471, - "grad_norm": 2.592287063598633, - "learning_rate": 2.0340115847319176e-05, - "loss": 0.0971, - "step": 21680 - }, - { - "epoch": 1.6107233031338186, - "grad_norm": 2.6354775428771973, - "learning_rate": 2.0335660181197088e-05, - "loss": 0.0832, - "step": 21690 - }, - { - "epoch": 1.611465914154166, - "grad_norm": 0.5848486423492432, - "learning_rate": 2.0331204515075006e-05, - "loss": 0.066, - "step": 21700 - }, - { - "epoch": 1.6122085251745135, - "grad_norm": 2.4620141983032227, - "learning_rate": 2.0326748848952918e-05, - "loss": 0.0688, - "step": 21710 - }, - { - "epoch": 1.612951136194861, - "grad_norm": 0.8085265755653381, - "learning_rate": 2.0322293182830833e-05, - "loss": 0.0639, - "step": 21720 - }, - { - "epoch": 1.6136937472152086, - "grad_norm": 2.16105580329895, - "learning_rate": 2.0317837516708748e-05, - "loss": 0.111, - "step": 21730 - }, - { - "epoch": 1.6144363582355563, - "grad_norm": 2.140782356262207, - "learning_rate": 2.0313381850586663e-05, - "loss": 0.0902, - "step": 21740 - }, - { - "epoch": 1.6151789692559038, - "grad_norm": 1.3173938989639282, - "learning_rate": 2.0308926184464578e-05, - "loss": 0.0528, - "step": 21750 - }, - { - "epoch": 1.6159215802762512, - "grad_norm": 1.9319645166397095, - "learning_rate": 2.0304470518342493e-05, - "loss": 0.061, - "step": 21760 - }, - { - "epoch": 1.6166641912965989, - "grad_norm": 1.2936400175094604, - "learning_rate": 2.0300014852220408e-05, - "loss": 0.0601, - "step": 21770 - }, - { - "epoch": 1.6174068023169463, - "grad_norm": 0.23843184113502502, - "learning_rate": 2.0295559186098323e-05, - "loss": 0.0479, - "step": 21780 - }, - { - "epoch": 1.618149413337294, - "grad_norm": 5.628422260284424, - "learning_rate": 2.0291103519976238e-05, - "loss": 0.0611, - "step": 21790 - }, - { - "epoch": 1.6188920243576415, - "grad_norm": 1.189815640449524, - "learning_rate": 2.028664785385415e-05, - "loss": 0.0843, - "step": 21800 - }, - { - "epoch": 1.619634635377989, - "grad_norm": 0.9391959309577942, - "learning_rate": 2.0282192187732068e-05, - "loss": 0.0786, - "step": 21810 - }, - { - "epoch": 1.6203772463983366, - "grad_norm": 2.154853582382202, - "learning_rate": 2.027773652160998e-05, - "loss": 0.0816, - "step": 21820 - }, - { - "epoch": 1.6211198574186843, - "grad_norm": 2.5236504077911377, - "learning_rate": 2.0273280855487894e-05, - "loss": 0.0779, - "step": 21830 - }, - { - "epoch": 1.6218624684390317, - "grad_norm": 1.4820054769515991, - "learning_rate": 2.0268825189365813e-05, - "loss": 0.071, - "step": 21840 - }, - { - "epoch": 1.6226050794593792, - "grad_norm": 0.6480633616447449, - "learning_rate": 2.0264369523243724e-05, - "loss": 0.0475, - "step": 21850 - }, - { - "epoch": 1.6233476904797266, - "grad_norm": 2.3438937664031982, - "learning_rate": 2.025991385712164e-05, - "loss": 0.1059, - "step": 21860 - }, - { - "epoch": 1.6240903015000743, - "grad_norm": 3.0132994651794434, - "learning_rate": 2.0255458190999558e-05, - "loss": 0.065, - "step": 21870 - }, - { - "epoch": 1.624832912520422, - "grad_norm": 3.362128496170044, - "learning_rate": 2.025100252487747e-05, - "loss": 0.0946, - "step": 21880 - }, - { - "epoch": 1.6255755235407694, - "grad_norm": 2.070509672164917, - "learning_rate": 2.0246546858755384e-05, - "loss": 0.0744, - "step": 21890 - }, - { - "epoch": 1.6263181345611168, - "grad_norm": 2.716153383255005, - "learning_rate": 2.02420911926333e-05, - "loss": 0.0517, - "step": 21900 - }, - { - "epoch": 1.6270607455814643, - "grad_norm": 0.9877446293830872, - "learning_rate": 2.0237635526511214e-05, - "loss": 0.0744, - "step": 21910 - }, - { - "epoch": 1.627803356601812, - "grad_norm": 0.48732122778892517, - "learning_rate": 2.023317986038913e-05, - "loss": 0.0395, - "step": 21920 - }, - { - "epoch": 1.6285459676221596, - "grad_norm": 2.898503541946411, - "learning_rate": 2.022872419426704e-05, - "loss": 0.0776, - "step": 21930 - }, - { - "epoch": 1.629288578642507, - "grad_norm": 1.4797714948654175, - "learning_rate": 2.022426852814496e-05, - "loss": 0.0721, - "step": 21940 - }, - { - "epoch": 1.6300311896628545, - "grad_norm": 2.628574848175049, - "learning_rate": 2.0219812862022874e-05, - "loss": 0.0751, - "step": 21950 - }, - { - "epoch": 1.630773800683202, - "grad_norm": 0.8774972558021545, - "learning_rate": 2.0215357195900786e-05, - "loss": 0.0778, - "step": 21960 - }, - { - "epoch": 1.6315164117035497, - "grad_norm": 3.261282205581665, - "learning_rate": 2.02109015297787e-05, - "loss": 0.0896, - "step": 21970 - }, - { - "epoch": 1.6322590227238973, - "grad_norm": 1.9803194999694824, - "learning_rate": 2.020644586365662e-05, - "loss": 0.0978, - "step": 21980 - }, - { - "epoch": 1.6330016337442448, - "grad_norm": 2.566403865814209, - "learning_rate": 2.020199019753453e-05, - "loss": 0.0794, - "step": 21990 - }, - { - "epoch": 1.6337442447645922, - "grad_norm": 1.7836480140686035, - "learning_rate": 2.0197534531412446e-05, - "loss": 0.0688, - "step": 22000 - }, - { - "epoch": 1.6344868557849397, - "grad_norm": 1.82455575466156, - "learning_rate": 2.0193078865290364e-05, - "loss": 0.0448, - "step": 22010 - }, - { - "epoch": 1.6352294668052874, - "grad_norm": 2.849226713180542, - "learning_rate": 2.0188623199168276e-05, - "loss": 0.0601, - "step": 22020 - }, - { - "epoch": 1.635972077825635, - "grad_norm": 2.3754022121429443, - "learning_rate": 2.018416753304619e-05, - "loss": 0.0588, - "step": 22030 - }, - { - "epoch": 1.6367146888459825, - "grad_norm": 3.1693973541259766, - "learning_rate": 2.0179711866924106e-05, - "loss": 0.0812, - "step": 22040 - }, - { - "epoch": 1.63745729986633, - "grad_norm": 1.5568816661834717, - "learning_rate": 2.017525620080202e-05, - "loss": 0.0859, - "step": 22050 - }, - { - "epoch": 1.6381999108866776, - "grad_norm": 1.8013701438903809, - "learning_rate": 2.0170800534679936e-05, - "loss": 0.1001, - "step": 22060 - }, - { - "epoch": 1.638942521907025, - "grad_norm": 1.7124766111373901, - "learning_rate": 2.0166344868557847e-05, - "loss": 0.0779, - "step": 22070 - }, - { - "epoch": 1.6396851329273727, - "grad_norm": 3.0362048149108887, - "learning_rate": 2.0161889202435766e-05, - "loss": 0.0592, - "step": 22080 - }, - { - "epoch": 1.6404277439477202, - "grad_norm": 3.313502788543701, - "learning_rate": 2.015743353631368e-05, - "loss": 0.0933, - "step": 22090 - }, - { - "epoch": 1.6411703549680676, - "grad_norm": 1.0095113515853882, - "learning_rate": 2.0152977870191592e-05, - "loss": 0.0747, - "step": 22100 - }, - { - "epoch": 1.6419129659884153, - "grad_norm": 1.5468275547027588, - "learning_rate": 2.014852220406951e-05, - "loss": 0.0409, - "step": 22110 - }, - { - "epoch": 1.642655577008763, - "grad_norm": 4.015323162078857, - "learning_rate": 2.0144066537947422e-05, - "loss": 0.0768, - "step": 22120 - }, - { - "epoch": 1.6433981880291104, - "grad_norm": 2.2448365688323975, - "learning_rate": 2.0139610871825337e-05, - "loss": 0.059, - "step": 22130 - }, - { - "epoch": 1.6441407990494579, - "grad_norm": 1.0703582763671875, - "learning_rate": 2.0135155205703252e-05, - "loss": 0.0744, - "step": 22140 - }, - { - "epoch": 1.6448834100698053, - "grad_norm": 1.5378329753875732, - "learning_rate": 2.0130699539581167e-05, - "loss": 0.0652, - "step": 22150 - }, - { - "epoch": 1.645626021090153, - "grad_norm": 3.0120882987976074, - "learning_rate": 2.0126243873459082e-05, - "loss": 0.0799, - "step": 22160 - }, - { - "epoch": 1.6463686321105007, - "grad_norm": 2.719409227371216, - "learning_rate": 2.0121788207336997e-05, - "loss": 0.0785, - "step": 22170 - }, - { - "epoch": 1.6471112431308481, - "grad_norm": 2.3582966327667236, - "learning_rate": 2.0117332541214912e-05, - "loss": 0.0735, - "step": 22180 - }, - { - "epoch": 1.6478538541511956, - "grad_norm": 1.0670444965362549, - "learning_rate": 2.0112876875092827e-05, - "loss": 0.0701, - "step": 22190 - }, - { - "epoch": 1.648596465171543, - "grad_norm": 2.2394518852233887, - "learning_rate": 2.0108421208970742e-05, - "loss": 0.07, - "step": 22200 - }, - { - "epoch": 1.6493390761918907, - "grad_norm": 1.6190416812896729, - "learning_rate": 2.0103965542848657e-05, - "loss": 0.0538, - "step": 22210 - }, - { - "epoch": 1.6500816872122384, - "grad_norm": 3.418266773223877, - "learning_rate": 2.0099509876726572e-05, - "loss": 0.078, - "step": 22220 - }, - { - "epoch": 1.6508242982325858, - "grad_norm": 1.8200223445892334, - "learning_rate": 2.0095054210604484e-05, - "loss": 0.0464, - "step": 22230 - }, - { - "epoch": 1.6515669092529333, - "grad_norm": 1.1762034893035889, - "learning_rate": 2.00905985444824e-05, - "loss": 0.1071, - "step": 22240 - }, - { - "epoch": 1.6523095202732807, - "grad_norm": 2.370851755142212, - "learning_rate": 2.0086142878360317e-05, - "loss": 0.072, - "step": 22250 - }, - { - "epoch": 1.6530521312936284, - "grad_norm": 1.7569416761398315, - "learning_rate": 2.008168721223823e-05, - "loss": 0.0923, - "step": 22260 - }, - { - "epoch": 1.653794742313976, - "grad_norm": 1.144127368927002, - "learning_rate": 2.0077231546116144e-05, - "loss": 0.0771, - "step": 22270 - }, - { - "epoch": 1.6545373533343235, - "grad_norm": 2.696286201477051, - "learning_rate": 2.0072775879994062e-05, - "loss": 0.0989, - "step": 22280 - }, - { - "epoch": 1.655279964354671, - "grad_norm": 1.715278148651123, - "learning_rate": 2.0068320213871974e-05, - "loss": 0.0781, - "step": 22290 - }, - { - "epoch": 1.6560225753750184, - "grad_norm": 0.7108889818191528, - "learning_rate": 2.006386454774989e-05, - "loss": 0.0512, - "step": 22300 - }, - { - "epoch": 1.656765186395366, - "grad_norm": 1.54939603805542, - "learning_rate": 2.0059408881627804e-05, - "loss": 0.073, - "step": 22310 - }, - { - "epoch": 1.6575077974157137, - "grad_norm": 1.0879472494125366, - "learning_rate": 2.005495321550572e-05, - "loss": 0.0884, - "step": 22320 - }, - { - "epoch": 1.6582504084360612, - "grad_norm": 0.8523910641670227, - "learning_rate": 2.0050497549383634e-05, - "loss": 0.0762, - "step": 22330 - }, - { - "epoch": 1.6589930194564086, - "grad_norm": 0.9075714945793152, - "learning_rate": 2.0046041883261546e-05, - "loss": 0.0792, - "step": 22340 - }, - { - "epoch": 1.6597356304767563, - "grad_norm": 1.5962119102478027, - "learning_rate": 2.0041586217139464e-05, - "loss": 0.0665, - "step": 22350 - }, - { - "epoch": 1.6604782414971038, - "grad_norm": 2.9406886100769043, - "learning_rate": 2.003713055101738e-05, - "loss": 0.0658, - "step": 22360 - }, - { - "epoch": 1.6612208525174514, - "grad_norm": 1.0619057416915894, - "learning_rate": 2.003267488489529e-05, - "loss": 0.0719, - "step": 22370 - }, - { - "epoch": 1.661963463537799, - "grad_norm": 1.0932631492614746, - "learning_rate": 2.0028219218773206e-05, - "loss": 0.0747, - "step": 22380 - }, - { - "epoch": 1.6627060745581463, - "grad_norm": 2.542506217956543, - "learning_rate": 2.0023763552651124e-05, - "loss": 0.067, - "step": 22390 - }, - { - "epoch": 1.663448685578494, - "grad_norm": 2.6380186080932617, - "learning_rate": 2.0019307886529036e-05, - "loss": 0.033, - "step": 22400 - }, - { - "epoch": 1.6641912965988417, - "grad_norm": 1.5135997533798218, - "learning_rate": 2.001485222040695e-05, - "loss": 0.0786, - "step": 22410 - }, - { - "epoch": 1.6649339076191891, - "grad_norm": 0.8384225368499756, - "learning_rate": 2.001039655428487e-05, - "loss": 0.084, - "step": 22420 - }, - { - "epoch": 1.6656765186395366, - "grad_norm": 2.0017759799957275, - "learning_rate": 2.000594088816278e-05, - "loss": 0.0913, - "step": 22430 - }, - { - "epoch": 1.666419129659884, - "grad_norm": 2.2100701332092285, - "learning_rate": 2.0001485222040696e-05, - "loss": 0.0485, - "step": 22440 - }, - { - "epoch": 1.6671617406802317, - "grad_norm": 3.30169939994812, - "learning_rate": 1.999702955591861e-05, - "loss": 0.0674, - "step": 22450 - }, - { - "epoch": 1.6679043517005794, - "grad_norm": 0.9708051085472107, - "learning_rate": 1.9992573889796525e-05, - "loss": 0.0807, - "step": 22460 - }, - { - "epoch": 1.6686469627209268, - "grad_norm": 0.9182741045951843, - "learning_rate": 1.998811822367444e-05, - "loss": 0.0432, - "step": 22470 - }, - { - "epoch": 1.6693895737412743, - "grad_norm": 4.127451419830322, - "learning_rate": 1.9983662557552352e-05, - "loss": 0.0857, - "step": 22480 - }, - { - "epoch": 1.6701321847616217, - "grad_norm": 1.7990877628326416, - "learning_rate": 1.997920689143027e-05, - "loss": 0.0993, - "step": 22490 - }, - { - "epoch": 1.6708747957819694, - "grad_norm": 3.319918155670166, - "learning_rate": 1.9974751225308185e-05, - "loss": 0.0492, - "step": 22500 - }, - { - "epoch": 1.671617406802317, - "grad_norm": 1.4016786813735962, - "learning_rate": 1.9970295559186097e-05, - "loss": 0.0593, - "step": 22510 - }, - { - "epoch": 1.6723600178226645, - "grad_norm": 3.1249544620513916, - "learning_rate": 1.9965839893064015e-05, - "loss": 0.1065, - "step": 22520 - }, - { - "epoch": 1.673102628843012, - "grad_norm": 1.5883194208145142, - "learning_rate": 1.9961384226941927e-05, - "loss": 0.083, - "step": 22530 - }, - { - "epoch": 1.6738452398633594, - "grad_norm": 0.8119624257087708, - "learning_rate": 1.9956928560819842e-05, - "loss": 0.0768, - "step": 22540 - }, - { - "epoch": 1.674587850883707, - "grad_norm": 1.9466767311096191, - "learning_rate": 1.9952472894697757e-05, - "loss": 0.0688, - "step": 22550 - }, - { - "epoch": 1.6753304619040548, - "grad_norm": 1.9473903179168701, - "learning_rate": 1.9948017228575672e-05, - "loss": 0.0631, - "step": 22560 - }, - { - "epoch": 1.6760730729244022, - "grad_norm": 1.485198974609375, - "learning_rate": 1.9943561562453587e-05, - "loss": 0.0685, - "step": 22570 - }, - { - "epoch": 1.6768156839447497, - "grad_norm": 1.3554059267044067, - "learning_rate": 1.9939105896331502e-05, - "loss": 0.0596, - "step": 22580 - }, - { - "epoch": 1.6775582949650971, - "grad_norm": 2.8601107597351074, - "learning_rate": 1.9934650230209417e-05, - "loss": 0.0897, - "step": 22590 - }, - { - "epoch": 1.6783009059854448, - "grad_norm": 0.8527280688285828, - "learning_rate": 1.9930194564087332e-05, - "loss": 0.0749, - "step": 22600 - }, - { - "epoch": 1.6790435170057925, - "grad_norm": 1.9120954275131226, - "learning_rate": 1.9925738897965247e-05, - "loss": 0.0733, - "step": 22610 - }, - { - "epoch": 1.67978612802614, - "grad_norm": 0.8848724365234375, - "learning_rate": 1.9921283231843162e-05, - "loss": 0.047, - "step": 22620 - }, - { - "epoch": 1.6805287390464874, - "grad_norm": 1.2848988771438599, - "learning_rate": 1.9916827565721077e-05, - "loss": 0.0584, - "step": 22630 - }, - { - "epoch": 1.681271350066835, - "grad_norm": 1.2438756227493286, - "learning_rate": 1.991237189959899e-05, - "loss": 0.0784, - "step": 22640 - }, - { - "epoch": 1.6820139610871825, - "grad_norm": 0.8794949054718018, - "learning_rate": 1.9907916233476904e-05, - "loss": 0.0759, - "step": 22650 - }, - { - "epoch": 1.6827565721075302, - "grad_norm": 2.005244255065918, - "learning_rate": 1.9903460567354822e-05, - "loss": 0.0558, - "step": 22660 - }, - { - "epoch": 1.6834991831278776, - "grad_norm": 1.2207728624343872, - "learning_rate": 1.9899004901232734e-05, - "loss": 0.0891, - "step": 22670 - }, - { - "epoch": 1.684241794148225, - "grad_norm": 1.019566297531128, - "learning_rate": 1.989454923511065e-05, - "loss": 0.0762, - "step": 22680 - }, - { - "epoch": 1.6849844051685727, - "grad_norm": 1.250605583190918, - "learning_rate": 1.9890093568988567e-05, - "loss": 0.0728, - "step": 22690 - }, - { - "epoch": 1.6857270161889204, - "grad_norm": 2.422374963760376, - "learning_rate": 1.988563790286648e-05, - "loss": 0.0747, - "step": 22700 - }, - { - "epoch": 1.6864696272092679, - "grad_norm": 2.8228814601898193, - "learning_rate": 1.9881182236744394e-05, - "loss": 0.0741, - "step": 22710 - }, - { - "epoch": 1.6872122382296153, - "grad_norm": 2.183687448501587, - "learning_rate": 1.987672657062231e-05, - "loss": 0.0659, - "step": 22720 - }, - { - "epoch": 1.6879548492499628, - "grad_norm": 1.0389907360076904, - "learning_rate": 1.9872270904500224e-05, - "loss": 0.0901, - "step": 22730 - }, - { - "epoch": 1.6886974602703104, - "grad_norm": 2.4531607627868652, - "learning_rate": 1.986781523837814e-05, - "loss": 0.0963, - "step": 22740 - }, - { - "epoch": 1.689440071290658, - "grad_norm": 1.1364638805389404, - "learning_rate": 1.986335957225605e-05, - "loss": 0.0745, - "step": 22750 - }, - { - "epoch": 1.6901826823110055, - "grad_norm": 1.6292158365249634, - "learning_rate": 1.985890390613397e-05, - "loss": 0.0886, - "step": 22760 - }, - { - "epoch": 1.690925293331353, - "grad_norm": 2.255054473876953, - "learning_rate": 1.9854448240011884e-05, - "loss": 0.0885, - "step": 22770 - }, - { - "epoch": 1.6916679043517004, - "grad_norm": 0.5803804993629456, - "learning_rate": 1.9849992573889795e-05, - "loss": 0.0325, - "step": 22780 - }, - { - "epoch": 1.6924105153720481, - "grad_norm": 1.7144925594329834, - "learning_rate": 1.984553690776771e-05, - "loss": 0.0705, - "step": 22790 - }, - { - "epoch": 1.6931531263923958, - "grad_norm": 0.6633053421974182, - "learning_rate": 1.984108124164563e-05, - "loss": 0.0524, - "step": 22800 - }, - { - "epoch": 1.6938957374127432, - "grad_norm": 3.752182960510254, - "learning_rate": 1.983662557552354e-05, - "loss": 0.0732, - "step": 22810 - }, - { - "epoch": 1.6946383484330907, - "grad_norm": 1.711698055267334, - "learning_rate": 1.9832169909401455e-05, - "loss": 0.0639, - "step": 22820 - }, - { - "epoch": 1.6953809594534381, - "grad_norm": 3.3594610691070557, - "learning_rate": 1.9827714243279374e-05, - "loss": 0.0828, - "step": 22830 - }, - { - "epoch": 1.6961235704737858, - "grad_norm": 2.337766647338867, - "learning_rate": 1.9823258577157285e-05, - "loss": 0.0718, - "step": 22840 - }, - { - "epoch": 1.6968661814941335, - "grad_norm": 0.9109551310539246, - "learning_rate": 1.98188029110352e-05, - "loss": 0.0757, - "step": 22850 - }, - { - "epoch": 1.697608792514481, - "grad_norm": 2.4265153408050537, - "learning_rate": 1.9814347244913115e-05, - "loss": 0.1019, - "step": 22860 - }, - { - "epoch": 1.6983514035348284, - "grad_norm": 2.5311357975006104, - "learning_rate": 1.980989157879103e-05, - "loss": 0.07, - "step": 22870 - }, - { - "epoch": 1.6990940145551758, - "grad_norm": 1.0674959421157837, - "learning_rate": 1.9805435912668945e-05, - "loss": 0.0914, - "step": 22880 - }, - { - "epoch": 1.6998366255755235, - "grad_norm": 1.6489328145980835, - "learning_rate": 1.9800980246546857e-05, - "loss": 0.0893, - "step": 22890 - }, - { - "epoch": 1.7005792365958712, - "grad_norm": 1.366485595703125, - "learning_rate": 1.9796524580424775e-05, - "loss": 0.0594, - "step": 22900 - }, - { - "epoch": 1.7013218476162186, - "grad_norm": 1.1169344186782837, - "learning_rate": 1.979206891430269e-05, - "loss": 0.0928, - "step": 22910 - }, - { - "epoch": 1.702064458636566, - "grad_norm": 0.7352683544158936, - "learning_rate": 1.97876132481806e-05, - "loss": 0.0778, - "step": 22920 - }, - { - "epoch": 1.7028070696569138, - "grad_norm": 1.1200909614562988, - "learning_rate": 1.978315758205852e-05, - "loss": 0.0808, - "step": 22930 - }, - { - "epoch": 1.7035496806772612, - "grad_norm": 1.3726412057876587, - "learning_rate": 1.9778701915936435e-05, - "loss": 0.0678, - "step": 22940 - }, - { - "epoch": 1.7042922916976089, - "grad_norm": 2.1558868885040283, - "learning_rate": 1.9774246249814347e-05, - "loss": 0.0842, - "step": 22950 - }, - { - "epoch": 1.7050349027179563, - "grad_norm": 0.9559635519981384, - "learning_rate": 1.976979058369226e-05, - "loss": 0.0813, - "step": 22960 - }, - { - "epoch": 1.7057775137383038, - "grad_norm": 0.5382719039916992, - "learning_rate": 1.9765334917570177e-05, - "loss": 0.0669, - "step": 22970 - }, - { - "epoch": 1.7065201247586514, - "grad_norm": 1.2556627988815308, - "learning_rate": 1.976087925144809e-05, - "loss": 0.0981, - "step": 22980 - }, - { - "epoch": 1.7072627357789991, - "grad_norm": 1.57675039768219, - "learning_rate": 1.9756423585326007e-05, - "loss": 0.0876, - "step": 22990 - }, - { - "epoch": 1.7080053467993466, - "grad_norm": 1.055188536643982, - "learning_rate": 1.975196791920392e-05, - "loss": 0.0983, - "step": 23000 - }, - { - "epoch": 1.708747957819694, - "grad_norm": 0.611940324306488, - "learning_rate": 1.9747512253081837e-05, - "loss": 0.0705, - "step": 23010 - }, - { - "epoch": 1.7094905688400415, - "grad_norm": 2.1072449684143066, - "learning_rate": 1.974305658695975e-05, - "loss": 0.072, - "step": 23020 - }, - { - "epoch": 1.7102331798603891, - "grad_norm": 1.6510035991668701, - "learning_rate": 1.9738600920837667e-05, - "loss": 0.0522, - "step": 23030 - }, - { - "epoch": 1.7109757908807368, - "grad_norm": 0.40311571955680847, - "learning_rate": 1.973414525471558e-05, - "loss": 0.0883, - "step": 23040 - }, - { - "epoch": 1.7117184019010843, - "grad_norm": 3.123772144317627, - "learning_rate": 1.9729689588593493e-05, - "loss": 0.0727, - "step": 23050 - }, - { - "epoch": 1.7124610129214317, - "grad_norm": 0.9838127493858337, - "learning_rate": 1.9725233922471408e-05, - "loss": 0.0601, - "step": 23060 - }, - { - "epoch": 1.7132036239417792, - "grad_norm": 0.6052844524383545, - "learning_rate": 1.9720778256349327e-05, - "loss": 0.0905, - "step": 23070 - }, - { - "epoch": 1.7139462349621268, - "grad_norm": 2.6029303073883057, - "learning_rate": 1.9716322590227238e-05, - "loss": 0.0854, - "step": 23080 - }, - { - "epoch": 1.7146888459824745, - "grad_norm": 1.6953434944152832, - "learning_rate": 1.9711866924105153e-05, - "loss": 0.0739, - "step": 23090 - }, - { - "epoch": 1.715431457002822, - "grad_norm": 1.7296435832977295, - "learning_rate": 1.970741125798307e-05, - "loss": 0.0666, - "step": 23100 - }, - { - "epoch": 1.7161740680231694, - "grad_norm": 1.8964383602142334, - "learning_rate": 1.9702955591860983e-05, - "loss": 0.0687, - "step": 23110 - }, - { - "epoch": 1.7169166790435169, - "grad_norm": 1.0528844594955444, - "learning_rate": 1.9698499925738898e-05, - "loss": 0.0555, - "step": 23120 - }, - { - "epoch": 1.7176592900638645, - "grad_norm": 2.4928388595581055, - "learning_rate": 1.9694044259616813e-05, - "loss": 0.0738, - "step": 23130 - }, - { - "epoch": 1.7184019010842122, - "grad_norm": 2.2617714405059814, - "learning_rate": 1.9689588593494728e-05, - "loss": 0.1023, - "step": 23140 - }, - { - "epoch": 1.7191445121045597, - "grad_norm": 1.0434247255325317, - "learning_rate": 1.9685132927372643e-05, - "loss": 0.0923, - "step": 23150 - }, - { - "epoch": 1.719887123124907, - "grad_norm": 0.6594432592391968, - "learning_rate": 1.9680677261250555e-05, - "loss": 0.0626, - "step": 23160 - }, - { - "epoch": 1.7206297341452546, - "grad_norm": 2.8370988368988037, - "learning_rate": 1.9676221595128473e-05, - "loss": 0.093, - "step": 23170 - }, - { - "epoch": 1.7213723451656022, - "grad_norm": 0.7767960429191589, - "learning_rate": 1.9671765929006388e-05, - "loss": 0.0637, - "step": 23180 - }, - { - "epoch": 1.72211495618595, - "grad_norm": 1.8710448741912842, - "learning_rate": 1.96673102628843e-05, - "loss": 0.0675, - "step": 23190 - }, - { - "epoch": 1.7228575672062973, - "grad_norm": 3.039166212081909, - "learning_rate": 1.9662854596762215e-05, - "loss": 0.0883, - "step": 23200 - }, - { - "epoch": 1.7236001782266448, - "grad_norm": 4.762219429016113, - "learning_rate": 1.9658398930640133e-05, - "loss": 0.0519, - "step": 23210 - }, - { - "epoch": 1.7243427892469925, - "grad_norm": 1.641481876373291, - "learning_rate": 1.9653943264518045e-05, - "loss": 0.0918, - "step": 23220 - }, - { - "epoch": 1.72508540026734, - "grad_norm": 0.6783468127250671, - "learning_rate": 1.964948759839596e-05, - "loss": 0.0757, - "step": 23230 - }, - { - "epoch": 1.7258280112876876, - "grad_norm": 1.0476303100585938, - "learning_rate": 1.9645031932273878e-05, - "loss": 0.0607, - "step": 23240 - }, - { - "epoch": 1.726570622308035, - "grad_norm": 1.5306792259216309, - "learning_rate": 1.964057626615179e-05, - "loss": 0.0644, - "step": 23250 - }, - { - "epoch": 1.7273132333283825, - "grad_norm": 0.8044191598892212, - "learning_rate": 1.9636120600029705e-05, - "loss": 0.0881, - "step": 23260 - }, - { - "epoch": 1.7280558443487302, - "grad_norm": 1.202543020248413, - "learning_rate": 1.963166493390762e-05, - "loss": 0.0665, - "step": 23270 - }, - { - "epoch": 1.7287984553690778, - "grad_norm": 2.2589240074157715, - "learning_rate": 1.9627209267785535e-05, - "loss": 0.0831, - "step": 23280 - }, - { - "epoch": 1.7295410663894253, - "grad_norm": 2.0191476345062256, - "learning_rate": 1.962275360166345e-05, - "loss": 0.0765, - "step": 23290 - }, - { - "epoch": 1.7302836774097727, - "grad_norm": 2.8185505867004395, - "learning_rate": 1.961829793554136e-05, - "loss": 0.0791, - "step": 23300 - }, - { - "epoch": 1.7310262884301202, - "grad_norm": 1.082022786140442, - "learning_rate": 1.961384226941928e-05, - "loss": 0.0978, - "step": 23310 - }, - { - "epoch": 1.7317688994504679, - "grad_norm": 1.9204188585281372, - "learning_rate": 1.9609386603297195e-05, - "loss": 0.0893, - "step": 23320 - }, - { - "epoch": 1.7325115104708155, - "grad_norm": 1.1445153951644897, - "learning_rate": 1.9604930937175106e-05, - "loss": 0.0547, - "step": 23330 - }, - { - "epoch": 1.733254121491163, - "grad_norm": 0.6673332452774048, - "learning_rate": 1.9600475271053025e-05, - "loss": 0.0447, - "step": 23340 - }, - { - "epoch": 1.7339967325115104, - "grad_norm": 4.474247455596924, - "learning_rate": 1.959601960493094e-05, - "loss": 0.11, - "step": 23350 - }, - { - "epoch": 1.7347393435318579, - "grad_norm": 4.08743143081665, - "learning_rate": 1.959156393880885e-05, - "loss": 0.0967, - "step": 23360 - }, - { - "epoch": 1.7354819545522056, - "grad_norm": 1.5097987651824951, - "learning_rate": 1.9587108272686766e-05, - "loss": 0.0715, - "step": 23370 - }, - { - "epoch": 1.7362245655725532, - "grad_norm": 2.6313939094543457, - "learning_rate": 1.958265260656468e-05, - "loss": 0.0836, - "step": 23380 - }, - { - "epoch": 1.7369671765929007, - "grad_norm": 0.9366971850395203, - "learning_rate": 1.9578196940442596e-05, - "loss": 0.0911, - "step": 23390 - }, - { - "epoch": 1.7377097876132481, - "grad_norm": 0.614687979221344, - "learning_rate": 1.957374127432051e-05, - "loss": 0.0654, - "step": 23400 - }, - { - "epoch": 1.7384523986335956, - "grad_norm": 1.9698867797851562, - "learning_rate": 1.9569285608198426e-05, - "loss": 0.0922, - "step": 23410 - }, - { - "epoch": 1.7391950096539432, - "grad_norm": 2.608386754989624, - "learning_rate": 1.956482994207634e-05, - "loss": 0.0963, - "step": 23420 - }, - { - "epoch": 1.739937620674291, - "grad_norm": 3.6688835620880127, - "learning_rate": 1.9560374275954256e-05, - "loss": 0.0771, - "step": 23430 - }, - { - "epoch": 1.7406802316946384, - "grad_norm": 2.29097318649292, - "learning_rate": 1.955591860983217e-05, - "loss": 0.0723, - "step": 23440 - }, - { - "epoch": 1.7414228427149858, - "grad_norm": 1.2678636312484741, - "learning_rate": 1.9551462943710086e-05, - "loss": 0.0709, - "step": 23450 - }, - { - "epoch": 1.7421654537353333, - "grad_norm": 4.124483108520508, - "learning_rate": 1.9547007277588e-05, - "loss": 0.089, - "step": 23460 - }, - { - "epoch": 1.742908064755681, - "grad_norm": 0.28768646717071533, - "learning_rate": 1.9542551611465913e-05, - "loss": 0.0933, - "step": 23470 - }, - { - "epoch": 1.7436506757760286, - "grad_norm": 0.71043860912323, - "learning_rate": 1.953809594534383e-05, - "loss": 0.0806, - "step": 23480 - }, - { - "epoch": 1.744393286796376, - "grad_norm": 1.6145790815353394, - "learning_rate": 1.9533640279221743e-05, - "loss": 0.0659, - "step": 23490 - }, - { - "epoch": 1.7451358978167235, - "grad_norm": 1.6282793283462524, - "learning_rate": 1.9529184613099658e-05, - "loss": 0.1154, - "step": 23500 - }, - { - "epoch": 1.7458785088370712, - "grad_norm": 1.4098920822143555, - "learning_rate": 1.9524728946977576e-05, - "loss": 0.073, - "step": 23510 - }, - { - "epoch": 1.7466211198574186, - "grad_norm": 2.4502289295196533, - "learning_rate": 1.9520273280855488e-05, - "loss": 0.0688, - "step": 23520 - }, - { - "epoch": 1.7473637308777663, - "grad_norm": 1.5324982404708862, - "learning_rate": 1.9515817614733403e-05, - "loss": 0.0789, - "step": 23530 - }, - { - "epoch": 1.7481063418981138, - "grad_norm": 1.1918566226959229, - "learning_rate": 1.9511361948611318e-05, - "loss": 0.0879, - "step": 23540 - }, - { - "epoch": 1.7488489529184612, - "grad_norm": 1.8699147701263428, - "learning_rate": 1.9506906282489233e-05, - "loss": 0.0677, - "step": 23550 - }, - { - "epoch": 1.7495915639388089, - "grad_norm": 1.7043718099594116, - "learning_rate": 1.9502450616367148e-05, - "loss": 0.0751, - "step": 23560 - }, - { - "epoch": 1.7503341749591566, - "grad_norm": 1.2603180408477783, - "learning_rate": 1.949799495024506e-05, - "loss": 0.0724, - "step": 23570 - }, - { - "epoch": 1.751076785979504, - "grad_norm": 0.9619042277336121, - "learning_rate": 1.9493539284122978e-05, - "loss": 0.0815, - "step": 23580 - }, - { - "epoch": 1.7518193969998515, - "grad_norm": 1.521238923072815, - "learning_rate": 1.9489083618000893e-05, - "loss": 0.0494, - "step": 23590 - }, - { - "epoch": 1.752562008020199, - "grad_norm": 2.5039453506469727, - "learning_rate": 1.9484627951878804e-05, - "loss": 0.0824, - "step": 23600 - }, - { - "epoch": 1.7533046190405466, - "grad_norm": 0.793013334274292, - "learning_rate": 1.9480172285756723e-05, - "loss": 0.0529, - "step": 23610 - }, - { - "epoch": 1.7540472300608942, - "grad_norm": 2.6276259422302246, - "learning_rate": 1.9475716619634638e-05, - "loss": 0.0624, - "step": 23620 - }, - { - "epoch": 1.7547898410812417, - "grad_norm": 0.7211153507232666, - "learning_rate": 1.947126095351255e-05, - "loss": 0.0859, - "step": 23630 - }, - { - "epoch": 1.7555324521015891, - "grad_norm": 0.7055466175079346, - "learning_rate": 1.9466805287390464e-05, - "loss": 0.0813, - "step": 23640 - }, - { - "epoch": 1.7562750631219366, - "grad_norm": 1.8342903852462769, - "learning_rate": 1.9462349621268383e-05, - "loss": 0.1125, - "step": 23650 - }, - { - "epoch": 1.7570176741422843, - "grad_norm": 1.909565806388855, - "learning_rate": 1.9457893955146294e-05, - "loss": 0.1002, - "step": 23660 - }, - { - "epoch": 1.757760285162632, - "grad_norm": 1.755029559135437, - "learning_rate": 1.945343828902421e-05, - "loss": 0.0785, - "step": 23670 - }, - { - "epoch": 1.7585028961829794, - "grad_norm": 1.2598732709884644, - "learning_rate": 1.9448982622902124e-05, - "loss": 0.063, - "step": 23680 - }, - { - "epoch": 1.7592455072033268, - "grad_norm": 1.9370228052139282, - "learning_rate": 1.944452695678004e-05, - "loss": 0.0481, - "step": 23690 - }, - { - "epoch": 1.7599881182236743, - "grad_norm": 0.8600150942802429, - "learning_rate": 1.9440071290657954e-05, - "loss": 0.0818, - "step": 23700 - }, - { - "epoch": 1.760730729244022, - "grad_norm": 1.0704541206359863, - "learning_rate": 1.9435615624535866e-05, - "loss": 0.0879, - "step": 23710 - }, - { - "epoch": 1.7614733402643696, - "grad_norm": 0.8569614291191101, - "learning_rate": 1.9431159958413784e-05, - "loss": 0.0786, - "step": 23720 - }, - { - "epoch": 1.762215951284717, - "grad_norm": 2.5720906257629395, - "learning_rate": 1.94267042922917e-05, - "loss": 0.0942, - "step": 23730 - }, - { - "epoch": 1.7629585623050645, - "grad_norm": 1.6318458318710327, - "learning_rate": 1.942224862616961e-05, - "loss": 0.0819, - "step": 23740 - }, - { - "epoch": 1.763701173325412, - "grad_norm": 0.5371285080909729, - "learning_rate": 1.941779296004753e-05, - "loss": 0.042, - "step": 23750 - }, - { - "epoch": 1.7644437843457597, - "grad_norm": 0.4963701665401459, - "learning_rate": 1.9413337293925444e-05, - "loss": 0.0849, - "step": 23760 - }, - { - "epoch": 1.7651863953661073, - "grad_norm": 4.991149425506592, - "learning_rate": 1.9408881627803356e-05, - "loss": 0.0797, - "step": 23770 - }, - { - "epoch": 1.7659290063864548, - "grad_norm": 2.4451613426208496, - "learning_rate": 1.940442596168127e-05, - "loss": 0.0688, - "step": 23780 - }, - { - "epoch": 1.7666716174068022, - "grad_norm": 0.5909414887428284, - "learning_rate": 1.9399970295559186e-05, - "loss": 0.0814, - "step": 23790 - }, - { - "epoch": 1.76741422842715, - "grad_norm": 3.1932735443115234, - "learning_rate": 1.93955146294371e-05, - "loss": 0.0725, - "step": 23800 - }, - { - "epoch": 1.7681568394474974, - "grad_norm": 1.4078024625778198, - "learning_rate": 1.9391058963315016e-05, - "loss": 0.0917, - "step": 23810 - }, - { - "epoch": 1.768899450467845, - "grad_norm": 0.5630704164505005, - "learning_rate": 1.938660329719293e-05, - "loss": 0.049, - "step": 23820 - }, - { - "epoch": 1.7696420614881925, - "grad_norm": 1.22433602809906, - "learning_rate": 1.9382147631070846e-05, - "loss": 0.0883, - "step": 23830 - }, - { - "epoch": 1.77038467250854, - "grad_norm": 2.7386889457702637, - "learning_rate": 1.937769196494876e-05, - "loss": 0.0769, - "step": 23840 - }, - { - "epoch": 1.7711272835288876, - "grad_norm": 0.52625572681427, - "learning_rate": 1.9373236298826676e-05, - "loss": 0.0807, - "step": 23850 - }, - { - "epoch": 1.7718698945492353, - "grad_norm": 1.0019735097885132, - "learning_rate": 1.936878063270459e-05, - "loss": 0.074, - "step": 23860 - }, - { - "epoch": 1.7726125055695827, - "grad_norm": 1.6387897729873657, - "learning_rate": 1.9364324966582506e-05, - "loss": 0.0592, - "step": 23870 - }, - { - "epoch": 1.7733551165899302, - "grad_norm": 1.7129108905792236, - "learning_rate": 1.9359869300460417e-05, - "loss": 0.101, - "step": 23880 - }, - { - "epoch": 1.7740977276102776, - "grad_norm": 2.9050674438476562, - "learning_rate": 1.9355413634338336e-05, - "loss": 0.0965, - "step": 23890 - }, - { - "epoch": 1.7748403386306253, - "grad_norm": 2.6938226222991943, - "learning_rate": 1.9350957968216247e-05, - "loss": 0.0713, - "step": 23900 - }, - { - "epoch": 1.775582949650973, - "grad_norm": 2.0255374908447266, - "learning_rate": 1.9346502302094162e-05, - "loss": 0.0711, - "step": 23910 - }, - { - "epoch": 1.7763255606713204, - "grad_norm": 0.8787927627563477, - "learning_rate": 1.934204663597208e-05, - "loss": 0.0717, - "step": 23920 - }, - { - "epoch": 1.7770681716916679, - "grad_norm": 0.6391984224319458, - "learning_rate": 1.9337590969849992e-05, - "loss": 0.0434, - "step": 23930 - }, - { - "epoch": 1.7778107827120153, - "grad_norm": 2.4964632987976074, - "learning_rate": 1.9333135303727907e-05, - "loss": 0.0759, - "step": 23940 - }, - { - "epoch": 1.778553393732363, - "grad_norm": 2.1311395168304443, - "learning_rate": 1.9328679637605822e-05, - "loss": 0.0499, - "step": 23950 - }, - { - "epoch": 1.7792960047527107, - "grad_norm": 3.056412935256958, - "learning_rate": 1.9324223971483737e-05, - "loss": 0.0652, - "step": 23960 - }, - { - "epoch": 1.780038615773058, - "grad_norm": 1.9002305269241333, - "learning_rate": 1.9319768305361652e-05, - "loss": 0.0528, - "step": 23970 - }, - { - "epoch": 1.7807812267934056, - "grad_norm": 2.583705186843872, - "learning_rate": 1.9315312639239564e-05, - "loss": 0.0692, - "step": 23980 - }, - { - "epoch": 1.781523837813753, - "grad_norm": 2.8918917179107666, - "learning_rate": 1.9310856973117482e-05, - "loss": 0.0405, - "step": 23990 - }, - { - "epoch": 1.7822664488341007, - "grad_norm": 2.4750471115112305, - "learning_rate": 1.9306401306995397e-05, - "loss": 0.0704, - "step": 24000 - }, - { - "epoch": 1.7830090598544484, - "grad_norm": 0.7255248427391052, - "learning_rate": 1.930194564087331e-05, - "loss": 0.0657, - "step": 24010 - }, - { - "epoch": 1.7837516708747958, - "grad_norm": 1.313336968421936, - "learning_rate": 1.9297489974751227e-05, - "loss": 0.0548, - "step": 24020 - }, - { - "epoch": 1.7844942818951433, - "grad_norm": 1.0988185405731201, - "learning_rate": 1.9293034308629142e-05, - "loss": 0.0681, - "step": 24030 - }, - { - "epoch": 1.7852368929154907, - "grad_norm": 1.2511615753173828, - "learning_rate": 1.9288578642507054e-05, - "loss": 0.1, - "step": 24040 - }, - { - "epoch": 1.7859795039358384, - "grad_norm": 0.9137929677963257, - "learning_rate": 1.928412297638497e-05, - "loss": 0.0922, - "step": 24050 - }, - { - "epoch": 1.786722114956186, - "grad_norm": 1.0579396486282349, - "learning_rate": 1.9279667310262887e-05, - "loss": 0.0649, - "step": 24060 - }, - { - "epoch": 1.7874647259765335, - "grad_norm": 2.3197665214538574, - "learning_rate": 1.92752116441408e-05, - "loss": 0.0646, - "step": 24070 - }, - { - "epoch": 1.788207336996881, - "grad_norm": 0.7259221076965332, - "learning_rate": 1.9270755978018714e-05, - "loss": 0.0642, - "step": 24080 - }, - { - "epoch": 1.7889499480172286, - "grad_norm": 1.1063398122787476, - "learning_rate": 1.926630031189663e-05, - "loss": 0.077, - "step": 24090 - }, - { - "epoch": 1.789692559037576, - "grad_norm": 0.8561720848083496, - "learning_rate": 1.9261844645774544e-05, - "loss": 0.0799, - "step": 24100 - }, - { - "epoch": 1.7904351700579237, - "grad_norm": 2.2343969345092773, - "learning_rate": 1.925738897965246e-05, - "loss": 0.0798, - "step": 24110 - }, - { - "epoch": 1.7911777810782712, - "grad_norm": 0.8606523275375366, - "learning_rate": 1.925293331353037e-05, - "loss": 0.0636, - "step": 24120 - }, - { - "epoch": 1.7919203920986186, - "grad_norm": 1.764185905456543, - "learning_rate": 1.924847764740829e-05, - "loss": 0.072, - "step": 24130 - }, - { - "epoch": 1.7926630031189663, - "grad_norm": 2.313272714614868, - "learning_rate": 1.9244021981286204e-05, - "loss": 0.0701, - "step": 24140 - }, - { - "epoch": 1.793405614139314, - "grad_norm": 0.961453914642334, - "learning_rate": 1.9239566315164115e-05, - "loss": 0.0845, - "step": 24150 - }, - { - "epoch": 1.7941482251596614, - "grad_norm": 0.6591196060180664, - "learning_rate": 1.9235110649042034e-05, - "loss": 0.0578, - "step": 24160 - }, - { - "epoch": 1.7948908361800089, - "grad_norm": 2.050917148590088, - "learning_rate": 1.923065498291995e-05, - "loss": 0.07, - "step": 24170 - }, - { - "epoch": 1.7956334472003563, - "grad_norm": 1.651291847229004, - "learning_rate": 1.922619931679786e-05, - "loss": 0.1065, - "step": 24180 - }, - { - "epoch": 1.796376058220704, - "grad_norm": 1.6536940336227417, - "learning_rate": 1.9221743650675775e-05, - "loss": 0.0675, - "step": 24190 - }, - { - "epoch": 1.7971186692410517, - "grad_norm": 1.4677518606185913, - "learning_rate": 1.921728798455369e-05, - "loss": 0.0531, - "step": 24200 - }, - { - "epoch": 1.7978612802613991, - "grad_norm": 0.6976707577705383, - "learning_rate": 1.9212832318431605e-05, - "loss": 0.0458, - "step": 24210 - }, - { - "epoch": 1.7986038912817466, - "grad_norm": 2.0088629722595215, - "learning_rate": 1.920837665230952e-05, - "loss": 0.0756, - "step": 24220 - }, - { - "epoch": 1.799346502302094, - "grad_norm": 1.7095671892166138, - "learning_rate": 1.9203920986187435e-05, - "loss": 0.0709, - "step": 24230 - }, - { - "epoch": 1.8000891133224417, - "grad_norm": 1.893115758895874, - "learning_rate": 1.919946532006535e-05, - "loss": 0.0676, - "step": 24240 - }, - { - "epoch": 1.8008317243427894, - "grad_norm": 2.080127716064453, - "learning_rate": 1.9195009653943265e-05, - "loss": 0.0533, - "step": 24250 - }, - { - "epoch": 1.8015743353631368, - "grad_norm": 1.7303770780563354, - "learning_rate": 1.919055398782118e-05, - "loss": 0.0741, - "step": 24260 - }, - { - "epoch": 1.8023169463834843, - "grad_norm": 1.3288146257400513, - "learning_rate": 1.9186098321699095e-05, - "loss": 0.0812, - "step": 24270 - }, - { - "epoch": 1.8030595574038317, - "grad_norm": 2.545034408569336, - "learning_rate": 1.918164265557701e-05, - "loss": 0.0959, - "step": 24280 - }, - { - "epoch": 1.8038021684241794, - "grad_norm": 2.7964491844177246, - "learning_rate": 1.9177186989454922e-05, - "loss": 0.0649, - "step": 24290 - }, - { - "epoch": 1.804544779444527, - "grad_norm": 1.9604982137680054, - "learning_rate": 1.917273132333284e-05, - "loss": 0.0733, - "step": 24300 - }, - { - "epoch": 1.8052873904648745, - "grad_norm": 2.917266607284546, - "learning_rate": 1.9168275657210752e-05, - "loss": 0.0903, - "step": 24310 - }, - { - "epoch": 1.806030001485222, - "grad_norm": 2.3615570068359375, - "learning_rate": 1.9163819991088667e-05, - "loss": 0.065, - "step": 24320 - }, - { - "epoch": 1.8067726125055694, - "grad_norm": 1.7897320985794067, - "learning_rate": 1.9159364324966585e-05, - "loss": 0.086, - "step": 24330 - }, - { - "epoch": 1.807515223525917, - "grad_norm": 1.5693241357803345, - "learning_rate": 1.9154908658844497e-05, - "loss": 0.0876, - "step": 24340 - }, - { - "epoch": 1.8082578345462648, - "grad_norm": 1.0281766653060913, - "learning_rate": 1.9150452992722412e-05, - "loss": 0.0841, - "step": 24350 - }, - { - "epoch": 1.8090004455666122, - "grad_norm": 1.0320008993148804, - "learning_rate": 1.9145997326600327e-05, - "loss": 0.0852, - "step": 24360 - }, - { - "epoch": 1.8097430565869597, - "grad_norm": 0.7011651396751404, - "learning_rate": 1.9141541660478242e-05, - "loss": 0.0549, - "step": 24370 - }, - { - "epoch": 1.8104856676073073, - "grad_norm": 2.5355286598205566, - "learning_rate": 1.9137085994356157e-05, - "loss": 0.0809, - "step": 24380 - }, - { - "epoch": 1.8112282786276548, - "grad_norm": 1.371387243270874, - "learning_rate": 1.9132630328234072e-05, - "loss": 0.0568, - "step": 24390 - }, - { - "epoch": 1.8119708896480025, - "grad_norm": 1.14397132396698, - "learning_rate": 1.9128174662111987e-05, - "loss": 0.08, - "step": 24400 - }, - { - "epoch": 1.81271350066835, - "grad_norm": 0.4540915787220001, - "learning_rate": 1.9123718995989902e-05, - "loss": 0.0759, - "step": 24410 - }, - { - "epoch": 1.8134561116886974, - "grad_norm": 0.6488813757896423, - "learning_rate": 1.9119263329867814e-05, - "loss": 0.0805, - "step": 24420 - }, - { - "epoch": 1.814198722709045, - "grad_norm": 2.155545949935913, - "learning_rate": 1.9114807663745732e-05, - "loss": 0.06, - "step": 24430 - }, - { - "epoch": 1.8149413337293927, - "grad_norm": 2.4752142429351807, - "learning_rate": 1.9110351997623647e-05, - "loss": 0.0823, - "step": 24440 - }, - { - "epoch": 1.8156839447497402, - "grad_norm": 1.4653511047363281, - "learning_rate": 1.910589633150156e-05, - "loss": 0.0863, - "step": 24450 - }, - { - "epoch": 1.8164265557700876, - "grad_norm": 1.1317205429077148, - "learning_rate": 1.9101440665379474e-05, - "loss": 0.0554, - "step": 24460 - }, - { - "epoch": 1.817169166790435, - "grad_norm": 1.3294923305511475, - "learning_rate": 1.9096984999257392e-05, - "loss": 0.0619, - "step": 24470 - }, - { - "epoch": 1.8179117778107827, - "grad_norm": 2.8235435485839844, - "learning_rate": 1.9092529333135304e-05, - "loss": 0.0769, - "step": 24480 - }, - { - "epoch": 1.8186543888311304, - "grad_norm": 0.2612536549568176, - "learning_rate": 1.908807366701322e-05, - "loss": 0.0782, - "step": 24490 - }, - { - "epoch": 1.8193969998514778, - "grad_norm": 2.1067092418670654, - "learning_rate": 1.9083618000891133e-05, - "loss": 0.0641, - "step": 24500 - }, - { - "epoch": 1.8201396108718253, - "grad_norm": 1.507546067237854, - "learning_rate": 1.907916233476905e-05, - "loss": 0.0673, - "step": 24510 - }, - { - "epoch": 1.8208822218921727, - "grad_norm": 1.1641823053359985, - "learning_rate": 1.9074706668646963e-05, - "loss": 0.0735, - "step": 24520 - }, - { - "epoch": 1.8216248329125204, - "grad_norm": 1.6520384550094604, - "learning_rate": 1.9070251002524875e-05, - "loss": 0.0714, - "step": 24530 - }, - { - "epoch": 1.822367443932868, - "grad_norm": 1.908644676208496, - "learning_rate": 1.9065795336402793e-05, - "loss": 0.0808, - "step": 24540 - }, - { - "epoch": 1.8231100549532155, - "grad_norm": 1.606765866279602, - "learning_rate": 1.906133967028071e-05, - "loss": 0.0944, - "step": 24550 - }, - { - "epoch": 1.823852665973563, - "grad_norm": 1.8764336109161377, - "learning_rate": 1.905688400415862e-05, - "loss": 0.105, - "step": 24560 - }, - { - "epoch": 1.8245952769939104, - "grad_norm": 1.009965419769287, - "learning_rate": 1.905242833803654e-05, - "loss": 0.039, - "step": 24570 - }, - { - "epoch": 1.8253378880142581, - "grad_norm": 1.2469267845153809, - "learning_rate": 1.9047972671914453e-05, - "loss": 0.0539, - "step": 24580 - }, - { - "epoch": 1.8260804990346058, - "grad_norm": 2.306645393371582, - "learning_rate": 1.9043517005792365e-05, - "loss": 0.0849, - "step": 24590 - }, - { - "epoch": 1.8268231100549532, - "grad_norm": 2.0731003284454346, - "learning_rate": 1.903906133967028e-05, - "loss": 0.082, - "step": 24600 - }, - { - "epoch": 1.8275657210753007, - "grad_norm": 2.53838849067688, - "learning_rate": 1.9034605673548195e-05, - "loss": 0.0964, - "step": 24610 - }, - { - "epoch": 1.8283083320956481, - "grad_norm": 0.8338577747344971, - "learning_rate": 1.903015000742611e-05, - "loss": 0.0842, - "step": 24620 - }, - { - "epoch": 1.8290509431159958, - "grad_norm": 0.40346524119377136, - "learning_rate": 1.9025694341304025e-05, - "loss": 0.0786, - "step": 24630 - }, - { - "epoch": 1.8297935541363435, - "grad_norm": 3.7028415203094482, - "learning_rate": 1.902123867518194e-05, - "loss": 0.0967, - "step": 24640 - }, - { - "epoch": 1.830536165156691, - "grad_norm": 1.1305569410324097, - "learning_rate": 1.9016783009059855e-05, - "loss": 0.0596, - "step": 24650 - }, - { - "epoch": 1.8312787761770384, - "grad_norm": 1.63505220413208, - "learning_rate": 1.901232734293777e-05, - "loss": 0.0985, - "step": 24660 - }, - { - "epoch": 1.832021387197386, - "grad_norm": 1.5454314947128296, - "learning_rate": 1.9007871676815685e-05, - "loss": 0.0677, - "step": 24670 - }, - { - "epoch": 1.8327639982177335, - "grad_norm": 1.4274262189865112, - "learning_rate": 1.90034160106936e-05, - "loss": 0.0681, - "step": 24680 - }, - { - "epoch": 1.8335066092380812, - "grad_norm": 0.888826847076416, - "learning_rate": 1.8998960344571515e-05, - "loss": 0.0922, - "step": 24690 - }, - { - "epoch": 1.8342492202584286, - "grad_norm": 0.6009930968284607, - "learning_rate": 1.8994504678449427e-05, - "loss": 0.08, - "step": 24700 - }, - { - "epoch": 1.834991831278776, - "grad_norm": 1.8012096881866455, - "learning_rate": 1.8990049012327345e-05, - "loss": 0.0726, - "step": 24710 - }, - { - "epoch": 1.8357344422991237, - "grad_norm": 2.0512189865112305, - "learning_rate": 1.8985593346205257e-05, - "loss": 0.089, - "step": 24720 - }, - { - "epoch": 1.8364770533194714, - "grad_norm": 1.3309962749481201, - "learning_rate": 1.898113768008317e-05, - "loss": 0.042, - "step": 24730 - }, - { - "epoch": 1.8372196643398189, - "grad_norm": 2.834207057952881, - "learning_rate": 1.897668201396109e-05, - "loss": 0.0774, - "step": 24740 - }, - { - "epoch": 1.8379622753601663, - "grad_norm": 0.6929059028625488, - "learning_rate": 1.8972226347839e-05, - "loss": 0.0601, - "step": 24750 - }, - { - "epoch": 1.8387048863805138, - "grad_norm": 2.1518747806549072, - "learning_rate": 1.8967770681716917e-05, - "loss": 0.0647, - "step": 24760 - }, - { - "epoch": 1.8394474974008614, - "grad_norm": 1.8092671632766724, - "learning_rate": 1.896331501559483e-05, - "loss": 0.0596, - "step": 24770 - }, - { - "epoch": 1.8401901084212091, - "grad_norm": 1.499583125114441, - "learning_rate": 1.8958859349472747e-05, - "loss": 0.0625, - "step": 24780 - }, - { - "epoch": 1.8409327194415566, - "grad_norm": 1.1313135623931885, - "learning_rate": 1.895440368335066e-05, - "loss": 0.0761, - "step": 24790 - }, - { - "epoch": 1.841675330461904, - "grad_norm": 3.332144260406494, - "learning_rate": 1.8949948017228577e-05, - "loss": 0.1059, - "step": 24800 - }, - { - "epoch": 1.8424179414822515, - "grad_norm": 1.193617820739746, - "learning_rate": 1.894549235110649e-05, - "loss": 0.0897, - "step": 24810 - }, - { - "epoch": 1.8431605525025991, - "grad_norm": 1.0771690607070923, - "learning_rate": 1.8941036684984407e-05, - "loss": 0.0709, - "step": 24820 - }, - { - "epoch": 1.8439031635229468, - "grad_norm": 2.919710874557495, - "learning_rate": 1.8936581018862318e-05, - "loss": 0.0729, - "step": 24830 - }, - { - "epoch": 1.8446457745432943, - "grad_norm": 1.2123440504074097, - "learning_rate": 1.8932125352740237e-05, - "loss": 0.0586, - "step": 24840 - }, - { - "epoch": 1.8453883855636417, - "grad_norm": 2.8051271438598633, - "learning_rate": 1.892766968661815e-05, - "loss": 0.1141, - "step": 24850 - }, - { - "epoch": 1.8461309965839892, - "grad_norm": 1.1732330322265625, - "learning_rate": 1.8923214020496063e-05, - "loss": 0.0702, - "step": 24860 - }, - { - "epoch": 1.8468736076043368, - "grad_norm": 1.468248963356018, - "learning_rate": 1.8918758354373978e-05, - "loss": 0.0755, - "step": 24870 - }, - { - "epoch": 1.8476162186246845, - "grad_norm": 1.1212787628173828, - "learning_rate": 1.8914302688251896e-05, - "loss": 0.0733, - "step": 24880 - }, - { - "epoch": 1.848358829645032, - "grad_norm": 1.3834295272827148, - "learning_rate": 1.8909847022129808e-05, - "loss": 0.0582, - "step": 24890 - }, - { - "epoch": 1.8491014406653794, - "grad_norm": 1.426999807357788, - "learning_rate": 1.8905391356007723e-05, - "loss": 0.0763, - "step": 24900 - }, - { - "epoch": 1.8498440516857269, - "grad_norm": 1.5629621744155884, - "learning_rate": 1.890093568988564e-05, - "loss": 0.0593, - "step": 24910 - }, - { - "epoch": 1.8505866627060745, - "grad_norm": 1.6968507766723633, - "learning_rate": 1.8896480023763553e-05, - "loss": 0.1121, - "step": 24920 - }, - { - "epoch": 1.8513292737264222, - "grad_norm": 1.8145391941070557, - "learning_rate": 1.8892024357641468e-05, - "loss": 0.0594, - "step": 24930 - }, - { - "epoch": 1.8520718847467696, - "grad_norm": 2.452359914779663, - "learning_rate": 1.888756869151938e-05, - "loss": 0.0626, - "step": 24940 - }, - { - "epoch": 1.852814495767117, - "grad_norm": 0.7949011921882629, - "learning_rate": 1.8883113025397298e-05, - "loss": 0.0816, - "step": 24950 - }, - { - "epoch": 1.8535571067874648, - "grad_norm": 1.0505071878433228, - "learning_rate": 1.8878657359275213e-05, - "loss": 0.0874, - "step": 24960 - }, - { - "epoch": 1.8542997178078122, - "grad_norm": 1.7781516313552856, - "learning_rate": 1.8874201693153125e-05, - "loss": 0.0677, - "step": 24970 - }, - { - "epoch": 1.85504232882816, - "grad_norm": 1.9626563787460327, - "learning_rate": 1.8869746027031043e-05, - "loss": 0.0692, - "step": 24980 - }, - { - "epoch": 1.8557849398485073, - "grad_norm": 0.7430385947227478, - "learning_rate": 1.8865290360908958e-05, - "loss": 0.0372, - "step": 24990 - }, - { - "epoch": 1.8565275508688548, - "grad_norm": 2.076448440551758, - "learning_rate": 1.886083469478687e-05, - "loss": 0.0702, - "step": 25000 - }, - { - "epoch": 1.8572701618892025, - "grad_norm": 1.5963934659957886, - "learning_rate": 1.8856379028664788e-05, - "loss": 0.0672, - "step": 25010 - }, - { - "epoch": 1.8580127729095501, - "grad_norm": 2.167839765548706, - "learning_rate": 1.88519233625427e-05, - "loss": 0.0642, - "step": 25020 - }, - { - "epoch": 1.8587553839298976, - "grad_norm": 1.1535776853561401, - "learning_rate": 1.8847467696420615e-05, - "loss": 0.1037, - "step": 25030 - }, - { - "epoch": 1.859497994950245, - "grad_norm": 0.5450434684753418, - "learning_rate": 1.884301203029853e-05, - "loss": 0.0663, - "step": 25040 - }, - { - "epoch": 1.8602406059705925, - "grad_norm": 1.2037556171417236, - "learning_rate": 1.8838556364176445e-05, - "loss": 0.0856, - "step": 25050 - }, - { - "epoch": 1.8609832169909402, - "grad_norm": 1.5233439207077026, - "learning_rate": 1.883410069805436e-05, - "loss": 0.0765, - "step": 25060 - }, - { - "epoch": 1.8617258280112878, - "grad_norm": 1.9642084836959839, - "learning_rate": 1.8829645031932275e-05, - "loss": 0.0701, - "step": 25070 - }, - { - "epoch": 1.8624684390316353, - "grad_norm": 4.366384983062744, - "learning_rate": 1.882518936581019e-05, - "loss": 0.0879, - "step": 25080 - }, - { - "epoch": 1.8632110500519827, - "grad_norm": 1.4830248355865479, - "learning_rate": 1.8820733699688105e-05, - "loss": 0.0753, - "step": 25090 - }, - { - "epoch": 1.8639536610723302, - "grad_norm": 0.9438735842704773, - "learning_rate": 1.881627803356602e-05, - "loss": 0.0621, - "step": 25100 - }, - { - "epoch": 1.8646962720926779, - "grad_norm": 1.960681438446045, - "learning_rate": 1.881182236744393e-05, - "loss": 0.102, - "step": 25110 - }, - { - "epoch": 1.8654388831130255, - "grad_norm": 0.6813110709190369, - "learning_rate": 1.880736670132185e-05, - "loss": 0.0515, - "step": 25120 - }, - { - "epoch": 1.866181494133373, - "grad_norm": 2.467663049697876, - "learning_rate": 1.880291103519976e-05, - "loss": 0.0824, - "step": 25130 - }, - { - "epoch": 1.8669241051537204, - "grad_norm": 0.4229584336280823, - "learning_rate": 1.8798455369077676e-05, - "loss": 0.0507, - "step": 25140 - }, - { - "epoch": 1.8676667161740679, - "grad_norm": 1.0609776973724365, - "learning_rate": 1.8793999702955595e-05, - "loss": 0.0637, - "step": 25150 - }, - { - "epoch": 1.8684093271944155, - "grad_norm": 1.177756905555725, - "learning_rate": 1.8789544036833506e-05, - "loss": 0.0679, - "step": 25160 - }, - { - "epoch": 1.8691519382147632, - "grad_norm": 1.098395586013794, - "learning_rate": 1.878508837071142e-05, - "loss": 0.0546, - "step": 25170 - }, - { - "epoch": 1.8698945492351107, - "grad_norm": 0.5572521686553955, - "learning_rate": 1.8780632704589336e-05, - "loss": 0.0842, - "step": 25180 - }, - { - "epoch": 1.8706371602554581, - "grad_norm": 1.3587162494659424, - "learning_rate": 1.877617703846725e-05, - "loss": 0.0654, - "step": 25190 - }, - { - "epoch": 1.8713797712758056, - "grad_norm": 4.091668605804443, - "learning_rate": 1.8771721372345166e-05, - "loss": 0.1023, - "step": 25200 - }, - { - "epoch": 1.8721223822961532, - "grad_norm": 2.425302267074585, - "learning_rate": 1.876726570622308e-05, - "loss": 0.0869, - "step": 25210 - }, - { - "epoch": 1.872864993316501, - "grad_norm": 0.6244649291038513, - "learning_rate": 1.8762810040100996e-05, - "loss": 0.0898, - "step": 25220 - }, - { - "epoch": 1.8736076043368484, - "grad_norm": 0.962476372718811, - "learning_rate": 1.875835437397891e-05, - "loss": 0.0778, - "step": 25230 - }, - { - "epoch": 1.8743502153571958, - "grad_norm": 3.0341858863830566, - "learning_rate": 1.8753898707856823e-05, - "loss": 0.0679, - "step": 25240 - }, - { - "epoch": 1.8750928263775435, - "grad_norm": 1.5342912673950195, - "learning_rate": 1.874944304173474e-05, - "loss": 0.0834, - "step": 25250 - }, - { - "epoch": 1.875835437397891, - "grad_norm": 2.197174549102783, - "learning_rate": 1.8744987375612656e-05, - "loss": 0.0582, - "step": 25260 - }, - { - "epoch": 1.8765780484182386, - "grad_norm": 1.7695233821868896, - "learning_rate": 1.8740531709490568e-05, - "loss": 0.0797, - "step": 25270 - }, - { - "epoch": 1.877320659438586, - "grad_norm": 1.5914796590805054, - "learning_rate": 1.8736076043368483e-05, - "loss": 0.089, - "step": 25280 - }, - { - "epoch": 1.8780632704589335, - "grad_norm": 3.123690128326416, - "learning_rate": 1.87316203772464e-05, - "loss": 0.069, - "step": 25290 - }, - { - "epoch": 1.8788058814792812, - "grad_norm": 2.2254252433776855, - "learning_rate": 1.8727164711124313e-05, - "loss": 0.1029, - "step": 25300 - }, - { - "epoch": 1.8795484924996289, - "grad_norm": 1.571058750152588, - "learning_rate": 1.8722709045002228e-05, - "loss": 0.0705, - "step": 25310 - }, - { - "epoch": 1.8802911035199763, - "grad_norm": 1.3334083557128906, - "learning_rate": 1.8718253378880146e-05, - "loss": 0.0961, - "step": 25320 - }, - { - "epoch": 1.8810337145403238, - "grad_norm": 3.364617109298706, - "learning_rate": 1.8713797712758058e-05, - "loss": 0.0876, - "step": 25330 - }, - { - "epoch": 1.8817763255606712, - "grad_norm": 0.7864534258842468, - "learning_rate": 1.8709342046635973e-05, - "loss": 0.0615, - "step": 25340 - }, - { - "epoch": 1.8825189365810189, - "grad_norm": 2.1435587406158447, - "learning_rate": 1.8704886380513884e-05, - "loss": 0.0666, - "step": 25350 - }, - { - "epoch": 1.8832615476013665, - "grad_norm": 1.164170503616333, - "learning_rate": 1.8700430714391803e-05, - "loss": 0.0619, - "step": 25360 - }, - { - "epoch": 1.884004158621714, - "grad_norm": 2.059136390686035, - "learning_rate": 1.8695975048269718e-05, - "loss": 0.0787, - "step": 25370 - }, - { - "epoch": 1.8847467696420614, - "grad_norm": 0.4554833173751831, - "learning_rate": 1.869151938214763e-05, - "loss": 0.0513, - "step": 25380 - }, - { - "epoch": 1.885489380662409, - "grad_norm": 0.9922448396682739, - "learning_rate": 1.8687063716025548e-05, - "loss": 0.0634, - "step": 25390 - }, - { - "epoch": 1.8862319916827566, - "grad_norm": 1.9916400909423828, - "learning_rate": 1.8682608049903463e-05, - "loss": 0.0695, - "step": 25400 - }, - { - "epoch": 1.8869746027031042, - "grad_norm": 0.31023046374320984, - "learning_rate": 1.8678152383781374e-05, - "loss": 0.0799, - "step": 25410 - }, - { - "epoch": 1.8877172137234517, - "grad_norm": 1.627617597579956, - "learning_rate": 1.8673696717659293e-05, - "loss": 0.0685, - "step": 25420 - }, - { - "epoch": 1.8884598247437991, - "grad_norm": 1.0137081146240234, - "learning_rate": 1.8669241051537208e-05, - "loss": 0.0887, - "step": 25430 - }, - { - "epoch": 1.8892024357641466, - "grad_norm": 1.8060729503631592, - "learning_rate": 1.866478538541512e-05, - "loss": 0.0443, - "step": 25440 - }, - { - "epoch": 1.8899450467844943, - "grad_norm": 1.671414852142334, - "learning_rate": 1.8660329719293034e-05, - "loss": 0.078, - "step": 25450 - }, - { - "epoch": 1.890687657804842, - "grad_norm": 1.3879966735839844, - "learning_rate": 1.865587405317095e-05, - "loss": 0.0741, - "step": 25460 - }, - { - "epoch": 1.8914302688251894, - "grad_norm": 2.2087647914886475, - "learning_rate": 1.8651418387048864e-05, - "loss": 0.0602, - "step": 25470 - }, - { - "epoch": 1.8921728798455368, - "grad_norm": 1.051397681236267, - "learning_rate": 1.864696272092678e-05, - "loss": 0.0936, - "step": 25480 - }, - { - "epoch": 1.8929154908658843, - "grad_norm": 1.432411789894104, - "learning_rate": 1.8642507054804694e-05, - "loss": 0.0838, - "step": 25490 - }, - { - "epoch": 1.893658101886232, - "grad_norm": 2.1286797523498535, - "learning_rate": 1.863805138868261e-05, - "loss": 0.0594, - "step": 25500 - }, - { - "epoch": 1.8944007129065796, - "grad_norm": 2.0686354637145996, - "learning_rate": 1.8633595722560524e-05, - "loss": 0.0601, - "step": 25510 - }, - { - "epoch": 1.895143323926927, - "grad_norm": 1.1248515844345093, - "learning_rate": 1.8629140056438436e-05, - "loss": 0.0595, - "step": 25520 - }, - { - "epoch": 1.8958859349472745, - "grad_norm": 1.444861650466919, - "learning_rate": 1.8624684390316354e-05, - "loss": 0.0484, - "step": 25530 - }, - { - "epoch": 1.8966285459676222, - "grad_norm": 0.935176432132721, - "learning_rate": 1.8620228724194266e-05, - "loss": 0.0826, - "step": 25540 - }, - { - "epoch": 1.8973711569879697, - "grad_norm": 1.5523897409439087, - "learning_rate": 1.861577305807218e-05, - "loss": 0.0676, - "step": 25550 - }, - { - "epoch": 1.8981137680083173, - "grad_norm": 0.7804394364356995, - "learning_rate": 1.86113173919501e-05, - "loss": 0.0848, - "step": 25560 - }, - { - "epoch": 1.8988563790286648, - "grad_norm": 2.2378127574920654, - "learning_rate": 1.860686172582801e-05, - "loss": 0.0884, - "step": 25570 - }, - { - "epoch": 1.8995989900490122, - "grad_norm": 0.5532150268554688, - "learning_rate": 1.8602406059705926e-05, - "loss": 0.0641, - "step": 25580 - }, - { - "epoch": 1.90034160106936, - "grad_norm": 0.7949833869934082, - "learning_rate": 1.859795039358384e-05, - "loss": 0.0543, - "step": 25590 - }, - { - "epoch": 1.9010842120897076, - "grad_norm": 1.0134397745132446, - "learning_rate": 1.8593494727461756e-05, - "loss": 0.0702, - "step": 25600 - }, - { - "epoch": 1.901826823110055, - "grad_norm": 1.708309531211853, - "learning_rate": 1.858903906133967e-05, - "loss": 0.0884, - "step": 25610 - }, - { - "epoch": 1.9025694341304025, - "grad_norm": 0.8126017451286316, - "learning_rate": 1.8584583395217586e-05, - "loss": 0.0615, - "step": 25620 - }, - { - "epoch": 1.90331204515075, - "grad_norm": 1.7196837663650513, - "learning_rate": 1.85801277290955e-05, - "loss": 0.0671, - "step": 25630 - }, - { - "epoch": 1.9040546561710976, - "grad_norm": 2.439037561416626, - "learning_rate": 1.8575672062973416e-05, - "loss": 0.0789, - "step": 25640 - }, - { - "epoch": 1.9047972671914453, - "grad_norm": 0.7953950762748718, - "learning_rate": 1.8571216396851327e-05, - "loss": 0.0709, - "step": 25650 - }, - { - "epoch": 1.9055398782117927, - "grad_norm": 1.4773277044296265, - "learning_rate": 1.8566760730729246e-05, - "loss": 0.0926, - "step": 25660 - }, - { - "epoch": 1.9062824892321402, - "grad_norm": 2.0613648891448975, - "learning_rate": 1.856230506460716e-05, - "loss": 0.0926, - "step": 25670 - }, - { - "epoch": 1.9070251002524876, - "grad_norm": 1.0905838012695312, - "learning_rate": 1.8557849398485072e-05, - "loss": 0.0546, - "step": 25680 - }, - { - "epoch": 1.9077677112728353, - "grad_norm": 0.6521821618080139, - "learning_rate": 1.8553393732362987e-05, - "loss": 0.0817, - "step": 25690 - }, - { - "epoch": 1.908510322293183, - "grad_norm": 1.17433762550354, - "learning_rate": 1.8548938066240906e-05, - "loss": 0.0703, - "step": 25700 - }, - { - "epoch": 1.9092529333135304, - "grad_norm": 1.6955013275146484, - "learning_rate": 1.8544482400118817e-05, - "loss": 0.0973, - "step": 25710 - }, - { - "epoch": 1.9099955443338779, - "grad_norm": 2.2255401611328125, - "learning_rate": 1.8540026733996732e-05, - "loss": 0.0678, - "step": 25720 - }, - { - "epoch": 1.9107381553542253, - "grad_norm": 1.3039476871490479, - "learning_rate": 1.853557106787465e-05, - "loss": 0.0667, - "step": 25730 - }, - { - "epoch": 1.911480766374573, - "grad_norm": 0.46905070543289185, - "learning_rate": 1.8531115401752562e-05, - "loss": 0.0575, - "step": 25740 - }, - { - "epoch": 1.9122233773949207, - "grad_norm": 0.5787822008132935, - "learning_rate": 1.8526659735630477e-05, - "loss": 0.0515, - "step": 25750 - }, - { - "epoch": 1.912965988415268, - "grad_norm": 1.4693628549575806, - "learning_rate": 1.852220406950839e-05, - "loss": 0.0929, - "step": 25760 - }, - { - "epoch": 1.9137085994356156, - "grad_norm": 1.5744353532791138, - "learning_rate": 1.8517748403386307e-05, - "loss": 0.0589, - "step": 25770 - }, - { - "epoch": 1.914451210455963, - "grad_norm": 1.8583874702453613, - "learning_rate": 1.8513292737264222e-05, - "loss": 0.0586, - "step": 25780 - }, - { - "epoch": 1.9151938214763107, - "grad_norm": 2.5291054248809814, - "learning_rate": 1.8508837071142134e-05, - "loss": 0.0623, - "step": 25790 - }, - { - "epoch": 1.9159364324966583, - "grad_norm": 0.9900248050689697, - "learning_rate": 1.8504381405020052e-05, - "loss": 0.0273, - "step": 25800 - }, - { - "epoch": 1.9166790435170058, - "grad_norm": 1.1880460977554321, - "learning_rate": 1.8499925738897967e-05, - "loss": 0.0901, - "step": 25810 - }, - { - "epoch": 1.9174216545373532, - "grad_norm": 1.0495351552963257, - "learning_rate": 1.849547007277588e-05, - "loss": 0.0592, - "step": 25820 - }, - { - "epoch": 1.918164265557701, - "grad_norm": 2.6768858432769775, - "learning_rate": 1.8491014406653797e-05, - "loss": 0.0908, - "step": 25830 - }, - { - "epoch": 1.9189068765780484, - "grad_norm": 1.1184509992599487, - "learning_rate": 1.8486558740531712e-05, - "loss": 0.0594, - "step": 25840 - }, - { - "epoch": 1.919649487598396, - "grad_norm": 4.1254401206970215, - "learning_rate": 1.8482103074409624e-05, - "loss": 0.0834, - "step": 25850 - }, - { - "epoch": 1.9203920986187435, - "grad_norm": 0.6216328144073486, - "learning_rate": 1.847764740828754e-05, - "loss": 0.0647, - "step": 25860 - }, - { - "epoch": 1.921134709639091, - "grad_norm": 2.5887441635131836, - "learning_rate": 1.8473191742165454e-05, - "loss": 0.063, - "step": 25870 - }, - { - "epoch": 1.9218773206594386, - "grad_norm": 1.4074640274047852, - "learning_rate": 1.846873607604337e-05, - "loss": 0.0643, - "step": 25880 - }, - { - "epoch": 1.9226199316797863, - "grad_norm": 1.3061528205871582, - "learning_rate": 1.8464280409921284e-05, - "loss": 0.0518, - "step": 25890 - }, - { - "epoch": 1.9233625427001337, - "grad_norm": 4.136571884155273, - "learning_rate": 1.84598247437992e-05, - "loss": 0.0546, - "step": 25900 - }, - { - "epoch": 1.9241051537204812, - "grad_norm": 2.307090997695923, - "learning_rate": 1.8455369077677114e-05, - "loss": 0.0685, - "step": 25910 - }, - { - "epoch": 1.9248477647408286, - "grad_norm": 1.6441222429275513, - "learning_rate": 1.845091341155503e-05, - "loss": 0.0886, - "step": 25920 - }, - { - "epoch": 1.9255903757611763, - "grad_norm": 0.6175203323364258, - "learning_rate": 1.844645774543294e-05, - "loss": 0.0401, - "step": 25930 - }, - { - "epoch": 1.926332986781524, - "grad_norm": 0.5794946551322937, - "learning_rate": 1.844200207931086e-05, - "loss": 0.0627, - "step": 25940 - }, - { - "epoch": 1.9270755978018714, - "grad_norm": 0.15005835890769958, - "learning_rate": 1.843754641318877e-05, - "loss": 0.0369, - "step": 25950 - }, - { - "epoch": 1.9278182088222189, - "grad_norm": 2.209848403930664, - "learning_rate": 1.8433090747066685e-05, - "loss": 0.0724, - "step": 25960 - }, - { - "epoch": 1.9285608198425663, - "grad_norm": 1.4631694555282593, - "learning_rate": 1.8428635080944604e-05, - "loss": 0.0872, - "step": 25970 - }, - { - "epoch": 1.929303430862914, - "grad_norm": 1.996635913848877, - "learning_rate": 1.8424179414822515e-05, - "loss": 0.0592, - "step": 25980 - }, - { - "epoch": 1.9300460418832617, - "grad_norm": 1.6855813264846802, - "learning_rate": 1.841972374870043e-05, - "loss": 0.0902, - "step": 25990 - }, - { - "epoch": 1.9307886529036091, - "grad_norm": 1.024795413017273, - "learning_rate": 1.8415268082578345e-05, - "loss": 0.0532, - "step": 26000 - }, - { - "epoch": 1.9315312639239566, - "grad_norm": 0.7905906438827515, - "learning_rate": 1.841081241645626e-05, - "loss": 0.0524, - "step": 26010 - }, - { - "epoch": 1.932273874944304, - "grad_norm": 1.406113624572754, - "learning_rate": 1.8406356750334175e-05, - "loss": 0.0471, - "step": 26020 - }, - { - "epoch": 1.9330164859646517, - "grad_norm": 2.197995185852051, - "learning_rate": 1.840190108421209e-05, - "loss": 0.0518, - "step": 26030 - }, - { - "epoch": 1.9337590969849994, - "grad_norm": 0.5076370239257812, - "learning_rate": 1.8397445418090005e-05, - "loss": 0.056, - "step": 26040 - }, - { - "epoch": 1.9345017080053468, - "grad_norm": 0.6380198001861572, - "learning_rate": 1.839298975196792e-05, - "loss": 0.0787, - "step": 26050 - }, - { - "epoch": 1.9352443190256943, - "grad_norm": 0.7328625321388245, - "learning_rate": 1.8388534085845832e-05, - "loss": 0.064, - "step": 26060 - }, - { - "epoch": 1.9359869300460417, - "grad_norm": 1.652590274810791, - "learning_rate": 1.838407841972375e-05, - "loss": 0.0843, - "step": 26070 - }, - { - "epoch": 1.9367295410663894, - "grad_norm": 3.7497518062591553, - "learning_rate": 1.8379622753601665e-05, - "loss": 0.0686, - "step": 26080 - }, - { - "epoch": 1.937472152086737, - "grad_norm": 0.787777304649353, - "learning_rate": 1.8375167087479577e-05, - "loss": 0.1322, - "step": 26090 - }, - { - "epoch": 1.9382147631070845, - "grad_norm": 2.129948616027832, - "learning_rate": 1.8370711421357492e-05, - "loss": 0.0428, - "step": 26100 - }, - { - "epoch": 1.938957374127432, - "grad_norm": 3.463418960571289, - "learning_rate": 1.836625575523541e-05, - "loss": 0.0811, - "step": 26110 - }, - { - "epoch": 1.9396999851477796, - "grad_norm": 0.8304588794708252, - "learning_rate": 1.8361800089113322e-05, - "loss": 0.0744, - "step": 26120 - }, - { - "epoch": 1.940442596168127, - "grad_norm": 0.47356998920440674, - "learning_rate": 1.8357344422991237e-05, - "loss": 0.0558, - "step": 26130 - }, - { - "epoch": 1.9411852071884748, - "grad_norm": 1.1219099760055542, - "learning_rate": 1.8352888756869155e-05, - "loss": 0.0752, - "step": 26140 - }, - { - "epoch": 1.9419278182088222, - "grad_norm": 1.6041889190673828, - "learning_rate": 1.8348433090747067e-05, - "loss": 0.0737, - "step": 26150 - }, - { - "epoch": 1.9426704292291697, - "grad_norm": 0.80296790599823, - "learning_rate": 1.8343977424624982e-05, - "loss": 0.074, - "step": 26160 - }, - { - "epoch": 1.9434130402495173, - "grad_norm": 2.8766558170318604, - "learning_rate": 1.8339521758502893e-05, - "loss": 0.0874, - "step": 26170 - }, - { - "epoch": 1.944155651269865, - "grad_norm": 0.7357403635978699, - "learning_rate": 1.8335066092380812e-05, - "loss": 0.0765, - "step": 26180 - }, - { - "epoch": 1.9448982622902125, - "grad_norm": 2.0172839164733887, - "learning_rate": 1.8330610426258727e-05, - "loss": 0.0734, - "step": 26190 - }, - { - "epoch": 1.94564087331056, - "grad_norm": 1.475164771080017, - "learning_rate": 1.832615476013664e-05, - "loss": 0.092, - "step": 26200 - }, - { - "epoch": 1.9463834843309074, - "grad_norm": 0.9614417552947998, - "learning_rate": 1.8321699094014557e-05, - "loss": 0.0785, - "step": 26210 - }, - { - "epoch": 1.947126095351255, - "grad_norm": 2.393979549407959, - "learning_rate": 1.8317243427892472e-05, - "loss": 0.0762, - "step": 26220 - }, - { - "epoch": 1.9478687063716027, - "grad_norm": 2.239128589630127, - "learning_rate": 1.8312787761770383e-05, - "loss": 0.0603, - "step": 26230 - }, - { - "epoch": 1.9486113173919501, - "grad_norm": 0.7804839015007019, - "learning_rate": 1.8308332095648302e-05, - "loss": 0.0607, - "step": 26240 - }, - { - "epoch": 1.9493539284122976, - "grad_norm": 1.400680422782898, - "learning_rate": 1.8303876429526217e-05, - "loss": 0.0694, - "step": 26250 - }, - { - "epoch": 1.950096539432645, - "grad_norm": 1.6606298685073853, - "learning_rate": 1.829942076340413e-05, - "loss": 0.0692, - "step": 26260 - }, - { - "epoch": 1.9508391504529927, - "grad_norm": 1.0099084377288818, - "learning_rate": 1.8294965097282043e-05, - "loss": 0.0588, - "step": 26270 - }, - { - "epoch": 1.9515817614733404, - "grad_norm": 1.2126892805099487, - "learning_rate": 1.829050943115996e-05, - "loss": 0.0827, - "step": 26280 - }, - { - "epoch": 1.9523243724936878, - "grad_norm": 2.0662033557891846, - "learning_rate": 1.8286053765037873e-05, - "loss": 0.057, - "step": 26290 - }, - { - "epoch": 1.9530669835140353, - "grad_norm": 1.9331927299499512, - "learning_rate": 1.828159809891579e-05, - "loss": 0.073, - "step": 26300 - }, - { - "epoch": 1.9538095945343827, - "grad_norm": 2.7068533897399902, - "learning_rate": 1.8277142432793703e-05, - "loss": 0.0848, - "step": 26310 - }, - { - "epoch": 1.9545522055547304, - "grad_norm": 0.645256519317627, - "learning_rate": 1.827268676667162e-05, - "loss": 0.0731, - "step": 26320 - }, - { - "epoch": 1.955294816575078, - "grad_norm": 0.46004560589790344, - "learning_rate": 1.8268231100549533e-05, - "loss": 0.0556, - "step": 26330 - }, - { - "epoch": 1.9560374275954255, - "grad_norm": 2.7907826900482178, - "learning_rate": 1.8263775434427445e-05, - "loss": 0.076, - "step": 26340 - }, - { - "epoch": 1.956780038615773, - "grad_norm": 0.48506757616996765, - "learning_rate": 1.8259319768305363e-05, - "loss": 0.0669, - "step": 26350 - }, - { - "epoch": 1.9575226496361204, - "grad_norm": 2.199068546295166, - "learning_rate": 1.825486410218328e-05, - "loss": 0.0996, - "step": 26360 - }, - { - "epoch": 1.958265260656468, - "grad_norm": 1.1990214586257935, - "learning_rate": 1.825040843606119e-05, - "loss": 0.0453, - "step": 26370 - }, - { - "epoch": 1.9590078716768158, - "grad_norm": 1.2882795333862305, - "learning_rate": 1.824595276993911e-05, - "loss": 0.064, - "step": 26380 - }, - { - "epoch": 1.9597504826971632, - "grad_norm": 0.6526997089385986, - "learning_rate": 1.824149710381702e-05, - "loss": 0.0817, - "step": 26390 - }, - { - "epoch": 1.9604930937175107, - "grad_norm": 0.8225412368774414, - "learning_rate": 1.8237041437694935e-05, - "loss": 0.0611, - "step": 26400 - }, - { - "epoch": 1.9612357047378584, - "grad_norm": 0.5846786499023438, - "learning_rate": 1.8232585771572853e-05, - "loss": 0.0877, - "step": 26410 - }, - { - "epoch": 1.9619783157582058, - "grad_norm": 3.2029032707214355, - "learning_rate": 1.8228130105450765e-05, - "loss": 0.0773, - "step": 26420 - }, - { - "epoch": 1.9627209267785535, - "grad_norm": 1.3183096647262573, - "learning_rate": 1.822367443932868e-05, - "loss": 0.052, - "step": 26430 - }, - { - "epoch": 1.963463537798901, - "grad_norm": 0.731730043888092, - "learning_rate": 1.8219218773206595e-05, - "loss": 0.081, - "step": 26440 - }, - { - "epoch": 1.9642061488192484, - "grad_norm": 1.890268087387085, - "learning_rate": 1.821476310708451e-05, - "loss": 0.0529, - "step": 26450 - }, - { - "epoch": 1.964948759839596, - "grad_norm": 1.2270501852035522, - "learning_rate": 1.8210307440962425e-05, - "loss": 0.0869, - "step": 26460 - }, - { - "epoch": 1.9656913708599437, - "grad_norm": 0.9441844820976257, - "learning_rate": 1.8205851774840337e-05, - "loss": 0.0552, - "step": 26470 - }, - { - "epoch": 1.9664339818802912, - "grad_norm": 1.5903592109680176, - "learning_rate": 1.8201396108718255e-05, - "loss": 0.0693, - "step": 26480 - }, - { - "epoch": 1.9671765929006386, - "grad_norm": 3.176476001739502, - "learning_rate": 1.819694044259617e-05, - "loss": 0.0863, - "step": 26490 - }, - { - "epoch": 1.967919203920986, - "grad_norm": 0.41649898886680603, - "learning_rate": 1.819248477647408e-05, - "loss": 0.0417, - "step": 26500 - }, - { - "epoch": 1.9686618149413337, - "grad_norm": 1.182588815689087, - "learning_rate": 1.8188029110351997e-05, - "loss": 0.0692, - "step": 26510 - }, - { - "epoch": 1.9694044259616814, - "grad_norm": 1.535315990447998, - "learning_rate": 1.8183573444229915e-05, - "loss": 0.056, - "step": 26520 - }, - { - "epoch": 1.9701470369820289, - "grad_norm": 0.7827800512313843, - "learning_rate": 1.8179117778107826e-05, - "loss": 0.0994, - "step": 26530 - }, - { - "epoch": 1.9708896480023763, - "grad_norm": 1.4649193286895752, - "learning_rate": 1.817466211198574e-05, - "loss": 0.0624, - "step": 26540 - }, - { - "epoch": 1.9716322590227238, - "grad_norm": 2.715514898300171, - "learning_rate": 1.817020644586366e-05, - "loss": 0.0686, - "step": 26550 - }, - { - "epoch": 1.9723748700430714, - "grad_norm": 2.133049964904785, - "learning_rate": 1.816575077974157e-05, - "loss": 0.0645, - "step": 26560 - }, - { - "epoch": 1.973117481063419, - "grad_norm": 1.230670690536499, - "learning_rate": 1.8161295113619486e-05, - "loss": 0.0873, - "step": 26570 - }, - { - "epoch": 1.9738600920837666, - "grad_norm": 1.5158164501190186, - "learning_rate": 1.8156839447497398e-05, - "loss": 0.0826, - "step": 26580 - }, - { - "epoch": 1.974602703104114, - "grad_norm": 2.380052089691162, - "learning_rate": 1.8152383781375316e-05, - "loss": 0.0839, - "step": 26590 - }, - { - "epoch": 1.9753453141244615, - "grad_norm": 0.9779214262962341, - "learning_rate": 1.814792811525323e-05, - "loss": 0.1189, - "step": 26600 - }, - { - "epoch": 1.9760879251448091, - "grad_norm": 0.7717707753181458, - "learning_rate": 1.8143472449131143e-05, - "loss": 0.0651, - "step": 26610 - }, - { - "epoch": 1.9768305361651568, - "grad_norm": 1.0977226495742798, - "learning_rate": 1.813901678300906e-05, - "loss": 0.052, - "step": 26620 - }, - { - "epoch": 1.9775731471855043, - "grad_norm": 1.963529348373413, - "learning_rate": 1.8134561116886976e-05, - "loss": 0.06, - "step": 26630 - }, - { - "epoch": 1.9783157582058517, - "grad_norm": 0.4046013653278351, - "learning_rate": 1.8130105450764888e-05, - "loss": 0.0571, - "step": 26640 - }, - { - "epoch": 1.9790583692261992, - "grad_norm": 1.8156684637069702, - "learning_rate": 1.8125649784642806e-05, - "loss": 0.0798, - "step": 26650 - }, - { - "epoch": 1.9798009802465468, - "grad_norm": 0.9352402687072754, - "learning_rate": 1.812119411852072e-05, - "loss": 0.0706, - "step": 26660 - }, - { - "epoch": 1.9805435912668945, - "grad_norm": 2.6892099380493164, - "learning_rate": 1.8116738452398633e-05, - "loss": 0.0716, - "step": 26670 - }, - { - "epoch": 1.981286202287242, - "grad_norm": 1.3051759004592896, - "learning_rate": 1.8112282786276548e-05, - "loss": 0.0775, - "step": 26680 - }, - { - "epoch": 1.9820288133075894, - "grad_norm": 0.3577052056789398, - "learning_rate": 1.8107827120154463e-05, - "loss": 0.0396, - "step": 26690 - }, - { - "epoch": 1.982771424327937, - "grad_norm": 1.0343585014343262, - "learning_rate": 1.8103371454032378e-05, - "loss": 0.064, - "step": 26700 - }, - { - "epoch": 1.9835140353482845, - "grad_norm": 2.462855100631714, - "learning_rate": 1.8098915787910293e-05, - "loss": 0.0882, - "step": 26710 - }, - { - "epoch": 1.9842566463686322, - "grad_norm": 2.172545909881592, - "learning_rate": 1.8094460121788208e-05, - "loss": 0.0716, - "step": 26720 - }, - { - "epoch": 1.9849992573889796, - "grad_norm": 1.9032946825027466, - "learning_rate": 1.8090004455666123e-05, - "loss": 0.0691, - "step": 26730 - }, - { - "epoch": 1.985741868409327, - "grad_norm": 0.6433393359184265, - "learning_rate": 1.8085548789544038e-05, - "loss": 0.0452, - "step": 26740 - }, - { - "epoch": 1.9864844794296748, - "grad_norm": 2.729414939880371, - "learning_rate": 1.808109312342195e-05, - "loss": 0.0704, - "step": 26750 - }, - { - "epoch": 1.9872270904500224, - "grad_norm": 1.0757064819335938, - "learning_rate": 1.8076637457299868e-05, - "loss": 0.0853, - "step": 26760 - }, - { - "epoch": 1.9879697014703699, - "grad_norm": 0.4232407212257385, - "learning_rate": 1.8072181791177783e-05, - "loss": 0.0757, - "step": 26770 - }, - { - "epoch": 1.9887123124907173, - "grad_norm": 1.7221150398254395, - "learning_rate": 1.8067726125055695e-05, - "loss": 0.0709, - "step": 26780 - }, - { - "epoch": 1.9894549235110648, - "grad_norm": 1.9497733116149902, - "learning_rate": 1.8063270458933613e-05, - "loss": 0.0886, - "step": 26790 - }, - { - "epoch": 1.9901975345314125, - "grad_norm": 2.333503484725952, - "learning_rate": 1.8058814792811525e-05, - "loss": 0.0528, - "step": 26800 - }, - { - "epoch": 1.9909401455517601, - "grad_norm": 0.809281051158905, - "learning_rate": 1.805435912668944e-05, - "loss": 0.0683, - "step": 26810 - }, - { - "epoch": 1.9916827565721076, - "grad_norm": 3.6550369262695312, - "learning_rate": 1.8049903460567358e-05, - "loss": 0.0725, - "step": 26820 - }, - { - "epoch": 1.992425367592455, - "grad_norm": 2.8974320888519287, - "learning_rate": 1.804544779444527e-05, - "loss": 0.0892, - "step": 26830 - }, - { - "epoch": 1.9931679786128025, - "grad_norm": 0.9259861707687378, - "learning_rate": 1.8040992128323185e-05, - "loss": 0.0665, - "step": 26840 - }, - { - "epoch": 1.9939105896331502, - "grad_norm": 2.125751495361328, - "learning_rate": 1.80365364622011e-05, - "loss": 0.0634, - "step": 26850 - }, - { - "epoch": 1.9946532006534978, - "grad_norm": 1.0825103521347046, - "learning_rate": 1.8032080796079015e-05, - "loss": 0.0708, - "step": 26860 - }, - { - "epoch": 1.9953958116738453, - "grad_norm": 1.3801538944244385, - "learning_rate": 1.802762512995693e-05, - "loss": 0.0787, - "step": 26870 - }, - { - "epoch": 1.9961384226941927, - "grad_norm": 0.7324304580688477, - "learning_rate": 1.8023169463834845e-05, - "loss": 0.0689, - "step": 26880 - }, - { - "epoch": 1.9968810337145402, - "grad_norm": 0.9306546449661255, - "learning_rate": 1.801871379771276e-05, - "loss": 0.0895, - "step": 26890 - }, - { - "epoch": 1.9976236447348878, - "grad_norm": 3.5003910064697266, - "learning_rate": 1.8014258131590675e-05, - "loss": 0.057, - "step": 26900 - }, - { - "epoch": 1.9983662557552355, - "grad_norm": 1.8445905447006226, - "learning_rate": 1.8009802465468586e-05, - "loss": 0.0806, - "step": 26910 - }, - { - "epoch": 1.999108866775583, - "grad_norm": 2.1891441345214844, - "learning_rate": 1.80053467993465e-05, - "loss": 0.0742, - "step": 26920 - }, - { - "epoch": 1.9998514777959304, - "grad_norm": 2.6680564880371094, - "learning_rate": 1.800089113322442e-05, - "loss": 0.0612, - "step": 26930 - }, - { - "epoch": 2.0, - "eval_f1": 0.0, - "eval_loss": 0.05881134420633316, - "eval_runtime": 790.8114, - "eval_samples_per_second": 48.076, - "eval_steps_per_second": 3.006, - "step": 26932 - }, - { - "epoch": 2.000594088816278, - "grad_norm": 1.1317378282546997, - "learning_rate": 1.799643546710233e-05, - "loss": 0.0591, - "step": 26940 - }, - { - "epoch": 2.0013366998366258, - "grad_norm": 1.214632272720337, - "learning_rate": 1.7991979800980246e-05, - "loss": 0.0469, - "step": 26950 - }, - { - "epoch": 2.002079310856973, - "grad_norm": 0.5876008868217468, - "learning_rate": 1.7987524134858164e-05, - "loss": 0.0565, - "step": 26960 - }, - { - "epoch": 2.0028219218773207, - "grad_norm": 0.7250917553901672, - "learning_rate": 1.7983068468736076e-05, - "loss": 0.0597, - "step": 26970 - }, - { - "epoch": 2.003564532897668, - "grad_norm": 1.3954814672470093, - "learning_rate": 1.797861280261399e-05, - "loss": 0.098, - "step": 26980 - }, - { - "epoch": 2.0043071439180156, - "grad_norm": 0.8022347092628479, - "learning_rate": 1.7974157136491906e-05, - "loss": 0.0634, - "step": 26990 - }, - { - "epoch": 2.0050497549383635, - "grad_norm": 1.9625482559204102, - "learning_rate": 1.796970147036982e-05, - "loss": 0.0774, - "step": 27000 - }, - { - "epoch": 2.005792365958711, - "grad_norm": 1.0475565195083618, - "learning_rate": 1.7965245804247736e-05, - "loss": 0.0798, - "step": 27010 - }, - { - "epoch": 2.0065349769790584, - "grad_norm": 1.3007713556289673, - "learning_rate": 1.7960790138125648e-05, - "loss": 0.0721, - "step": 27020 - }, - { - "epoch": 2.007277587999406, - "grad_norm": 1.5825034379959106, - "learning_rate": 1.7956334472003566e-05, - "loss": 0.0741, - "step": 27030 - }, - { - "epoch": 2.0080201990197533, - "grad_norm": 1.4288562536239624, - "learning_rate": 1.795187880588148e-05, - "loss": 0.0606, - "step": 27040 - }, - { - "epoch": 2.008762810040101, - "grad_norm": 1.6191655397415161, - "learning_rate": 1.7947423139759393e-05, - "loss": 0.0893, - "step": 27050 - }, - { - "epoch": 2.0095054210604486, - "grad_norm": 3.0551598072052, - "learning_rate": 1.794296747363731e-05, - "loss": 0.0609, - "step": 27060 - }, - { - "epoch": 2.010248032080796, - "grad_norm": 0.9543463587760925, - "learning_rate": 1.7938511807515226e-05, - "loss": 0.0685, - "step": 27070 - }, - { - "epoch": 2.0109906431011435, - "grad_norm": 1.9315248727798462, - "learning_rate": 1.7934056141393138e-05, - "loss": 0.0581, - "step": 27080 - }, - { - "epoch": 2.011733254121491, - "grad_norm": 1.0615206956863403, - "learning_rate": 1.7929600475271053e-05, - "loss": 0.0834, - "step": 27090 - }, - { - "epoch": 2.012475865141839, - "grad_norm": 3.055593729019165, - "learning_rate": 1.7925144809148968e-05, - "loss": 0.0777, - "step": 27100 - }, - { - "epoch": 2.0132184761621863, - "grad_norm": 0.9650170207023621, - "learning_rate": 1.7920689143026883e-05, - "loss": 0.0563, - "step": 27110 - }, - { - "epoch": 2.0139610871825337, - "grad_norm": 1.2124733924865723, - "learning_rate": 1.7916233476904798e-05, - "loss": 0.0975, - "step": 27120 - }, - { - "epoch": 2.014703698202881, - "grad_norm": 2.321781635284424, - "learning_rate": 1.7911777810782713e-05, - "loss": 0.0769, - "step": 27130 - }, - { - "epoch": 2.015446309223229, - "grad_norm": 1.0764652490615845, - "learning_rate": 1.7907322144660628e-05, - "loss": 0.0526, - "step": 27140 - }, - { - "epoch": 2.0161889202435765, - "grad_norm": 1.4031742811203003, - "learning_rate": 1.7902866478538543e-05, - "loss": 0.061, - "step": 27150 - }, - { - "epoch": 2.016931531263924, - "grad_norm": 0.9712595343589783, - "learning_rate": 1.7898410812416454e-05, - "loss": 0.0627, - "step": 27160 - }, - { - "epoch": 2.0176741422842714, - "grad_norm": 1.150699496269226, - "learning_rate": 1.7893955146294373e-05, - "loss": 0.0839, - "step": 27170 - }, - { - "epoch": 2.018416753304619, - "grad_norm": 1.0646690130233765, - "learning_rate": 1.7889499480172288e-05, - "loss": 0.084, - "step": 27180 - }, - { - "epoch": 2.019159364324967, - "grad_norm": 1.4827255010604858, - "learning_rate": 1.78850438140502e-05, - "loss": 0.0448, - "step": 27190 - }, - { - "epoch": 2.0199019753453142, - "grad_norm": 1.7980319261550903, - "learning_rate": 1.7880588147928118e-05, - "loss": 0.0918, - "step": 27200 - }, - { - "epoch": 2.0206445863656617, - "grad_norm": 1.5512464046478271, - "learning_rate": 1.787613248180603e-05, - "loss": 0.0685, - "step": 27210 - }, - { - "epoch": 2.021387197386009, - "grad_norm": 1.1397250890731812, - "learning_rate": 1.7871676815683944e-05, - "loss": 0.0731, - "step": 27220 - }, - { - "epoch": 2.0221298084063566, - "grad_norm": 2.0807673931121826, - "learning_rate": 1.7867221149561863e-05, - "loss": 0.0851, - "step": 27230 - }, - { - "epoch": 2.0228724194267045, - "grad_norm": 3.3200225830078125, - "learning_rate": 1.7862765483439774e-05, - "loss": 0.0615, - "step": 27240 - }, - { - "epoch": 2.023615030447052, - "grad_norm": 1.3116739988327026, - "learning_rate": 1.785830981731769e-05, - "loss": 0.0569, - "step": 27250 - }, - { - "epoch": 2.0243576414673994, - "grad_norm": 1.8456593751907349, - "learning_rate": 1.7853854151195604e-05, - "loss": 0.0873, - "step": 27260 - }, - { - "epoch": 2.025100252487747, - "grad_norm": 1.7277987003326416, - "learning_rate": 1.784939848507352e-05, - "loss": 0.0576, - "step": 27270 - }, - { - "epoch": 2.0258428635080943, - "grad_norm": 0.4091399013996124, - "learning_rate": 1.7844942818951434e-05, - "loss": 0.0437, - "step": 27280 - }, - { - "epoch": 2.026585474528442, - "grad_norm": 3.058016300201416, - "learning_rate": 1.784048715282935e-05, - "loss": 0.0525, - "step": 27290 - }, - { - "epoch": 2.0273280855487896, - "grad_norm": 3.718642234802246, - "learning_rate": 1.7836031486707264e-05, - "loss": 0.0712, - "step": 27300 - }, - { - "epoch": 2.028070696569137, - "grad_norm": 2.157290458679199, - "learning_rate": 1.783157582058518e-05, - "loss": 0.0617, - "step": 27310 - }, - { - "epoch": 2.0288133075894845, - "grad_norm": 2.4551494121551514, - "learning_rate": 1.782712015446309e-05, - "loss": 0.0888, - "step": 27320 - }, - { - "epoch": 2.029555918609832, - "grad_norm": 1.5772738456726074, - "learning_rate": 1.7822664488341006e-05, - "loss": 0.0713, - "step": 27330 - }, - { - "epoch": 2.03029852963018, - "grad_norm": 0.9461155533790588, - "learning_rate": 1.7818208822218924e-05, - "loss": 0.0612, - "step": 27340 - }, - { - "epoch": 2.0310411406505273, - "grad_norm": 1.6049461364746094, - "learning_rate": 1.7813753156096836e-05, - "loss": 0.063, - "step": 27350 - }, - { - "epoch": 2.0317837516708748, - "grad_norm": 0.41540223360061646, - "learning_rate": 1.780929748997475e-05, - "loss": 0.064, - "step": 27360 - }, - { - "epoch": 2.032526362691222, - "grad_norm": 1.894874095916748, - "learning_rate": 1.780484182385267e-05, - "loss": 0.0669, - "step": 27370 - }, - { - "epoch": 2.0332689737115697, - "grad_norm": 0.4419223964214325, - "learning_rate": 1.780038615773058e-05, - "loss": 0.0402, - "step": 27380 - }, - { - "epoch": 2.0340115847319176, - "grad_norm": 0.962150514125824, - "learning_rate": 1.7795930491608496e-05, - "loss": 0.0849, - "step": 27390 - }, - { - "epoch": 2.034754195752265, - "grad_norm": 1.1691762208938599, - "learning_rate": 1.779147482548641e-05, - "loss": 0.0736, - "step": 27400 - }, - { - "epoch": 2.0354968067726125, - "grad_norm": 1.5563279390335083, - "learning_rate": 1.7787019159364326e-05, - "loss": 0.0569, - "step": 27410 - }, - { - "epoch": 2.03623941779296, - "grad_norm": 2.58371639251709, - "learning_rate": 1.778256349324224e-05, - "loss": 0.0672, - "step": 27420 - }, - { - "epoch": 2.0369820288133074, - "grad_norm": 0.5150777101516724, - "learning_rate": 1.7778107827120152e-05, - "loss": 0.0833, - "step": 27430 - }, - { - "epoch": 2.0377246398336553, - "grad_norm": 2.1422712802886963, - "learning_rate": 1.777365216099807e-05, - "loss": 0.0657, - "step": 27440 - }, - { - "epoch": 2.0384672508540027, - "grad_norm": 1.660101294517517, - "learning_rate": 1.7769196494875986e-05, - "loss": 0.0678, - "step": 27450 - }, - { - "epoch": 2.03920986187435, - "grad_norm": 2.03857159614563, - "learning_rate": 1.7764740828753897e-05, - "loss": 0.0801, - "step": 27460 - }, - { - "epoch": 2.0399524728946976, - "grad_norm": 1.208047866821289, - "learning_rate": 1.7760285162631816e-05, - "loss": 0.0321, - "step": 27470 - }, - { - "epoch": 2.0406950839150455, - "grad_norm": 0.9345359206199646, - "learning_rate": 1.775582949650973e-05, - "loss": 0.0667, - "step": 27480 - }, - { - "epoch": 2.041437694935393, - "grad_norm": 2.416853427886963, - "learning_rate": 1.7751373830387642e-05, - "loss": 0.089, - "step": 27490 - }, - { - "epoch": 2.0421803059557404, - "grad_norm": 0.6501384377479553, - "learning_rate": 1.7746918164265557e-05, - "loss": 0.0976, - "step": 27500 - }, - { - "epoch": 2.042922916976088, - "grad_norm": 1.3015940189361572, - "learning_rate": 1.7742462498143472e-05, - "loss": 0.0723, - "step": 27510 - }, - { - "epoch": 2.0436655279964353, - "grad_norm": 1.5806890726089478, - "learning_rate": 1.7738006832021387e-05, - "loss": 0.0675, - "step": 27520 - }, - { - "epoch": 2.044408139016783, - "grad_norm": 1.8144307136535645, - "learning_rate": 1.7733551165899302e-05, - "loss": 0.0506, - "step": 27530 - }, - { - "epoch": 2.0451507500371306, - "grad_norm": 1.104903221130371, - "learning_rate": 1.7729095499777217e-05, - "loss": 0.0819, - "step": 27540 - }, - { - "epoch": 2.045893361057478, - "grad_norm": 2.584608554840088, - "learning_rate": 1.7724639833655132e-05, - "loss": 0.0699, - "step": 27550 - }, - { - "epoch": 2.0466359720778255, - "grad_norm": 2.7305595874786377, - "learning_rate": 1.7720184167533047e-05, - "loss": 0.0954, - "step": 27560 - }, - { - "epoch": 2.047378583098173, - "grad_norm": 1.475791573524475, - "learning_rate": 1.771572850141096e-05, - "loss": 0.0682, - "step": 27570 - }, - { - "epoch": 2.048121194118521, - "grad_norm": 0.9141472578048706, - "learning_rate": 1.7711272835288877e-05, - "loss": 0.0583, - "step": 27580 - }, - { - "epoch": 2.0488638051388683, - "grad_norm": 1.4412683248519897, - "learning_rate": 1.7706817169166792e-05, - "loss": 0.0739, - "step": 27590 - }, - { - "epoch": 2.049606416159216, - "grad_norm": 2.051802158355713, - "learning_rate": 1.7702361503044704e-05, - "loss": 0.0864, - "step": 27600 - }, - { - "epoch": 2.0503490271795632, - "grad_norm": 1.61028254032135, - "learning_rate": 1.7697905836922622e-05, - "loss": 0.0379, - "step": 27610 - }, - { - "epoch": 2.0510916381999107, - "grad_norm": 1.2123854160308838, - "learning_rate": 1.7693450170800534e-05, - "loss": 0.0821, - "step": 27620 - }, - { - "epoch": 2.0518342492202586, - "grad_norm": 2.0213537216186523, - "learning_rate": 1.768899450467845e-05, - "loss": 0.0724, - "step": 27630 - }, - { - "epoch": 2.052576860240606, - "grad_norm": 1.507546067237854, - "learning_rate": 1.7684538838556367e-05, - "loss": 0.0844, - "step": 27640 - }, - { - "epoch": 2.0533194712609535, - "grad_norm": 1.3332023620605469, - "learning_rate": 1.768008317243428e-05, - "loss": 0.0679, - "step": 27650 - }, - { - "epoch": 2.054062082281301, - "grad_norm": 1.5484570264816284, - "learning_rate": 1.7675627506312194e-05, - "loss": 0.0898, - "step": 27660 - }, - { - "epoch": 2.0548046933016484, - "grad_norm": 1.267923355102539, - "learning_rate": 1.767117184019011e-05, - "loss": 0.1073, - "step": 27670 - }, - { - "epoch": 2.0555473043219963, - "grad_norm": 2.02040958404541, - "learning_rate": 1.7666716174068024e-05, - "loss": 0.0747, - "step": 27680 - }, - { - "epoch": 2.0562899153423437, - "grad_norm": 0.6762095093727112, - "learning_rate": 1.766226050794594e-05, - "loss": 0.0501, - "step": 27690 - }, - { - "epoch": 2.057032526362691, - "grad_norm": 2.8425498008728027, - "learning_rate": 1.7657804841823854e-05, - "loss": 0.0708, - "step": 27700 - }, - { - "epoch": 2.0577751373830386, - "grad_norm": 2.2198352813720703, - "learning_rate": 1.765334917570177e-05, - "loss": 0.0625, - "step": 27710 - }, - { - "epoch": 2.0585177484033865, - "grad_norm": 0.9430508613586426, - "learning_rate": 1.7648893509579684e-05, - "loss": 0.0844, - "step": 27720 - }, - { - "epoch": 2.059260359423734, - "grad_norm": 3.4453015327453613, - "learning_rate": 1.7644437843457595e-05, - "loss": 0.0821, - "step": 27730 - }, - { - "epoch": 2.0600029704440814, - "grad_norm": 1.2415636777877808, - "learning_rate": 1.763998217733551e-05, - "loss": 0.0574, - "step": 27740 - }, - { - "epoch": 2.060745581464429, - "grad_norm": 1.088160753250122, - "learning_rate": 1.763552651121343e-05, - "loss": 0.0639, - "step": 27750 - }, - { - "epoch": 2.0614881924847763, - "grad_norm": 0.9937611818313599, - "learning_rate": 1.763107084509134e-05, - "loss": 0.0905, - "step": 27760 - }, - { - "epoch": 2.062230803505124, - "grad_norm": 0.7093477845191956, - "learning_rate": 1.7626615178969255e-05, - "loss": 0.0714, - "step": 27770 - }, - { - "epoch": 2.0629734145254717, - "grad_norm": 2.274669885635376, - "learning_rate": 1.7622159512847174e-05, - "loss": 0.0692, - "step": 27780 - }, - { - "epoch": 2.063716025545819, - "grad_norm": 1.7703497409820557, - "learning_rate": 1.7617703846725085e-05, - "loss": 0.0621, - "step": 27790 - }, - { - "epoch": 2.0644586365661666, - "grad_norm": 0.3393421173095703, - "learning_rate": 1.7613248180603e-05, - "loss": 0.0528, - "step": 27800 - }, - { - "epoch": 2.065201247586514, - "grad_norm": 1.3570091724395752, - "learning_rate": 1.760879251448092e-05, - "loss": 0.0585, - "step": 27810 - }, - { - "epoch": 2.065943858606862, - "grad_norm": 1.6953606605529785, - "learning_rate": 1.760433684835883e-05, - "loss": 0.0678, - "step": 27820 - }, - { - "epoch": 2.0666864696272094, - "grad_norm": 0.6332347393035889, - "learning_rate": 1.7599881182236745e-05, - "loss": 0.0474, - "step": 27830 - }, - { - "epoch": 2.067429080647557, - "grad_norm": 4.120887756347656, - "learning_rate": 1.7595425516114657e-05, - "loss": 0.1038, - "step": 27840 - }, - { - "epoch": 2.0681716916679043, - "grad_norm": 2.6775243282318115, - "learning_rate": 1.7590969849992575e-05, - "loss": 0.0786, - "step": 27850 - }, - { - "epoch": 2.0689143026882517, - "grad_norm": 1.4522796869277954, - "learning_rate": 1.758651418387049e-05, - "loss": 0.0603, - "step": 27860 - }, - { - "epoch": 2.0696569137085996, - "grad_norm": 1.7313041687011719, - "learning_rate": 1.7582058517748402e-05, - "loss": 0.0938, - "step": 27870 - }, - { - "epoch": 2.070399524728947, - "grad_norm": 0.7014159560203552, - "learning_rate": 1.757760285162632e-05, - "loss": 0.0905, - "step": 27880 - }, - { - "epoch": 2.0711421357492945, - "grad_norm": 0.5226468443870544, - "learning_rate": 1.7573147185504235e-05, - "loss": 0.0669, - "step": 27890 - }, - { - "epoch": 2.071884746769642, - "grad_norm": 0.4912814795970917, - "learning_rate": 1.7568691519382147e-05, - "loss": 0.0562, - "step": 27900 - }, - { - "epoch": 2.0726273577899894, - "grad_norm": 0.46344590187072754, - "learning_rate": 1.7564235853260062e-05, - "loss": 0.0722, - "step": 27910 - }, - { - "epoch": 2.0733699688103373, - "grad_norm": 0.5316863656044006, - "learning_rate": 1.7559780187137977e-05, - "loss": 0.0782, - "step": 27920 - }, - { - "epoch": 2.0741125798306848, - "grad_norm": 1.1357983350753784, - "learning_rate": 1.7555324521015892e-05, - "loss": 0.0679, - "step": 27930 - }, - { - "epoch": 2.074855190851032, - "grad_norm": 0.679740309715271, - "learning_rate": 1.7550868854893807e-05, - "loss": 0.0618, - "step": 27940 - }, - { - "epoch": 2.0755978018713797, - "grad_norm": 1.716489553451538, - "learning_rate": 1.7546413188771722e-05, - "loss": 0.1001, - "step": 27950 - }, - { - "epoch": 2.076340412891727, - "grad_norm": 1.4694855213165283, - "learning_rate": 1.7541957522649637e-05, - "loss": 0.068, - "step": 27960 - }, - { - "epoch": 2.077083023912075, - "grad_norm": 2.162365436553955, - "learning_rate": 1.7537501856527552e-05, - "loss": 0.0555, - "step": 27970 - }, - { - "epoch": 2.0778256349324224, - "grad_norm": 1.0880649089813232, - "learning_rate": 1.7533046190405463e-05, - "loss": 0.0558, - "step": 27980 - }, - { - "epoch": 2.07856824595277, - "grad_norm": 2.1188676357269287, - "learning_rate": 1.7528590524283382e-05, - "loss": 0.0862, - "step": 27990 - }, - { - "epoch": 2.0793108569731173, - "grad_norm": 1.4988460540771484, - "learning_rate": 1.7524134858161297e-05, - "loss": 0.0816, - "step": 28000 - }, - { - "epoch": 2.080053467993465, - "grad_norm": 0.9901124238967896, - "learning_rate": 1.751967919203921e-05, - "loss": 0.0772, - "step": 28010 - }, - { - "epoch": 2.0807960790138127, - "grad_norm": 1.7967792749404907, - "learning_rate": 1.7515223525917127e-05, - "loss": 0.062, - "step": 28020 - }, - { - "epoch": 2.08153869003416, - "grad_norm": 1.3273664712905884, - "learning_rate": 1.751076785979504e-05, - "loss": 0.0599, - "step": 28030 - }, - { - "epoch": 2.0822813010545076, - "grad_norm": 2.504648447036743, - "learning_rate": 1.7506312193672953e-05, - "loss": 0.0437, - "step": 28040 - }, - { - "epoch": 2.083023912074855, - "grad_norm": 1.477177381515503, - "learning_rate": 1.7501856527550872e-05, - "loss": 0.0927, - "step": 28050 - }, - { - "epoch": 2.083766523095203, - "grad_norm": 1.7123514413833618, - "learning_rate": 1.7497400861428783e-05, - "loss": 0.0999, - "step": 28060 - }, - { - "epoch": 2.0845091341155504, - "grad_norm": 0.7901507616043091, - "learning_rate": 1.74929451953067e-05, - "loss": 0.0681, - "step": 28070 - }, - { - "epoch": 2.085251745135898, - "grad_norm": 0.9315057396888733, - "learning_rate": 1.7488489529184613e-05, - "loss": 0.045, - "step": 28080 - }, - { - "epoch": 2.0859943561562453, - "grad_norm": 0.80745929479599, - "learning_rate": 1.748403386306253e-05, - "loss": 0.0729, - "step": 28090 - }, - { - "epoch": 2.0867369671765927, - "grad_norm": 1.3841748237609863, - "learning_rate": 1.7479578196940443e-05, - "loss": 0.0651, - "step": 28100 - }, - { - "epoch": 2.0874795781969406, - "grad_norm": 4.800222873687744, - "learning_rate": 1.747512253081836e-05, - "loss": 0.0764, - "step": 28110 - }, - { - "epoch": 2.088222189217288, - "grad_norm": 1.6602140665054321, - "learning_rate": 1.7470666864696273e-05, - "loss": 0.0666, - "step": 28120 - }, - { - "epoch": 2.0889648002376355, - "grad_norm": 1.5725599527359009, - "learning_rate": 1.7466211198574188e-05, - "loss": 0.0759, - "step": 28130 - }, - { - "epoch": 2.089707411257983, - "grad_norm": 2.319767713546753, - "learning_rate": 1.74617555324521e-05, - "loss": 0.1133, - "step": 28140 - }, - { - "epoch": 2.0904500222783304, - "grad_norm": 2.916980266571045, - "learning_rate": 1.7457299866330015e-05, - "loss": 0.0703, - "step": 28150 - }, - { - "epoch": 2.0911926332986783, - "grad_norm": 2.074702501296997, - "learning_rate": 1.7452844200207933e-05, - "loss": 0.0736, - "step": 28160 - }, - { - "epoch": 2.0919352443190258, - "grad_norm": 1.4027667045593262, - "learning_rate": 1.7448388534085845e-05, - "loss": 0.0675, - "step": 28170 - }, - { - "epoch": 2.0926778553393732, - "grad_norm": 2.333289861679077, - "learning_rate": 1.744393286796376e-05, - "loss": 0.0573, - "step": 28180 - }, - { - "epoch": 2.0934204663597207, - "grad_norm": 1.7453340291976929, - "learning_rate": 1.7439477201841678e-05, - "loss": 0.0922, - "step": 28190 - }, - { - "epoch": 2.094163077380068, - "grad_norm": 0.6930978894233704, - "learning_rate": 1.743502153571959e-05, - "loss": 0.0847, - "step": 28200 - }, - { - "epoch": 2.094905688400416, - "grad_norm": 1.3762452602386475, - "learning_rate": 1.7430565869597505e-05, - "loss": 0.1109, - "step": 28210 - }, - { - "epoch": 2.0956482994207635, - "grad_norm": 0.3616078197956085, - "learning_rate": 1.7426110203475423e-05, - "loss": 0.0753, - "step": 28220 - }, - { - "epoch": 2.096390910441111, - "grad_norm": 2.1487832069396973, - "learning_rate": 1.7421654537353335e-05, - "loss": 0.0828, - "step": 28230 - }, - { - "epoch": 2.0971335214614584, - "grad_norm": 0.9581325054168701, - "learning_rate": 1.741719887123125e-05, - "loss": 0.0691, - "step": 28240 - }, - { - "epoch": 2.097876132481806, - "grad_norm": 2.8964858055114746, - "learning_rate": 1.741274320510916e-05, - "loss": 0.0917, - "step": 28250 - }, - { - "epoch": 2.0986187435021537, - "grad_norm": 0.576937198638916, - "learning_rate": 1.740828753898708e-05, - "loss": 0.0624, - "step": 28260 - }, - { - "epoch": 2.099361354522501, - "grad_norm": 1.5471432209014893, - "learning_rate": 1.7403831872864995e-05, - "loss": 0.0873, - "step": 28270 - }, - { - "epoch": 2.1001039655428486, - "grad_norm": 1.0015627145767212, - "learning_rate": 1.7399376206742906e-05, - "loss": 0.0765, - "step": 28280 - }, - { - "epoch": 2.100846576563196, - "grad_norm": 2.5197479724884033, - "learning_rate": 1.7394920540620825e-05, - "loss": 0.0679, - "step": 28290 - }, - { - "epoch": 2.101589187583544, - "grad_norm": 1.3474407196044922, - "learning_rate": 1.739046487449874e-05, - "loss": 0.0497, - "step": 28300 - }, - { - "epoch": 2.1023317986038914, - "grad_norm": 1.8976625204086304, - "learning_rate": 1.738600920837665e-05, - "loss": 0.0885, - "step": 28310 - }, - { - "epoch": 2.103074409624239, - "grad_norm": 3.648165464401245, - "learning_rate": 1.7381553542254566e-05, - "loss": 0.0964, - "step": 28320 - }, - { - "epoch": 2.1038170206445863, - "grad_norm": 0.9834181070327759, - "learning_rate": 1.7377097876132485e-05, - "loss": 0.0524, - "step": 28330 - }, - { - "epoch": 2.1045596316649338, - "grad_norm": 2.057588815689087, - "learning_rate": 1.7372642210010396e-05, - "loss": 0.0609, - "step": 28340 - }, - { - "epoch": 2.1053022426852817, - "grad_norm": 1.9514938592910767, - "learning_rate": 1.736818654388831e-05, - "loss": 0.09, - "step": 28350 - }, - { - "epoch": 2.106044853705629, - "grad_norm": 0.6397399306297302, - "learning_rate": 1.7363730877766226e-05, - "loss": 0.0805, - "step": 28360 - }, - { - "epoch": 2.1067874647259766, - "grad_norm": 0.7287691831588745, - "learning_rate": 1.735927521164414e-05, - "loss": 0.0517, - "step": 28370 - }, - { - "epoch": 2.107530075746324, - "grad_norm": 0.5326383113861084, - "learning_rate": 1.7354819545522056e-05, - "loss": 0.0314, - "step": 28380 - }, - { - "epoch": 2.1082726867666715, - "grad_norm": 0.252254843711853, - "learning_rate": 1.735036387939997e-05, - "loss": 0.0581, - "step": 28390 - }, - { - "epoch": 2.1090152977870193, - "grad_norm": 3.9282045364379883, - "learning_rate": 1.7345908213277886e-05, - "loss": 0.0943, - "step": 28400 - }, - { - "epoch": 2.109757908807367, - "grad_norm": 2.1069839000701904, - "learning_rate": 1.73414525471558e-05, - "loss": 0.0704, - "step": 28410 - }, - { - "epoch": 2.1105005198277142, - "grad_norm": 1.8486924171447754, - "learning_rate": 1.7336996881033713e-05, - "loss": 0.07, - "step": 28420 - }, - { - "epoch": 2.1112431308480617, - "grad_norm": 1.9672880172729492, - "learning_rate": 1.733254121491163e-05, - "loss": 0.0827, - "step": 28430 - }, - { - "epoch": 2.111985741868409, - "grad_norm": 0.7764392495155334, - "learning_rate": 1.7328085548789543e-05, - "loss": 0.056, - "step": 28440 - }, - { - "epoch": 2.112728352888757, - "grad_norm": 1.0631473064422607, - "learning_rate": 1.7323629882667458e-05, - "loss": 0.0485, - "step": 28450 - }, - { - "epoch": 2.1134709639091045, - "grad_norm": 1.6349713802337646, - "learning_rate": 1.7319174216545376e-05, - "loss": 0.0778, - "step": 28460 - }, - { - "epoch": 2.114213574929452, - "grad_norm": 2.0537021160125732, - "learning_rate": 1.7314718550423288e-05, - "loss": 0.072, - "step": 28470 - }, - { - "epoch": 2.1149561859497994, - "grad_norm": 2.0460641384124756, - "learning_rate": 1.7310262884301203e-05, - "loss": 0.0889, - "step": 28480 - }, - { - "epoch": 2.115698796970147, - "grad_norm": 1.6083009243011475, - "learning_rate": 1.7305807218179118e-05, - "loss": 0.0685, - "step": 28490 - }, - { - "epoch": 2.1164414079904947, - "grad_norm": 2.218975305557251, - "learning_rate": 1.7301351552057033e-05, - "loss": 0.0664, - "step": 28500 - }, - { - "epoch": 2.117184019010842, - "grad_norm": 1.3092341423034668, - "learning_rate": 1.7296895885934948e-05, - "loss": 0.0673, - "step": 28510 - }, - { - "epoch": 2.1179266300311896, - "grad_norm": 1.1454136371612549, - "learning_rate": 1.7292440219812863e-05, - "loss": 0.0891, - "step": 28520 - }, - { - "epoch": 2.118669241051537, - "grad_norm": 0.9033706188201904, - "learning_rate": 1.7287984553690778e-05, - "loss": 0.0671, - "step": 28530 - }, - { - "epoch": 2.1194118520718845, - "grad_norm": 1.2205688953399658, - "learning_rate": 1.7283528887568693e-05, - "loss": 0.0726, - "step": 28540 - }, - { - "epoch": 2.1201544630922324, - "grad_norm": 1.5144611597061157, - "learning_rate": 1.7279073221446605e-05, - "loss": 0.0493, - "step": 28550 - }, - { - "epoch": 2.12089707411258, - "grad_norm": 0.9755750894546509, - "learning_rate": 1.727461755532452e-05, - "loss": 0.07, - "step": 28560 - }, - { - "epoch": 2.1216396851329273, - "grad_norm": 1.5496515035629272, - "learning_rate": 1.7270161889202438e-05, - "loss": 0.0532, - "step": 28570 - }, - { - "epoch": 2.122382296153275, - "grad_norm": 1.8168680667877197, - "learning_rate": 1.726570622308035e-05, - "loss": 0.0682, - "step": 28580 - }, - { - "epoch": 2.1231249071736222, - "grad_norm": 1.872641682624817, - "learning_rate": 1.7261250556958264e-05, - "loss": 0.0572, - "step": 28590 - }, - { - "epoch": 2.12386751819397, - "grad_norm": 1.2901333570480347, - "learning_rate": 1.7256794890836183e-05, - "loss": 0.0416, - "step": 28600 - }, - { - "epoch": 2.1246101292143176, - "grad_norm": 1.7562663555145264, - "learning_rate": 1.7252339224714094e-05, - "loss": 0.0695, - "step": 28610 - }, - { - "epoch": 2.125352740234665, - "grad_norm": 0.7023272514343262, - "learning_rate": 1.724788355859201e-05, - "loss": 0.047, - "step": 28620 - }, - { - "epoch": 2.1260953512550125, - "grad_norm": 2.1506128311157227, - "learning_rate": 1.7243427892469928e-05, - "loss": 0.0718, - "step": 28630 - }, - { - "epoch": 2.1268379622753604, - "grad_norm": 1.4215508699417114, - "learning_rate": 1.723897222634784e-05, - "loss": 0.0607, - "step": 28640 - }, - { - "epoch": 2.127580573295708, - "grad_norm": 0.4577394723892212, - "learning_rate": 1.7234516560225754e-05, - "loss": 0.0454, - "step": 28650 - }, - { - "epoch": 2.1283231843160553, - "grad_norm": 1.4944130182266235, - "learning_rate": 1.7230060894103666e-05, - "loss": 0.077, - "step": 28660 - }, - { - "epoch": 2.1290657953364027, - "grad_norm": 1.4956315755844116, - "learning_rate": 1.7225605227981584e-05, - "loss": 0.0619, - "step": 28670 - }, - { - "epoch": 2.12980840635675, - "grad_norm": 1.8317357301712036, - "learning_rate": 1.72211495618595e-05, - "loss": 0.0803, - "step": 28680 - }, - { - "epoch": 2.130551017377098, - "grad_norm": 0.5078200101852417, - "learning_rate": 1.721669389573741e-05, - "loss": 0.0654, - "step": 28690 - }, - { - "epoch": 2.1312936283974455, - "grad_norm": 1.6214134693145752, - "learning_rate": 1.721223822961533e-05, - "loss": 0.0542, - "step": 28700 - }, - { - "epoch": 2.132036239417793, - "grad_norm": 2.1985042095184326, - "learning_rate": 1.7207782563493244e-05, - "loss": 0.0543, - "step": 28710 - }, - { - "epoch": 2.1327788504381404, - "grad_norm": 1.8467528820037842, - "learning_rate": 1.7203326897371156e-05, - "loss": 0.0596, - "step": 28720 - }, - { - "epoch": 2.133521461458488, - "grad_norm": 1.9839740991592407, - "learning_rate": 1.719887123124907e-05, - "loss": 0.082, - "step": 28730 - }, - { - "epoch": 2.1342640724788358, - "grad_norm": 1.7829911708831787, - "learning_rate": 1.719441556512699e-05, - "loss": 0.0709, - "step": 28740 - }, - { - "epoch": 2.135006683499183, - "grad_norm": 1.043108582496643, - "learning_rate": 1.71899598990049e-05, - "loss": 0.0571, - "step": 28750 - }, - { - "epoch": 2.1357492945195307, - "grad_norm": 1.6130720376968384, - "learning_rate": 1.7185504232882816e-05, - "loss": 0.0577, - "step": 28760 - }, - { - "epoch": 2.136491905539878, - "grad_norm": 0.3938934803009033, - "learning_rate": 1.718104856676073e-05, - "loss": 0.0713, - "step": 28770 - }, - { - "epoch": 2.1372345165602256, - "grad_norm": 2.0002403259277344, - "learning_rate": 1.7176592900638646e-05, - "loss": 0.063, - "step": 28780 - }, - { - "epoch": 2.1379771275805735, - "grad_norm": 1.1039925813674927, - "learning_rate": 1.717213723451656e-05, - "loss": 0.0555, - "step": 28790 - }, - { - "epoch": 2.138719738600921, - "grad_norm": 1.2151685953140259, - "learning_rate": 1.7167681568394476e-05, - "loss": 0.0613, - "step": 28800 - }, - { - "epoch": 2.1394623496212684, - "grad_norm": 0.6045921444892883, - "learning_rate": 1.716322590227239e-05, - "loss": 0.0827, - "step": 28810 - }, - { - "epoch": 2.140204960641616, - "grad_norm": 1.541783094406128, - "learning_rate": 1.7158770236150306e-05, - "loss": 0.0771, - "step": 28820 - }, - { - "epoch": 2.1409475716619633, - "grad_norm": 3.0538811683654785, - "learning_rate": 1.7154314570028218e-05, - "loss": 0.1097, - "step": 28830 - }, - { - "epoch": 2.141690182682311, - "grad_norm": 1.1775662899017334, - "learning_rate": 1.7149858903906136e-05, - "loss": 0.0666, - "step": 28840 - }, - { - "epoch": 2.1424327937026586, - "grad_norm": 1.498507022857666, - "learning_rate": 1.7145403237784048e-05, - "loss": 0.0671, - "step": 28850 - }, - { - "epoch": 2.143175404723006, - "grad_norm": 0.7959874868392944, - "learning_rate": 1.7140947571661963e-05, - "loss": 0.057, - "step": 28860 - }, - { - "epoch": 2.1439180157433535, - "grad_norm": 2.1774017810821533, - "learning_rate": 1.713649190553988e-05, - "loss": 0.1016, - "step": 28870 - }, - { - "epoch": 2.1446606267637014, - "grad_norm": 0.7331980466842651, - "learning_rate": 1.7132036239417793e-05, - "loss": 0.0634, - "step": 28880 - }, - { - "epoch": 2.145403237784049, - "grad_norm": 0.8138454556465149, - "learning_rate": 1.7127580573295708e-05, - "loss": 0.0474, - "step": 28890 - }, - { - "epoch": 2.1461458488043963, - "grad_norm": 1.016754388809204, - "learning_rate": 1.7123124907173623e-05, - "loss": 0.0718, - "step": 28900 - }, - { - "epoch": 2.1468884598247437, - "grad_norm": 1.882819414138794, - "learning_rate": 1.7118669241051538e-05, - "loss": 0.0809, - "step": 28910 - }, - { - "epoch": 2.147631070845091, - "grad_norm": 0.7724311351776123, - "learning_rate": 1.7114213574929453e-05, - "loss": 0.0775, - "step": 28920 - }, - { - "epoch": 2.148373681865439, - "grad_norm": 2.551377534866333, - "learning_rate": 1.7109757908807368e-05, - "loss": 0.0776, - "step": 28930 - }, - { - "epoch": 2.1491162928857865, - "grad_norm": 1.454253911972046, - "learning_rate": 1.7105302242685283e-05, - "loss": 0.0635, - "step": 28940 - }, - { - "epoch": 2.149858903906134, - "grad_norm": 0.8662858009338379, - "learning_rate": 1.7100846576563197e-05, - "loss": 0.0689, - "step": 28950 - }, - { - "epoch": 2.1506015149264814, - "grad_norm": 1.5164152383804321, - "learning_rate": 1.709639091044111e-05, - "loss": 0.0956, - "step": 28960 - }, - { - "epoch": 2.151344125946829, - "grad_norm": 1.9796892404556274, - "learning_rate": 1.7091935244319024e-05, - "loss": 0.08, - "step": 28970 - }, - { - "epoch": 2.152086736967177, - "grad_norm": 2.7751400470733643, - "learning_rate": 1.7087479578196942e-05, - "loss": 0.0689, - "step": 28980 - }, - { - "epoch": 2.1528293479875242, - "grad_norm": 0.9930230379104614, - "learning_rate": 1.7083023912074854e-05, - "loss": 0.0691, - "step": 28990 - }, - { - "epoch": 2.1535719590078717, - "grad_norm": 0.8586207032203674, - "learning_rate": 1.707856824595277e-05, - "loss": 0.0729, - "step": 29000 - }, - { - "epoch": 2.154314570028219, - "grad_norm": 1.4148691892623901, - "learning_rate": 1.7074112579830687e-05, - "loss": 0.0488, - "step": 29010 - }, - { - "epoch": 2.1550571810485666, - "grad_norm": 1.3059778213500977, - "learning_rate": 1.70696569137086e-05, - "loss": 0.0812, - "step": 29020 - }, - { - "epoch": 2.1557997920689145, - "grad_norm": 0.3190613389015198, - "learning_rate": 1.7065201247586514e-05, - "loss": 0.0524, - "step": 29030 - }, - { - "epoch": 2.156542403089262, - "grad_norm": 2.4894535541534424, - "learning_rate": 1.7060745581464432e-05, - "loss": 0.1131, - "step": 29040 - }, - { - "epoch": 2.1572850141096094, - "grad_norm": 2.3826353549957275, - "learning_rate": 1.7056289915342344e-05, - "loss": 0.0722, - "step": 29050 - }, - { - "epoch": 2.158027625129957, - "grad_norm": 0.887143611907959, - "learning_rate": 1.705183424922026e-05, - "loss": 0.0498, - "step": 29060 - }, - { - "epoch": 2.1587702361503043, - "grad_norm": 2.6809613704681396, - "learning_rate": 1.704737858309817e-05, - "loss": 0.1214, - "step": 29070 - }, - { - "epoch": 2.159512847170652, - "grad_norm": 1.2329598665237427, - "learning_rate": 1.704292291697609e-05, - "loss": 0.0972, - "step": 29080 - }, - { - "epoch": 2.1602554581909996, - "grad_norm": 2.446826457977295, - "learning_rate": 1.7038467250854004e-05, - "loss": 0.0659, - "step": 29090 - }, - { - "epoch": 2.160998069211347, - "grad_norm": 1.961200475692749, - "learning_rate": 1.7034011584731916e-05, - "loss": 0.0396, - "step": 29100 - }, - { - "epoch": 2.1617406802316945, - "grad_norm": 0.2081407755613327, - "learning_rate": 1.7029555918609834e-05, - "loss": 0.0452, - "step": 29110 - }, - { - "epoch": 2.162483291252042, - "grad_norm": 1.8672429323196411, - "learning_rate": 1.702510025248775e-05, - "loss": 0.0674, - "step": 29120 - }, - { - "epoch": 2.16322590227239, - "grad_norm": 2.0744621753692627, - "learning_rate": 1.702064458636566e-05, - "loss": 0.0899, - "step": 29130 - }, - { - "epoch": 2.1639685132927373, - "grad_norm": 0.9854013323783875, - "learning_rate": 1.7016188920243576e-05, - "loss": 0.0731, - "step": 29140 - }, - { - "epoch": 2.1647111243130848, - "grad_norm": 1.5703667402267456, - "learning_rate": 1.7011733254121494e-05, - "loss": 0.0887, - "step": 29150 - }, - { - "epoch": 2.165453735333432, - "grad_norm": 1.483916997909546, - "learning_rate": 1.7007277587999406e-05, - "loss": 0.0863, - "step": 29160 - }, - { - "epoch": 2.1661963463537797, - "grad_norm": 1.5202964544296265, - "learning_rate": 1.700282192187732e-05, - "loss": 0.0785, - "step": 29170 - }, - { - "epoch": 2.1669389573741276, - "grad_norm": 0.9036029577255249, - "learning_rate": 1.6998366255755236e-05, - "loss": 0.0603, - "step": 29180 - }, - { - "epoch": 2.167681568394475, - "grad_norm": 0.647527277469635, - "learning_rate": 1.699391058963315e-05, - "loss": 0.0313, - "step": 29190 - }, - { - "epoch": 2.1684241794148225, - "grad_norm": 1.4471430778503418, - "learning_rate": 1.6989454923511066e-05, - "loss": 0.1026, - "step": 29200 - }, - { - "epoch": 2.16916679043517, - "grad_norm": 1.5521758794784546, - "learning_rate": 1.698499925738898e-05, - "loss": 0.0581, - "step": 29210 - }, - { - "epoch": 2.169909401455518, - "grad_norm": 0.8579624891281128, - "learning_rate": 1.6980543591266896e-05, - "loss": 0.0591, - "step": 29220 - }, - { - "epoch": 2.1706520124758653, - "grad_norm": 0.8837199211120605, - "learning_rate": 1.697608792514481e-05, - "loss": 0.0776, - "step": 29230 - }, - { - "epoch": 2.1713946234962127, - "grad_norm": 0.8631309866905212, - "learning_rate": 1.6971632259022722e-05, - "loss": 0.0709, - "step": 29240 - }, - { - "epoch": 2.17213723451656, - "grad_norm": 1.2741137742996216, - "learning_rate": 1.696717659290064e-05, - "loss": 0.0607, - "step": 29250 - }, - { - "epoch": 2.1728798455369076, - "grad_norm": 2.396149158477783, - "learning_rate": 1.6962720926778556e-05, - "loss": 0.0546, - "step": 29260 - }, - { - "epoch": 2.1736224565572555, - "grad_norm": 1.4403908252716064, - "learning_rate": 1.6958265260656467e-05, - "loss": 0.0452, - "step": 29270 - }, - { - "epoch": 2.174365067577603, - "grad_norm": 1.8442835807800293, - "learning_rate": 1.6953809594534386e-05, - "loss": 0.0869, - "step": 29280 - }, - { - "epoch": 2.1751076785979504, - "grad_norm": 1.5323288440704346, - "learning_rate": 1.6949353928412297e-05, - "loss": 0.0881, - "step": 29290 - }, - { - "epoch": 2.175850289618298, - "grad_norm": 0.6434569358825684, - "learning_rate": 1.6944898262290212e-05, - "loss": 0.0635, - "step": 29300 - }, - { - "epoch": 2.1765929006386453, - "grad_norm": 0.8383660912513733, - "learning_rate": 1.6940442596168127e-05, - "loss": 0.0545, - "step": 29310 - }, - { - "epoch": 2.177335511658993, - "grad_norm": 1.861343264579773, - "learning_rate": 1.6935986930046042e-05, - "loss": 0.0557, - "step": 29320 - }, - { - "epoch": 2.1780781226793406, - "grad_norm": 1.4994820356369019, - "learning_rate": 1.6931531263923957e-05, - "loss": 0.0584, - "step": 29330 - }, - { - "epoch": 2.178820733699688, - "grad_norm": 0.872047483921051, - "learning_rate": 1.6927075597801872e-05, - "loss": 0.1263, - "step": 29340 - }, - { - "epoch": 2.1795633447200355, - "grad_norm": 1.3193352222442627, - "learning_rate": 1.6922619931679787e-05, - "loss": 0.0715, - "step": 29350 - }, - { - "epoch": 2.180305955740383, - "grad_norm": 1.5889509916305542, - "learning_rate": 1.6918164265557702e-05, - "loss": 0.0859, - "step": 29360 - }, - { - "epoch": 2.181048566760731, - "grad_norm": 1.012092113494873, - "learning_rate": 1.6913708599435614e-05, - "loss": 0.0596, - "step": 29370 - }, - { - "epoch": 2.1817911777810783, - "grad_norm": 1.8053189516067505, - "learning_rate": 1.6909252933313532e-05, - "loss": 0.0625, - "step": 29380 - }, - { - "epoch": 2.182533788801426, - "grad_norm": 0.7296652793884277, - "learning_rate": 1.6904797267191447e-05, - "loss": 0.0597, - "step": 29390 - }, - { - "epoch": 2.1832763998217732, - "grad_norm": 1.110438346862793, - "learning_rate": 1.690034160106936e-05, - "loss": 0.0538, - "step": 29400 - }, - { - "epoch": 2.1840190108421207, - "grad_norm": 2.128885507583618, - "learning_rate": 1.6895885934947274e-05, - "loss": 0.0559, - "step": 29410 - }, - { - "epoch": 2.1847616218624686, - "grad_norm": 1.590346097946167, - "learning_rate": 1.6891430268825192e-05, - "loss": 0.0496, - "step": 29420 - }, - { - "epoch": 2.185504232882816, - "grad_norm": 1.5324519872665405, - "learning_rate": 1.6886974602703104e-05, - "loss": 0.0778, - "step": 29430 - }, - { - "epoch": 2.1862468439031635, - "grad_norm": 0.8141632080078125, - "learning_rate": 1.688251893658102e-05, - "loss": 0.079, - "step": 29440 - }, - { - "epoch": 2.186989454923511, - "grad_norm": 2.3867433071136475, - "learning_rate": 1.6878063270458937e-05, - "loss": 0.0727, - "step": 29450 - }, - { - "epoch": 2.187732065943859, - "grad_norm": 1.383835792541504, - "learning_rate": 1.687360760433685e-05, - "loss": 0.0553, - "step": 29460 - }, - { - "epoch": 2.1884746769642063, - "grad_norm": 1.1126325130462646, - "learning_rate": 1.6869151938214764e-05, - "loss": 0.0698, - "step": 29470 - }, - { - "epoch": 2.1892172879845537, - "grad_norm": 1.8178859949111938, - "learning_rate": 1.6864696272092675e-05, - "loss": 0.087, - "step": 29480 - }, - { - "epoch": 2.189959899004901, - "grad_norm": 1.6789990663528442, - "learning_rate": 1.6860240605970594e-05, - "loss": 0.0652, - "step": 29490 - }, - { - "epoch": 2.1907025100252486, - "grad_norm": 1.2279289960861206, - "learning_rate": 1.685578493984851e-05, - "loss": 0.0915, - "step": 29500 - }, - { - "epoch": 2.1914451210455965, - "grad_norm": 1.7912808656692505, - "learning_rate": 1.685132927372642e-05, - "loss": 0.0849, - "step": 29510 - }, - { - "epoch": 2.192187732065944, - "grad_norm": 0.8681305050849915, - "learning_rate": 1.684687360760434e-05, - "loss": 0.0771, - "step": 29520 - }, - { - "epoch": 2.1929303430862914, - "grad_norm": 3.010956287384033, - "learning_rate": 1.6842417941482254e-05, - "loss": 0.0818, - "step": 29530 - }, - { - "epoch": 2.193672954106639, - "grad_norm": 1.075864553451538, - "learning_rate": 1.6837962275360165e-05, - "loss": 0.0665, - "step": 29540 - }, - { - "epoch": 2.1944155651269863, - "grad_norm": 1.1837166547775269, - "learning_rate": 1.683350660923808e-05, - "loss": 0.0608, - "step": 29550 - }, - { - "epoch": 2.195158176147334, - "grad_norm": 0.6628900766372681, - "learning_rate": 1.6829050943116e-05, - "loss": 0.0717, - "step": 29560 - }, - { - "epoch": 2.1959007871676817, - "grad_norm": 0.9537403583526611, - "learning_rate": 1.682459527699391e-05, - "loss": 0.0573, - "step": 29570 - }, - { - "epoch": 2.196643398188029, - "grad_norm": 2.0913939476013184, - "learning_rate": 1.6820139610871825e-05, - "loss": 0.1074, - "step": 29580 - }, - { - "epoch": 2.1973860092083766, - "grad_norm": 0.6338614821434021, - "learning_rate": 1.681568394474974e-05, - "loss": 0.0655, - "step": 29590 - }, - { - "epoch": 2.198128620228724, - "grad_norm": 2.1373088359832764, - "learning_rate": 1.6811228278627655e-05, - "loss": 0.05, - "step": 29600 - }, - { - "epoch": 2.198871231249072, - "grad_norm": 1.9427019357681274, - "learning_rate": 1.680677261250557e-05, - "loss": 0.0555, - "step": 29610 - }, - { - "epoch": 2.1996138422694194, - "grad_norm": 1.4638181924819946, - "learning_rate": 1.6802316946383485e-05, - "loss": 0.0617, - "step": 29620 - }, - { - "epoch": 2.200356453289767, - "grad_norm": 1.2105026245117188, - "learning_rate": 1.67978612802614e-05, - "loss": 0.0647, - "step": 29630 - }, - { - "epoch": 2.2010990643101143, - "grad_norm": 1.4060852527618408, - "learning_rate": 1.6793405614139315e-05, - "loss": 0.0727, - "step": 29640 - }, - { - "epoch": 2.2018416753304617, - "grad_norm": 2.5179665088653564, - "learning_rate": 1.6788949948017227e-05, - "loss": 0.0736, - "step": 29650 - }, - { - "epoch": 2.2025842863508096, - "grad_norm": 2.2634148597717285, - "learning_rate": 1.6784494281895145e-05, - "loss": 0.0724, - "step": 29660 - }, - { - "epoch": 2.203326897371157, - "grad_norm": 1.1711833477020264, - "learning_rate": 1.678003861577306e-05, - "loss": 0.0677, - "step": 29670 - }, - { - "epoch": 2.2040695083915045, - "grad_norm": 1.9533751010894775, - "learning_rate": 1.6775582949650972e-05, - "loss": 0.0682, - "step": 29680 - }, - { - "epoch": 2.204812119411852, - "grad_norm": 1.6772757768630981, - "learning_rate": 1.677112728352889e-05, - "loss": 0.0952, - "step": 29690 - }, - { - "epoch": 2.2055547304321994, - "grad_norm": 1.3155872821807861, - "learning_rate": 1.6766671617406802e-05, - "loss": 0.0654, - "step": 29700 - }, - { - "epoch": 2.2062973414525473, - "grad_norm": 0.7310417294502258, - "learning_rate": 1.6762215951284717e-05, - "loss": 0.0749, - "step": 29710 - }, - { - "epoch": 2.2070399524728948, - "grad_norm": 1.559289813041687, - "learning_rate": 1.6757760285162632e-05, - "loss": 0.0524, - "step": 29720 - }, - { - "epoch": 2.207782563493242, - "grad_norm": 1.340665578842163, - "learning_rate": 1.6753304619040547e-05, - "loss": 0.0692, - "step": 29730 - }, - { - "epoch": 2.2085251745135897, - "grad_norm": 0.5830607414245605, - "learning_rate": 1.6748848952918462e-05, - "loss": 0.1112, - "step": 29740 - }, - { - "epoch": 2.209267785533937, - "grad_norm": 1.7768720388412476, - "learning_rate": 1.6744393286796377e-05, - "loss": 0.0557, - "step": 29750 - }, - { - "epoch": 2.210010396554285, - "grad_norm": 0.5409281849861145, - "learning_rate": 1.6739937620674292e-05, - "loss": 0.0565, - "step": 29760 - }, - { - "epoch": 2.2107530075746324, - "grad_norm": 1.0394829511642456, - "learning_rate": 1.6735481954552207e-05, - "loss": 0.0581, - "step": 29770 - }, - { - "epoch": 2.21149561859498, - "grad_norm": 1.975055456161499, - "learning_rate": 1.673102628843012e-05, - "loss": 0.101, - "step": 29780 - }, - { - "epoch": 2.2122382296153273, - "grad_norm": 1.235195279121399, - "learning_rate": 1.6726570622308037e-05, - "loss": 0.0529, - "step": 29790 - }, - { - "epoch": 2.2129808406356752, - "grad_norm": 3.506690263748169, - "learning_rate": 1.672211495618595e-05, - "loss": 0.0922, - "step": 29800 - }, - { - "epoch": 2.2137234516560227, - "grad_norm": 2.5403103828430176, - "learning_rate": 1.6717659290063863e-05, - "loss": 0.0768, - "step": 29810 - }, - { - "epoch": 2.21446606267637, - "grad_norm": 1.3894189596176147, - "learning_rate": 1.6713203623941778e-05, - "loss": 0.0642, - "step": 29820 - }, - { - "epoch": 2.2152086736967176, - "grad_norm": 1.7695733308792114, - "learning_rate": 1.6708747957819697e-05, - "loss": 0.0699, - "step": 29830 - }, - { - "epoch": 2.215951284717065, - "grad_norm": 1.9002711772918701, - "learning_rate": 1.6704292291697608e-05, - "loss": 0.0587, - "step": 29840 - }, - { - "epoch": 2.216693895737413, - "grad_norm": 2.1885085105895996, - "learning_rate": 1.6699836625575523e-05, - "loss": 0.0808, - "step": 29850 - }, - { - "epoch": 2.2174365067577604, - "grad_norm": 0.7245926856994629, - "learning_rate": 1.669538095945344e-05, - "loss": 0.0767, - "step": 29860 - }, - { - "epoch": 2.218179117778108, - "grad_norm": 1.1416701078414917, - "learning_rate": 1.6690925293331353e-05, - "loss": 0.0552, - "step": 29870 - }, - { - "epoch": 2.2189217287984553, - "grad_norm": 0.4477648138999939, - "learning_rate": 1.6686469627209268e-05, - "loss": 0.092, - "step": 29880 - }, - { - "epoch": 2.2196643398188027, - "grad_norm": 1.4506340026855469, - "learning_rate": 1.668201396108718e-05, - "loss": 0.0803, - "step": 29890 - }, - { - "epoch": 2.2204069508391506, - "grad_norm": 1.6978100538253784, - "learning_rate": 1.6677558294965098e-05, - "loss": 0.0516, - "step": 29900 - }, - { - "epoch": 2.221149561859498, - "grad_norm": 0.5973829030990601, - "learning_rate": 1.6673102628843013e-05, - "loss": 0.0748, - "step": 29910 - }, - { - "epoch": 2.2218921728798455, - "grad_norm": 1.091837763786316, - "learning_rate": 1.6668646962720925e-05, - "loss": 0.0831, - "step": 29920 - }, - { - "epoch": 2.222634783900193, - "grad_norm": 0.9532679319381714, - "learning_rate": 1.6664191296598843e-05, - "loss": 0.0632, - "step": 29930 - }, - { - "epoch": 2.2233773949205404, - "grad_norm": 1.0691170692443848, - "learning_rate": 1.6659735630476758e-05, - "loss": 0.0863, - "step": 29940 - }, - { - "epoch": 2.2241200059408883, - "grad_norm": 0.724433958530426, - "learning_rate": 1.665527996435467e-05, - "loss": 0.0485, - "step": 29950 - }, - { - "epoch": 2.2248626169612358, - "grad_norm": 2.8994836807250977, - "learning_rate": 1.6650824298232585e-05, - "loss": 0.0611, - "step": 29960 - }, - { - "epoch": 2.2256052279815832, - "grad_norm": 0.5383403301239014, - "learning_rate": 1.6646368632110503e-05, - "loss": 0.0382, - "step": 29970 - }, - { - "epoch": 2.2263478390019307, - "grad_norm": 4.375266075134277, - "learning_rate": 1.6641912965988415e-05, - "loss": 0.0434, - "step": 29980 - }, - { - "epoch": 2.227090450022278, - "grad_norm": 4.827844619750977, - "learning_rate": 1.663745729986633e-05, - "loss": 0.0683, - "step": 29990 - }, - { - "epoch": 2.227833061042626, - "grad_norm": 1.5270919799804688, - "learning_rate": 1.6633001633744245e-05, - "loss": 0.0859, - "step": 30000 - }, - { - "epoch": 2.2285756720629735, - "grad_norm": 2.311201572418213, - "learning_rate": 1.662854596762216e-05, - "loss": 0.1089, - "step": 30010 - }, - { - "epoch": 2.229318283083321, - "grad_norm": 1.0003308057785034, - "learning_rate": 1.6624090301500075e-05, - "loss": 0.0851, - "step": 30020 - }, - { - "epoch": 2.2300608941036684, - "grad_norm": 1.064595103263855, - "learning_rate": 1.661963463537799e-05, - "loss": 0.0711, - "step": 30030 - }, - { - "epoch": 2.2308035051240163, - "grad_norm": 2.0935966968536377, - "learning_rate": 1.6615178969255905e-05, - "loss": 0.073, - "step": 30040 - }, - { - "epoch": 2.2315461161443637, - "grad_norm": 1.3619099855422974, - "learning_rate": 1.661072330313382e-05, - "loss": 0.0707, - "step": 30050 - }, - { - "epoch": 2.232288727164711, - "grad_norm": 3.1474947929382324, - "learning_rate": 1.660626763701173e-05, - "loss": 0.0401, - "step": 30060 - }, - { - "epoch": 2.2330313381850586, - "grad_norm": 2.438246250152588, - "learning_rate": 1.660181197088965e-05, - "loss": 0.0783, - "step": 30070 - }, - { - "epoch": 2.233773949205406, - "grad_norm": 2.8391036987304688, - "learning_rate": 1.6597356304767565e-05, - "loss": 0.0673, - "step": 30080 - }, - { - "epoch": 2.234516560225754, - "grad_norm": 1.0939983129501343, - "learning_rate": 1.6592900638645476e-05, - "loss": 0.0676, - "step": 30090 - }, - { - "epoch": 2.2352591712461014, - "grad_norm": 1.0146929025650024, - "learning_rate": 1.6588444972523395e-05, - "loss": 0.0567, - "step": 30100 - }, - { - "epoch": 2.236001782266449, - "grad_norm": 0.7500823140144348, - "learning_rate": 1.6583989306401306e-05, - "loss": 0.0618, - "step": 30110 - }, - { - "epoch": 2.2367443932867963, - "grad_norm": 1.022570252418518, - "learning_rate": 1.657953364027922e-05, - "loss": 0.0878, - "step": 30120 - }, - { - "epoch": 2.2374870043071438, - "grad_norm": 2.924889087677002, - "learning_rate": 1.6575077974157136e-05, - "loss": 0.0537, - "step": 30130 - }, - { - "epoch": 2.2382296153274917, - "grad_norm": 1.7216036319732666, - "learning_rate": 1.657062230803505e-05, - "loss": 0.1027, - "step": 30140 - }, - { - "epoch": 2.238972226347839, - "grad_norm": 0.9251584410667419, - "learning_rate": 1.6566166641912966e-05, - "loss": 0.0823, - "step": 30150 - }, - { - "epoch": 2.2397148373681866, - "grad_norm": 3.20623779296875, - "learning_rate": 1.656171097579088e-05, - "loss": 0.0619, - "step": 30160 - }, - { - "epoch": 2.240457448388534, - "grad_norm": 1.0136836767196655, - "learning_rate": 1.6557255309668796e-05, - "loss": 0.0755, - "step": 30170 - }, - { - "epoch": 2.2412000594088815, - "grad_norm": 1.4571853876113892, - "learning_rate": 1.655279964354671e-05, - "loss": 0.1068, - "step": 30180 - }, - { - "epoch": 2.2419426704292293, - "grad_norm": 1.8920878171920776, - "learning_rate": 1.6548343977424626e-05, - "loss": 0.0733, - "step": 30190 - }, - { - "epoch": 2.242685281449577, - "grad_norm": 1.619733452796936, - "learning_rate": 1.654388831130254e-05, - "loss": 0.092, - "step": 30200 - }, - { - "epoch": 2.2434278924699242, - "grad_norm": 2.0480968952178955, - "learning_rate": 1.6539432645180456e-05, - "loss": 0.061, - "step": 30210 - }, - { - "epoch": 2.2441705034902717, - "grad_norm": 0.9134529829025269, - "learning_rate": 1.6534976979058368e-05, - "loss": 0.0773, - "step": 30220 - }, - { - "epoch": 2.244913114510619, - "grad_norm": 1.261897325515747, - "learning_rate": 1.6530521312936283e-05, - "loss": 0.0656, - "step": 30230 - }, - { - "epoch": 2.245655725530967, - "grad_norm": 1.7915825843811035, - "learning_rate": 1.65260656468142e-05, - "loss": 0.0685, - "step": 30240 - }, - { - "epoch": 2.2463983365513145, - "grad_norm": 2.638289451599121, - "learning_rate": 1.6521609980692113e-05, - "loss": 0.0938, - "step": 30250 - }, - { - "epoch": 2.247140947571662, - "grad_norm": 1.1665395498275757, - "learning_rate": 1.6517154314570028e-05, - "loss": 0.0999, - "step": 30260 - }, - { - "epoch": 2.2478835585920094, - "grad_norm": 0.8219088912010193, - "learning_rate": 1.6512698648447946e-05, - "loss": 0.0732, - "step": 30270 - }, - { - "epoch": 2.248626169612357, - "grad_norm": 1.1381323337554932, - "learning_rate": 1.6508242982325858e-05, - "loss": 0.1115, - "step": 30280 - }, - { - "epoch": 2.2493687806327047, - "grad_norm": 2.4219374656677246, - "learning_rate": 1.6503787316203773e-05, - "loss": 0.0829, - "step": 30290 - }, - { - "epoch": 2.250111391653052, - "grad_norm": 1.201055645942688, - "learning_rate": 1.6499331650081684e-05, - "loss": 0.047, - "step": 30300 - }, - { - "epoch": 2.2508540026733996, - "grad_norm": 0.7391691207885742, - "learning_rate": 1.6494875983959603e-05, - "loss": 0.0452, - "step": 30310 - }, - { - "epoch": 2.251596613693747, - "grad_norm": 0.3732907176017761, - "learning_rate": 1.6490420317837518e-05, - "loss": 0.0624, - "step": 30320 - }, - { - "epoch": 2.2523392247140945, - "grad_norm": 0.5641918182373047, - "learning_rate": 1.648596465171543e-05, - "loss": 0.0906, - "step": 30330 - }, - { - "epoch": 2.2530818357344424, - "grad_norm": 2.029506206512451, - "learning_rate": 1.6481508985593348e-05, - "loss": 0.0745, - "step": 30340 - }, - { - "epoch": 2.25382444675479, - "grad_norm": 2.202345132827759, - "learning_rate": 1.6477053319471263e-05, - "loss": 0.0843, - "step": 30350 - }, - { - "epoch": 2.2545670577751373, - "grad_norm": 2.1847546100616455, - "learning_rate": 1.6472597653349174e-05, - "loss": 0.0897, - "step": 30360 - }, - { - "epoch": 2.255309668795485, - "grad_norm": 1.7908791303634644, - "learning_rate": 1.646814198722709e-05, - "loss": 0.0694, - "step": 30370 - }, - { - "epoch": 2.2560522798158322, - "grad_norm": 3.3176610469818115, - "learning_rate": 1.6463686321105008e-05, - "loss": 0.0674, - "step": 30380 - }, - { - "epoch": 2.25679489083618, - "grad_norm": 1.1117013692855835, - "learning_rate": 1.645923065498292e-05, - "loss": 0.0419, - "step": 30390 - }, - { - "epoch": 2.2575375018565276, - "grad_norm": 2.5527663230895996, - "learning_rate": 1.6454774988860834e-05, - "loss": 0.0818, - "step": 30400 - }, - { - "epoch": 2.258280112876875, - "grad_norm": 0.6371174454689026, - "learning_rate": 1.645031932273875e-05, - "loss": 0.0626, - "step": 30410 - }, - { - "epoch": 2.2590227238972225, - "grad_norm": 1.5492578744888306, - "learning_rate": 1.6445863656616664e-05, - "loss": 0.0681, - "step": 30420 - }, - { - "epoch": 2.2597653349175704, - "grad_norm": 0.8762646317481995, - "learning_rate": 1.644140799049458e-05, - "loss": 0.0724, - "step": 30430 - }, - { - "epoch": 2.260507945937918, - "grad_norm": 2.67170786857605, - "learning_rate": 1.6436952324372494e-05, - "loss": 0.0614, - "step": 30440 - }, - { - "epoch": 2.2612505569582653, - "grad_norm": 2.633847713470459, - "learning_rate": 1.643249665825041e-05, - "loss": 0.0799, - "step": 30450 - }, - { - "epoch": 2.2619931679786127, - "grad_norm": 2.2049152851104736, - "learning_rate": 1.6428040992128324e-05, - "loss": 0.0641, - "step": 30460 - }, - { - "epoch": 2.26273577899896, - "grad_norm": 6.4670186042785645, - "learning_rate": 1.6423585326006236e-05, - "loss": 0.0696, - "step": 30470 - }, - { - "epoch": 2.263478390019308, - "grad_norm": 1.440319538116455, - "learning_rate": 1.6419129659884154e-05, - "loss": 0.0519, - "step": 30480 - }, - { - "epoch": 2.2642210010396555, - "grad_norm": 1.5117939710617065, - "learning_rate": 1.641467399376207e-05, - "loss": 0.0679, - "step": 30490 - }, - { - "epoch": 2.264963612060003, - "grad_norm": 0.705443799495697, - "learning_rate": 1.641021832763998e-05, - "loss": 0.0956, - "step": 30500 - }, - { - "epoch": 2.2657062230803504, - "grad_norm": 2.2516098022460938, - "learning_rate": 1.64057626615179e-05, - "loss": 0.0704, - "step": 30510 - }, - { - "epoch": 2.266448834100698, - "grad_norm": 1.6648731231689453, - "learning_rate": 1.640130699539581e-05, - "loss": 0.0531, - "step": 30520 - }, - { - "epoch": 2.2671914451210458, - "grad_norm": 1.7640278339385986, - "learning_rate": 1.6396851329273726e-05, - "loss": 0.0787, - "step": 30530 - }, - { - "epoch": 2.267934056141393, - "grad_norm": 0.9552247524261475, - "learning_rate": 1.639239566315164e-05, - "loss": 0.0744, - "step": 30540 - }, - { - "epoch": 2.2686766671617407, - "grad_norm": 2.791886329650879, - "learning_rate": 1.6387939997029556e-05, - "loss": 0.0758, - "step": 30550 - }, - { - "epoch": 2.269419278182088, - "grad_norm": 3.42543363571167, - "learning_rate": 1.638348433090747e-05, - "loss": 0.0743, - "step": 30560 - }, - { - "epoch": 2.270161889202436, - "grad_norm": 2.195741653442383, - "learning_rate": 1.6379028664785386e-05, - "loss": 0.0793, - "step": 30570 - }, - { - "epoch": 2.2709045002227835, - "grad_norm": 1.3603512048721313, - "learning_rate": 1.63745729986633e-05, - "loss": 0.094, - "step": 30580 - }, - { - "epoch": 2.271647111243131, - "grad_norm": 0.9138447642326355, - "learning_rate": 1.6370117332541216e-05, - "loss": 0.0973, - "step": 30590 - }, - { - "epoch": 2.2723897222634784, - "grad_norm": 1.8551127910614014, - "learning_rate": 1.636566166641913e-05, - "loss": 0.037, - "step": 30600 - }, - { - "epoch": 2.273132333283826, - "grad_norm": 0.9217031002044678, - "learning_rate": 1.6361206000297046e-05, - "loss": 0.0537, - "step": 30610 - }, - { - "epoch": 2.2738749443041737, - "grad_norm": 0.9425798654556274, - "learning_rate": 1.635675033417496e-05, - "loss": 0.067, - "step": 30620 - }, - { - "epoch": 2.274617555324521, - "grad_norm": 2.4280683994293213, - "learning_rate": 1.6352294668052872e-05, - "loss": 0.0611, - "step": 30630 - }, - { - "epoch": 2.2753601663448686, - "grad_norm": 1.6596300601959229, - "learning_rate": 1.6347839001930787e-05, - "loss": 0.1251, - "step": 30640 - }, - { - "epoch": 2.276102777365216, - "grad_norm": 1.1469758749008179, - "learning_rate": 1.6343383335808706e-05, - "loss": 0.0594, - "step": 30650 - }, - { - "epoch": 2.2768453883855635, - "grad_norm": 0.3723772466182709, - "learning_rate": 1.6338927669686617e-05, - "loss": 0.046, - "step": 30660 - }, - { - "epoch": 2.2775879994059114, - "grad_norm": 1.2316060066223145, - "learning_rate": 1.6334472003564532e-05, - "loss": 0.0913, - "step": 30670 - }, - { - "epoch": 2.278330610426259, - "grad_norm": 0.8177586793899536, - "learning_rate": 1.633001633744245e-05, - "loss": 0.0796, - "step": 30680 - }, - { - "epoch": 2.2790732214466063, - "grad_norm": 2.4429774284362793, - "learning_rate": 1.6325560671320362e-05, - "loss": 0.0675, - "step": 30690 - }, - { - "epoch": 2.2798158324669537, - "grad_norm": 2.337932586669922, - "learning_rate": 1.6321105005198277e-05, - "loss": 0.0905, - "step": 30700 - }, - { - "epoch": 2.280558443487301, - "grad_norm": 1.1022733449935913, - "learning_rate": 1.6316649339076192e-05, - "loss": 0.0586, - "step": 30710 - }, - { - "epoch": 2.281301054507649, - "grad_norm": 1.6780964136123657, - "learning_rate": 1.6312193672954107e-05, - "loss": 0.0661, - "step": 30720 - }, - { - "epoch": 2.2820436655279965, - "grad_norm": 1.0337419509887695, - "learning_rate": 1.6307738006832022e-05, - "loss": 0.1078, - "step": 30730 - }, - { - "epoch": 2.282786276548344, - "grad_norm": 0.539939284324646, - "learning_rate": 1.6303282340709934e-05, - "loss": 0.0679, - "step": 30740 - }, - { - "epoch": 2.2835288875686914, - "grad_norm": 3.132404088973999, - "learning_rate": 1.6298826674587852e-05, - "loss": 0.0875, - "step": 30750 - }, - { - "epoch": 2.284271498589039, - "grad_norm": 0.9195793271064758, - "learning_rate": 1.6294371008465767e-05, - "loss": 0.0704, - "step": 30760 - }, - { - "epoch": 2.285014109609387, - "grad_norm": 1.0817722082138062, - "learning_rate": 1.628991534234368e-05, - "loss": 0.0503, - "step": 30770 - }, - { - "epoch": 2.2857567206297342, - "grad_norm": 1.1843632459640503, - "learning_rate": 1.6285459676221597e-05, - "loss": 0.0481, - "step": 30780 - }, - { - "epoch": 2.2864993316500817, - "grad_norm": 2.589695930480957, - "learning_rate": 1.6281004010099512e-05, - "loss": 0.0587, - "step": 30790 - }, - { - "epoch": 2.287241942670429, - "grad_norm": 1.0527863502502441, - "learning_rate": 1.6276548343977424e-05, - "loss": 0.0863, - "step": 30800 - }, - { - "epoch": 2.2879845536907766, - "grad_norm": 1.128021478652954, - "learning_rate": 1.627209267785534e-05, - "loss": 0.0738, - "step": 30810 - }, - { - "epoch": 2.2887271647111245, - "grad_norm": 1.0027931928634644, - "learning_rate": 1.6267637011733254e-05, - "loss": 0.0423, - "step": 30820 - }, - { - "epoch": 2.289469775731472, - "grad_norm": 1.4265313148498535, - "learning_rate": 1.626318134561117e-05, - "loss": 0.0381, - "step": 30830 - }, - { - "epoch": 2.2902123867518194, - "grad_norm": 1.3314334154129028, - "learning_rate": 1.6258725679489084e-05, - "loss": 0.0521, - "step": 30840 - }, - { - "epoch": 2.290954997772167, - "grad_norm": 1.9009617567062378, - "learning_rate": 1.6254270013367e-05, - "loss": 0.0532, - "step": 30850 - }, - { - "epoch": 2.2916976087925143, - "grad_norm": 1.4951937198638916, - "learning_rate": 1.6249814347244914e-05, - "loss": 0.0764, - "step": 30860 - }, - { - "epoch": 2.292440219812862, - "grad_norm": 1.393104910850525, - "learning_rate": 1.624535868112283e-05, - "loss": 0.0694, - "step": 30870 - }, - { - "epoch": 2.2931828308332096, - "grad_norm": 1.1076534986495972, - "learning_rate": 1.624090301500074e-05, - "loss": 0.0542, - "step": 30880 - }, - { - "epoch": 2.293925441853557, - "grad_norm": 2.4654695987701416, - "learning_rate": 1.623644734887866e-05, - "loss": 0.0647, - "step": 30890 - }, - { - "epoch": 2.2946680528739045, - "grad_norm": 0.662090003490448, - "learning_rate": 1.6231991682756574e-05, - "loss": 0.0686, - "step": 30900 - }, - { - "epoch": 2.295410663894252, - "grad_norm": 2.0039710998535156, - "learning_rate": 1.6227536016634486e-05, - "loss": 0.0792, - "step": 30910 - }, - { - "epoch": 2.2961532749146, - "grad_norm": 0.9649547338485718, - "learning_rate": 1.6223080350512404e-05, - "loss": 0.0603, - "step": 30920 - }, - { - "epoch": 2.2968958859349473, - "grad_norm": 1.8113723993301392, - "learning_rate": 1.6218624684390316e-05, - "loss": 0.0739, - "step": 30930 - }, - { - "epoch": 2.2976384969552948, - "grad_norm": 0.8658888936042786, - "learning_rate": 1.621416901826823e-05, - "loss": 0.0417, - "step": 30940 - }, - { - "epoch": 2.298381107975642, - "grad_norm": 1.820826530456543, - "learning_rate": 1.6209713352146146e-05, - "loss": 0.0584, - "step": 30950 - }, - { - "epoch": 2.2991237189959897, - "grad_norm": 3.5532517433166504, - "learning_rate": 1.620525768602406e-05, - "loss": 0.0779, - "step": 30960 - }, - { - "epoch": 2.2998663300163376, - "grad_norm": 1.2699205875396729, - "learning_rate": 1.6200802019901976e-05, - "loss": 0.0722, - "step": 30970 - }, - { - "epoch": 2.300608941036685, - "grad_norm": 1.4592735767364502, - "learning_rate": 1.619634635377989e-05, - "loss": 0.0539, - "step": 30980 - }, - { - "epoch": 2.3013515520570325, - "grad_norm": 1.4031466245651245, - "learning_rate": 1.6191890687657805e-05, - "loss": 0.0849, - "step": 30990 - }, - { - "epoch": 2.30209416307738, - "grad_norm": 2.4945876598358154, - "learning_rate": 1.618743502153572e-05, - "loss": 0.0859, - "step": 31000 - }, - { - "epoch": 2.302836774097728, - "grad_norm": 1.129449725151062, - "learning_rate": 1.6182979355413635e-05, - "loss": 0.0807, - "step": 31010 - }, - { - "epoch": 2.3035793851180753, - "grad_norm": 3.043400526046753, - "learning_rate": 1.617852368929155e-05, - "loss": 0.056, - "step": 31020 - }, - { - "epoch": 2.3043219961384227, - "grad_norm": 2.541620969772339, - "learning_rate": 1.6174068023169465e-05, - "loss": 0.0738, - "step": 31030 - }, - { - "epoch": 2.30506460715877, - "grad_norm": 0.742087721824646, - "learning_rate": 1.6169612357047377e-05, - "loss": 0.0583, - "step": 31040 - }, - { - "epoch": 2.3058072181791176, - "grad_norm": 2.608368158340454, - "learning_rate": 1.6165156690925292e-05, - "loss": 0.0645, - "step": 31050 - }, - { - "epoch": 2.3065498291994655, - "grad_norm": 1.629696249961853, - "learning_rate": 1.616070102480321e-05, - "loss": 0.0897, - "step": 31060 - }, - { - "epoch": 2.307292440219813, - "grad_norm": 1.6755260229110718, - "learning_rate": 1.6156245358681122e-05, - "loss": 0.065, - "step": 31070 - }, - { - "epoch": 2.3080350512401604, - "grad_norm": 1.018589735031128, - "learning_rate": 1.6151789692559037e-05, - "loss": 0.0527, - "step": 31080 - }, - { - "epoch": 2.308777662260508, - "grad_norm": 0.7608964443206787, - "learning_rate": 1.6147334026436955e-05, - "loss": 0.0697, - "step": 31090 - }, - { - "epoch": 2.3095202732808553, - "grad_norm": 0.854860246181488, - "learning_rate": 1.6142878360314867e-05, - "loss": 0.0657, - "step": 31100 - }, - { - "epoch": 2.310262884301203, - "grad_norm": 1.3979119062423706, - "learning_rate": 1.6138422694192782e-05, - "loss": 0.0776, - "step": 31110 - }, - { - "epoch": 2.3110054953215506, - "grad_norm": 1.7942464351654053, - "learning_rate": 1.6133967028070697e-05, - "loss": 0.0688, - "step": 31120 - }, - { - "epoch": 2.311748106341898, - "grad_norm": 1.9012426137924194, - "learning_rate": 1.6129511361948612e-05, - "loss": 0.0585, - "step": 31130 - }, - { - "epoch": 2.3124907173622455, - "grad_norm": 1.9309673309326172, - "learning_rate": 1.6125055695826527e-05, - "loss": 0.0747, - "step": 31140 - }, - { - "epoch": 2.3132333283825934, - "grad_norm": 1.511763572692871, - "learning_rate": 1.612060002970444e-05, - "loss": 0.0732, - "step": 31150 - }, - { - "epoch": 2.313975939402941, - "grad_norm": 2.341627836227417, - "learning_rate": 1.6116144363582357e-05, - "loss": 0.0883, - "step": 31160 - }, - { - "epoch": 2.3147185504232883, - "grad_norm": 2.5105557441711426, - "learning_rate": 1.6111688697460272e-05, - "loss": 0.0588, - "step": 31170 - }, - { - "epoch": 2.315461161443636, - "grad_norm": 1.0133614540100098, - "learning_rate": 1.6107233031338184e-05, - "loss": 0.0887, - "step": 31180 - }, - { - "epoch": 2.3162037724639832, - "grad_norm": 5.659106254577637, - "learning_rate": 1.6102777365216102e-05, - "loss": 0.0934, - "step": 31190 - }, - { - "epoch": 2.316946383484331, - "grad_norm": 0.734591543674469, - "learning_rate": 1.6098321699094017e-05, - "loss": 0.0798, - "step": 31200 - }, - { - "epoch": 2.3176889945046786, - "grad_norm": 2.1692800521850586, - "learning_rate": 1.609386603297193e-05, - "loss": 0.0662, - "step": 31210 - }, - { - "epoch": 2.318431605525026, - "grad_norm": 0.6351516842842102, - "learning_rate": 1.6089410366849844e-05, - "loss": 0.0478, - "step": 31220 - }, - { - "epoch": 2.3191742165453735, - "grad_norm": 1.4886484146118164, - "learning_rate": 1.6084954700727762e-05, - "loss": 0.0698, - "step": 31230 - }, - { - "epoch": 2.319916827565721, - "grad_norm": 0.3388558626174927, - "learning_rate": 1.6080499034605674e-05, - "loss": 0.0557, - "step": 31240 - }, - { - "epoch": 2.320659438586069, - "grad_norm": 0.2938145697116852, - "learning_rate": 1.607604336848359e-05, - "loss": 0.0889, - "step": 31250 - }, - { - "epoch": 2.3214020496064163, - "grad_norm": 2.3439390659332275, - "learning_rate": 1.6071587702361504e-05, - "loss": 0.064, - "step": 31260 - }, - { - "epoch": 2.3221446606267637, - "grad_norm": 1.6882779598236084, - "learning_rate": 1.606713203623942e-05, - "loss": 0.0456, - "step": 31270 - }, - { - "epoch": 2.322887271647111, - "grad_norm": 1.6335844993591309, - "learning_rate": 1.6062676370117334e-05, - "loss": 0.0528, - "step": 31280 - }, - { - "epoch": 2.3236298826674586, - "grad_norm": 1.1933094263076782, - "learning_rate": 1.6058220703995245e-05, - "loss": 0.0913, - "step": 31290 - }, - { - "epoch": 2.3243724936878065, - "grad_norm": 2.4354095458984375, - "learning_rate": 1.6053765037873164e-05, - "loss": 0.0795, - "step": 31300 - }, - { - "epoch": 2.325115104708154, - "grad_norm": 1.5631943941116333, - "learning_rate": 1.604930937175108e-05, - "loss": 0.0792, - "step": 31310 - }, - { - "epoch": 2.3258577157285014, - "grad_norm": 2.0377357006073, - "learning_rate": 1.604485370562899e-05, - "loss": 0.0836, - "step": 31320 - }, - { - "epoch": 2.326600326748849, - "grad_norm": 2.2633230686187744, - "learning_rate": 1.604039803950691e-05, - "loss": 0.0778, - "step": 31330 - }, - { - "epoch": 2.3273429377691963, - "grad_norm": 1.7147982120513916, - "learning_rate": 1.603594237338482e-05, - "loss": 0.0757, - "step": 31340 - }, - { - "epoch": 2.328085548789544, - "grad_norm": 2.1653573513031006, - "learning_rate": 1.6031486707262735e-05, - "loss": 0.0769, - "step": 31350 - }, - { - "epoch": 2.3288281598098917, - "grad_norm": 1.5791271924972534, - "learning_rate": 1.602703104114065e-05, - "loss": 0.0756, - "step": 31360 - }, - { - "epoch": 2.329570770830239, - "grad_norm": 2.118759870529175, - "learning_rate": 1.6022575375018565e-05, - "loss": 0.0939, - "step": 31370 - }, - { - "epoch": 2.3303133818505866, - "grad_norm": 1.0190156698226929, - "learning_rate": 1.601811970889648e-05, - "loss": 0.0663, - "step": 31380 - }, - { - "epoch": 2.331055992870934, - "grad_norm": 1.578240156173706, - "learning_rate": 1.6013664042774395e-05, - "loss": 0.0899, - "step": 31390 - }, - { - "epoch": 2.331798603891282, - "grad_norm": 1.2670795917510986, - "learning_rate": 1.600920837665231e-05, - "loss": 0.056, - "step": 31400 - }, - { - "epoch": 2.3325412149116294, - "grad_norm": 0.6794329285621643, - "learning_rate": 1.6004752710530225e-05, - "loss": 0.0792, - "step": 31410 - }, - { - "epoch": 2.333283825931977, - "grad_norm": 1.2964015007019043, - "learning_rate": 1.600029704440814e-05, - "loss": 0.071, - "step": 31420 - }, - { - "epoch": 2.3340264369523243, - "grad_norm": 1.2982021570205688, - "learning_rate": 1.5995841378286055e-05, - "loss": 0.0841, - "step": 31430 - }, - { - "epoch": 2.3347690479726717, - "grad_norm": 2.294980049133301, - "learning_rate": 1.599138571216397e-05, - "loss": 0.0691, - "step": 31440 - }, - { - "epoch": 2.3355116589930196, - "grad_norm": 1.0897853374481201, - "learning_rate": 1.598693004604188e-05, - "loss": 0.0898, - "step": 31450 - }, - { - "epoch": 2.336254270013367, - "grad_norm": 2.2197341918945312, - "learning_rate": 1.5982474379919797e-05, - "loss": 0.0478, - "step": 31460 - }, - { - "epoch": 2.3369968810337145, - "grad_norm": 2.2933666706085205, - "learning_rate": 1.5978018713797715e-05, - "loss": 0.039, - "step": 31470 - }, - { - "epoch": 2.337739492054062, - "grad_norm": 2.1594624519348145, - "learning_rate": 1.5973563047675627e-05, - "loss": 0.0615, - "step": 31480 - }, - { - "epoch": 2.3384821030744094, - "grad_norm": 0.8612133264541626, - "learning_rate": 1.596910738155354e-05, - "loss": 0.0652, - "step": 31490 - }, - { - "epoch": 2.3392247140947573, - "grad_norm": 1.2774549722671509, - "learning_rate": 1.596465171543146e-05, - "loss": 0.0666, - "step": 31500 - }, - { - "epoch": 2.3399673251151047, - "grad_norm": 0.8495298027992249, - "learning_rate": 1.596019604930937e-05, - "loss": 0.0859, - "step": 31510 - }, - { - "epoch": 2.340709936135452, - "grad_norm": 0.653118908405304, - "learning_rate": 1.5955740383187287e-05, - "loss": 0.0565, - "step": 31520 - }, - { - "epoch": 2.3414525471557996, - "grad_norm": 0.9799015522003174, - "learning_rate": 1.59512847170652e-05, - "loss": 0.0624, - "step": 31530 - }, - { - "epoch": 2.342195158176147, - "grad_norm": 0.8276538848876953, - "learning_rate": 1.5946829050943117e-05, - "loss": 0.0819, - "step": 31540 - }, - { - "epoch": 2.342937769196495, - "grad_norm": 2.8302557468414307, - "learning_rate": 1.594237338482103e-05, - "loss": 0.0728, - "step": 31550 - }, - { - "epoch": 2.3436803802168424, - "grad_norm": 2.348175287246704, - "learning_rate": 1.5937917718698943e-05, - "loss": 0.0857, - "step": 31560 - }, - { - "epoch": 2.34442299123719, - "grad_norm": 1.6633504629135132, - "learning_rate": 1.593346205257686e-05, - "loss": 0.0724, - "step": 31570 - }, - { - "epoch": 2.3451656022575373, - "grad_norm": 0.9694968461990356, - "learning_rate": 1.5929006386454777e-05, - "loss": 0.0795, - "step": 31580 - }, - { - "epoch": 2.3459082132778852, - "grad_norm": 2.0099871158599854, - "learning_rate": 1.5924550720332688e-05, - "loss": 0.0568, - "step": 31590 - }, - { - "epoch": 2.3466508242982327, - "grad_norm": 1.2258661985397339, - "learning_rate": 1.5920095054210607e-05, - "loss": 0.0553, - "step": 31600 - }, - { - "epoch": 2.34739343531858, - "grad_norm": 1.5101720094680786, - "learning_rate": 1.591563938808852e-05, - "loss": 0.0631, - "step": 31610 - }, - { - "epoch": 2.3481360463389276, - "grad_norm": 1.2098814249038696, - "learning_rate": 1.5911183721966433e-05, - "loss": 0.0588, - "step": 31620 - }, - { - "epoch": 2.348878657359275, - "grad_norm": 0.48147693276405334, - "learning_rate": 1.5906728055844348e-05, - "loss": 0.0648, - "step": 31630 - }, - { - "epoch": 2.349621268379623, - "grad_norm": 1.1367077827453613, - "learning_rate": 1.5902272389722267e-05, - "loss": 0.0493, - "step": 31640 - }, - { - "epoch": 2.3503638793999704, - "grad_norm": 1.2265082597732544, - "learning_rate": 1.5897816723600178e-05, - "loss": 0.067, - "step": 31650 - }, - { - "epoch": 2.351106490420318, - "grad_norm": 3.4290337562561035, - "learning_rate": 1.5893361057478093e-05, - "loss": 0.0638, - "step": 31660 - }, - { - "epoch": 2.3518491014406653, - "grad_norm": 1.1333622932434082, - "learning_rate": 1.5888905391356008e-05, - "loss": 0.0814, - "step": 31670 - }, - { - "epoch": 2.3525917124610127, - "grad_norm": 1.8252911567687988, - "learning_rate": 1.5884449725233923e-05, - "loss": 0.0561, - "step": 31680 - }, - { - "epoch": 2.3533343234813606, - "grad_norm": 0.9480688571929932, - "learning_rate": 1.5879994059111838e-05, - "loss": 0.0603, - "step": 31690 - }, - { - "epoch": 2.354076934501708, - "grad_norm": 1.576391339302063, - "learning_rate": 1.587553839298975e-05, - "loss": 0.0568, - "step": 31700 - }, - { - "epoch": 2.3548195455220555, - "grad_norm": 0.9777421951293945, - "learning_rate": 1.5871082726867668e-05, - "loss": 0.0481, - "step": 31710 - }, - { - "epoch": 2.355562156542403, - "grad_norm": 1.60342538356781, - "learning_rate": 1.5866627060745583e-05, - "loss": 0.0618, - "step": 31720 - }, - { - "epoch": 2.356304767562751, - "grad_norm": 1.1398688554763794, - "learning_rate": 1.5862171394623495e-05, - "loss": 0.0554, - "step": 31730 - }, - { - "epoch": 2.3570473785830983, - "grad_norm": 0.9877955913543701, - "learning_rate": 1.5857715728501413e-05, - "loss": 0.0851, - "step": 31740 - }, - { - "epoch": 2.3577899896034458, - "grad_norm": 1.1725736856460571, - "learning_rate": 1.5853260062379325e-05, - "loss": 0.0643, - "step": 31750 - }, - { - "epoch": 2.358532600623793, - "grad_norm": 1.4532411098480225, - "learning_rate": 1.584880439625724e-05, - "loss": 0.057, - "step": 31760 - }, - { - "epoch": 2.3592752116441407, - "grad_norm": 1.950925350189209, - "learning_rate": 1.5844348730135155e-05, - "loss": 0.039, - "step": 31770 - }, - { - "epoch": 2.3600178226644886, - "grad_norm": 4.71244478225708, - "learning_rate": 1.583989306401307e-05, - "loss": 0.0499, - "step": 31780 - }, - { - "epoch": 2.360760433684836, - "grad_norm": 3.2281157970428467, - "learning_rate": 1.5835437397890985e-05, - "loss": 0.0861, - "step": 31790 - }, - { - "epoch": 2.3615030447051835, - "grad_norm": 1.568460464477539, - "learning_rate": 1.58309817317689e-05, - "loss": 0.0689, - "step": 31800 - }, - { - "epoch": 2.362245655725531, - "grad_norm": 0.7501446008682251, - "learning_rate": 1.5826526065646815e-05, - "loss": 0.0754, - "step": 31810 - }, - { - "epoch": 2.3629882667458784, - "grad_norm": 1.1231635808944702, - "learning_rate": 1.582207039952473e-05, - "loss": 0.088, - "step": 31820 - }, - { - "epoch": 2.3637308777662263, - "grad_norm": 1.5443603992462158, - "learning_rate": 1.5817614733402645e-05, - "loss": 0.0677, - "step": 31830 - }, - { - "epoch": 2.3644734887865737, - "grad_norm": 0.9648088216781616, - "learning_rate": 1.581315906728056e-05, - "loss": 0.0883, - "step": 31840 - }, - { - "epoch": 2.365216099806921, - "grad_norm": 1.1336426734924316, - "learning_rate": 1.5808703401158475e-05, - "loss": 0.0545, - "step": 31850 - }, - { - "epoch": 2.3659587108272686, - "grad_norm": 1.6498336791992188, - "learning_rate": 1.5804247735036386e-05, - "loss": 0.067, - "step": 31860 - }, - { - "epoch": 2.366701321847616, - "grad_norm": 2.2792677879333496, - "learning_rate": 1.57997920689143e-05, - "loss": 0.0773, - "step": 31870 - }, - { - "epoch": 2.367443932867964, - "grad_norm": 0.7917251586914062, - "learning_rate": 1.579533640279222e-05, - "loss": 0.0453, - "step": 31880 - }, - { - "epoch": 2.3681865438883114, - "grad_norm": 1.3908026218414307, - "learning_rate": 1.579088073667013e-05, - "loss": 0.0596, - "step": 31890 - }, - { - "epoch": 2.368929154908659, - "grad_norm": 0.8732894659042358, - "learning_rate": 1.5786425070548046e-05, - "loss": 0.0487, - "step": 31900 - }, - { - "epoch": 2.3696717659290063, - "grad_norm": 0.30048489570617676, - "learning_rate": 1.5781969404425965e-05, - "loss": 0.0723, - "step": 31910 - }, - { - "epoch": 2.3704143769493538, - "grad_norm": 1.2894927263259888, - "learning_rate": 1.5777513738303876e-05, - "loss": 0.0716, - "step": 31920 - }, - { - "epoch": 2.3711569879697016, - "grad_norm": 0.718590259552002, - "learning_rate": 1.577305807218179e-05, - "loss": 0.0684, - "step": 31930 - }, - { - "epoch": 2.371899598990049, - "grad_norm": 1.6592215299606323, - "learning_rate": 1.5768602406059706e-05, - "loss": 0.0799, - "step": 31940 - }, - { - "epoch": 2.3726422100103965, - "grad_norm": 1.2301288843154907, - "learning_rate": 1.576414673993762e-05, - "loss": 0.0526, - "step": 31950 - }, - { - "epoch": 2.373384821030744, - "grad_norm": 1.742710828781128, - "learning_rate": 1.5759691073815536e-05, - "loss": 0.051, - "step": 31960 - }, - { - "epoch": 2.3741274320510914, - "grad_norm": 0.8959303498268127, - "learning_rate": 1.5755235407693448e-05, - "loss": 0.0439, - "step": 31970 - }, - { - "epoch": 2.3748700430714393, - "grad_norm": 2.8201303482055664, - "learning_rate": 1.5750779741571366e-05, - "loss": 0.0608, - "step": 31980 - }, - { - "epoch": 2.375612654091787, - "grad_norm": 1.701446294784546, - "learning_rate": 1.574632407544928e-05, - "loss": 0.0872, - "step": 31990 - }, - { - "epoch": 2.3763552651121342, - "grad_norm": 0.7672728300094604, - "learning_rate": 1.5741868409327193e-05, - "loss": 0.0975, - "step": 32000 - }, - { - "epoch": 2.3770978761324817, - "grad_norm": 2.049514055252075, - "learning_rate": 1.573741274320511e-05, - "loss": 0.0846, - "step": 32010 - }, - { - "epoch": 2.377840487152829, - "grad_norm": 1.193021535873413, - "learning_rate": 1.5732957077083026e-05, - "loss": 0.0705, - "step": 32020 - }, - { - "epoch": 2.378583098173177, - "grad_norm": 2.212050676345825, - "learning_rate": 1.5728501410960938e-05, - "loss": 0.0715, - "step": 32030 - }, - { - "epoch": 2.3793257091935245, - "grad_norm": 1.0855233669281006, - "learning_rate": 1.5724045744838853e-05, - "loss": 0.0644, - "step": 32040 - }, - { - "epoch": 2.380068320213872, - "grad_norm": 2.6349453926086426, - "learning_rate": 1.571959007871677e-05, - "loss": 0.0604, - "step": 32050 - }, - { - "epoch": 2.3808109312342194, - "grad_norm": 0.948853611946106, - "learning_rate": 1.5715134412594683e-05, - "loss": 0.0721, - "step": 32060 - }, - { - "epoch": 2.381553542254567, - "grad_norm": 1.4371938705444336, - "learning_rate": 1.5710678746472598e-05, - "loss": 0.0499, - "step": 32070 - }, - { - "epoch": 2.3822961532749147, - "grad_norm": 0.6612533926963806, - "learning_rate": 1.5706223080350513e-05, - "loss": 0.0827, - "step": 32080 - }, - { - "epoch": 2.383038764295262, - "grad_norm": 3.743394136428833, - "learning_rate": 1.5701767414228428e-05, - "loss": 0.0729, - "step": 32090 - }, - { - "epoch": 2.3837813753156096, - "grad_norm": 1.6435579061508179, - "learning_rate": 1.5697311748106343e-05, - "loss": 0.0618, - "step": 32100 - }, - { - "epoch": 2.384523986335957, - "grad_norm": 2.4289140701293945, - "learning_rate": 1.5692856081984254e-05, - "loss": 0.0822, - "step": 32110 - }, - { - "epoch": 2.3852665973563045, - "grad_norm": 0.4796588122844696, - "learning_rate": 1.5688400415862173e-05, - "loss": 0.0544, - "step": 32120 - }, - { - "epoch": 2.3860092083766524, - "grad_norm": 2.2078115940093994, - "learning_rate": 1.5683944749740088e-05, - "loss": 0.0714, - "step": 32130 - }, - { - "epoch": 2.386751819397, - "grad_norm": 1.0921403169631958, - "learning_rate": 1.5679489083618e-05, - "loss": 0.0562, - "step": 32140 - }, - { - "epoch": 2.3874944304173473, - "grad_norm": 2.1888418197631836, - "learning_rate": 1.5675033417495918e-05, - "loss": 0.0794, - "step": 32150 - }, - { - "epoch": 2.3882370414376948, - "grad_norm": 2.4097537994384766, - "learning_rate": 1.5670577751373833e-05, - "loss": 0.1025, - "step": 32160 - }, - { - "epoch": 2.3889796524580427, - "grad_norm": 2.782663583755493, - "learning_rate": 1.5666122085251744e-05, - "loss": 0.0667, - "step": 32170 - }, - { - "epoch": 2.38972226347839, - "grad_norm": 2.659151554107666, - "learning_rate": 1.5661666419129663e-05, - "loss": 0.0515, - "step": 32180 - }, - { - "epoch": 2.3904648744987376, - "grad_norm": 1.7082628011703491, - "learning_rate": 1.5657210753007574e-05, - "loss": 0.1092, - "step": 32190 - }, - { - "epoch": 2.391207485519085, - "grad_norm": 1.8074676990509033, - "learning_rate": 1.565275508688549e-05, - "loss": 0.0675, - "step": 32200 - }, - { - "epoch": 2.3919500965394325, - "grad_norm": 1.0085229873657227, - "learning_rate": 1.5648299420763404e-05, - "loss": 0.1148, - "step": 32210 - }, - { - "epoch": 2.3926927075597804, - "grad_norm": 4.384804725646973, - "learning_rate": 1.564384375464132e-05, - "loss": 0.0551, - "step": 32220 - }, - { - "epoch": 2.393435318580128, - "grad_norm": 1.1402451992034912, - "learning_rate": 1.5639388088519234e-05, - "loss": 0.0603, - "step": 32230 - }, - { - "epoch": 2.3941779296004753, - "grad_norm": 0.9392279386520386, - "learning_rate": 1.563493242239715e-05, - "loss": 0.0836, - "step": 32240 - }, - { - "epoch": 2.3949205406208227, - "grad_norm": 1.5614676475524902, - "learning_rate": 1.5630476756275064e-05, - "loss": 0.0956, - "step": 32250 - }, - { - "epoch": 2.39566315164117, - "grad_norm": 1.321561574935913, - "learning_rate": 1.562602109015298e-05, - "loss": 0.0715, - "step": 32260 - }, - { - "epoch": 2.396405762661518, - "grad_norm": 0.910446047782898, - "learning_rate": 1.562156542403089e-05, - "loss": 0.0434, - "step": 32270 - }, - { - "epoch": 2.3971483736818655, - "grad_norm": 0.8245983123779297, - "learning_rate": 1.5617109757908806e-05, - "loss": 0.076, - "step": 32280 - }, - { - "epoch": 2.397890984702213, - "grad_norm": 1.7808678150177002, - "learning_rate": 1.5612654091786724e-05, - "loss": 0.0731, - "step": 32290 - }, - { - "epoch": 2.3986335957225604, - "grad_norm": 2.991241455078125, - "learning_rate": 1.5608198425664636e-05, - "loss": 0.087, - "step": 32300 - }, - { - "epoch": 2.3993762067429083, - "grad_norm": 2.8807268142700195, - "learning_rate": 1.560374275954255e-05, - "loss": 0.0876, - "step": 32310 - }, - { - "epoch": 2.4001188177632558, - "grad_norm": 0.5517368316650391, - "learning_rate": 1.559928709342047e-05, - "loss": 0.0773, - "step": 32320 - }, - { - "epoch": 2.400861428783603, - "grad_norm": 1.6342768669128418, - "learning_rate": 1.559483142729838e-05, - "loss": 0.0609, - "step": 32330 - }, - { - "epoch": 2.4016040398039507, - "grad_norm": 5.523561954498291, - "learning_rate": 1.5590375761176296e-05, - "loss": 0.0431, - "step": 32340 - }, - { - "epoch": 2.402346650824298, - "grad_norm": 0.7796204090118408, - "learning_rate": 1.558592009505421e-05, - "loss": 0.0905, - "step": 32350 - }, - { - "epoch": 2.403089261844646, - "grad_norm": 2.9090576171875, - "learning_rate": 1.5581464428932126e-05, - "loss": 0.0998, - "step": 32360 - }, - { - "epoch": 2.4038318728649934, - "grad_norm": 0.7734149098396301, - "learning_rate": 1.557700876281004e-05, - "loss": 0.0585, - "step": 32370 - }, - { - "epoch": 2.404574483885341, - "grad_norm": 1.852062702178955, - "learning_rate": 1.5572553096687952e-05, - "loss": 0.1048, - "step": 32380 - }, - { - "epoch": 2.4053170949056883, - "grad_norm": 0.8608161807060242, - "learning_rate": 1.556809743056587e-05, - "loss": 0.0548, - "step": 32390 - }, - { - "epoch": 2.406059705926036, - "grad_norm": 3.039947032928467, - "learning_rate": 1.5563641764443786e-05, - "loss": 0.0854, - "step": 32400 - }, - { - "epoch": 2.4068023169463837, - "grad_norm": 2.2535059452056885, - "learning_rate": 1.5559186098321697e-05, - "loss": 0.0921, - "step": 32410 - }, - { - "epoch": 2.407544927966731, - "grad_norm": 3.556171417236328, - "learning_rate": 1.5554730432199616e-05, - "loss": 0.0632, - "step": 32420 - }, - { - "epoch": 2.4082875389870786, - "grad_norm": 2.6815476417541504, - "learning_rate": 1.555027476607753e-05, - "loss": 0.0689, - "step": 32430 - }, - { - "epoch": 2.409030150007426, - "grad_norm": 0.38437405228614807, - "learning_rate": 1.5545819099955442e-05, - "loss": 0.0491, - "step": 32440 - }, - { - "epoch": 2.4097727610277735, - "grad_norm": 1.429911494255066, - "learning_rate": 1.5541363433833357e-05, - "loss": 0.0423, - "step": 32450 - }, - { - "epoch": 2.4105153720481214, - "grad_norm": 1.3753142356872559, - "learning_rate": 1.5536907767711276e-05, - "loss": 0.0816, - "step": 32460 - }, - { - "epoch": 2.411257983068469, - "grad_norm": 1.1020511388778687, - "learning_rate": 1.5532452101589187e-05, - "loss": 0.0403, - "step": 32470 - }, - { - "epoch": 2.4120005940888163, - "grad_norm": 3.2163803577423096, - "learning_rate": 1.5527996435467102e-05, - "loss": 0.09, - "step": 32480 - }, - { - "epoch": 2.4127432051091637, - "grad_norm": 0.8695268630981445, - "learning_rate": 1.5523540769345017e-05, - "loss": 0.0565, - "step": 32490 - }, - { - "epoch": 2.413485816129511, - "grad_norm": 0.9537298083305359, - "learning_rate": 1.5519085103222932e-05, - "loss": 0.0913, - "step": 32500 - }, - { - "epoch": 2.414228427149859, - "grad_norm": 1.1136554479599, - "learning_rate": 1.5514629437100847e-05, - "loss": 0.0829, - "step": 32510 - }, - { - "epoch": 2.4149710381702065, - "grad_norm": 1.2654924392700195, - "learning_rate": 1.551017377097876e-05, - "loss": 0.0915, - "step": 32520 - }, - { - "epoch": 2.415713649190554, - "grad_norm": 3.5749497413635254, - "learning_rate": 1.5505718104856677e-05, - "loss": 0.0921, - "step": 32530 - }, - { - "epoch": 2.4164562602109014, - "grad_norm": 1.6017907857894897, - "learning_rate": 1.5501262438734592e-05, - "loss": 0.0645, - "step": 32540 - }, - { - "epoch": 2.417198871231249, - "grad_norm": 2.2795519828796387, - "learning_rate": 1.5496806772612504e-05, - "loss": 0.0624, - "step": 32550 - }, - { - "epoch": 2.4179414822515968, - "grad_norm": 1.48874831199646, - "learning_rate": 1.5492351106490422e-05, - "loss": 0.0667, - "step": 32560 - }, - { - "epoch": 2.4186840932719442, - "grad_norm": 3.069807529449463, - "learning_rate": 1.5487895440368337e-05, - "loss": 0.0767, - "step": 32570 - }, - { - "epoch": 2.4194267042922917, - "grad_norm": 0.8146727085113525, - "learning_rate": 1.548343977424625e-05, - "loss": 0.09, - "step": 32580 - }, - { - "epoch": 2.420169315312639, - "grad_norm": 2.0153746604919434, - "learning_rate": 1.5478984108124167e-05, - "loss": 0.0834, - "step": 32590 - }, - { - "epoch": 2.4209119263329866, - "grad_norm": 1.9810839891433716, - "learning_rate": 1.547452844200208e-05, - "loss": 0.0843, - "step": 32600 - }, - { - "epoch": 2.4216545373533345, - "grad_norm": 1.213164210319519, - "learning_rate": 1.5470072775879994e-05, - "loss": 0.0798, - "step": 32610 - }, - { - "epoch": 2.422397148373682, - "grad_norm": 0.5485877394676208, - "learning_rate": 1.546561710975791e-05, - "loss": 0.0396, - "step": 32620 - }, - { - "epoch": 2.4231397593940294, - "grad_norm": 1.8385777473449707, - "learning_rate": 1.5461161443635824e-05, - "loss": 0.0538, - "step": 32630 - }, - { - "epoch": 2.423882370414377, - "grad_norm": 2.222101926803589, - "learning_rate": 1.545670577751374e-05, - "loss": 0.0494, - "step": 32640 - }, - { - "epoch": 2.4246249814347243, - "grad_norm": 0.7490872740745544, - "learning_rate": 1.5452250111391654e-05, - "loss": 0.0645, - "step": 32650 - }, - { - "epoch": 2.425367592455072, - "grad_norm": 1.1471827030181885, - "learning_rate": 1.544779444526957e-05, - "loss": 0.0842, - "step": 32660 - }, - { - "epoch": 2.4261102034754196, - "grad_norm": 1.6201061010360718, - "learning_rate": 1.5443338779147484e-05, - "loss": 0.0834, - "step": 32670 - }, - { - "epoch": 2.426852814495767, - "grad_norm": 1.3933659791946411, - "learning_rate": 1.54388831130254e-05, - "loss": 0.0674, - "step": 32680 - }, - { - "epoch": 2.4275954255161145, - "grad_norm": 1.0082453489303589, - "learning_rate": 1.543442744690331e-05, - "loss": 0.043, - "step": 32690 - }, - { - "epoch": 2.428338036536462, - "grad_norm": 0.7856671214103699, - "learning_rate": 1.542997178078123e-05, - "loss": 0.048, - "step": 32700 - }, - { - "epoch": 2.42908064755681, - "grad_norm": 0.4259887635707855, - "learning_rate": 1.542551611465914e-05, - "loss": 0.0641, - "step": 32710 - }, - { - "epoch": 2.4298232585771573, - "grad_norm": 0.42960453033447266, - "learning_rate": 1.5421060448537055e-05, - "loss": 0.0755, - "step": 32720 - }, - { - "epoch": 2.4305658695975048, - "grad_norm": 3.8865599632263184, - "learning_rate": 1.5416604782414974e-05, - "loss": 0.0687, - "step": 32730 - }, - { - "epoch": 2.431308480617852, - "grad_norm": 3.587674140930176, - "learning_rate": 1.5412149116292885e-05, - "loss": 0.0657, - "step": 32740 - }, - { - "epoch": 2.4320510916382, - "grad_norm": 1.8166769742965698, - "learning_rate": 1.54076934501708e-05, - "loss": 0.0776, - "step": 32750 - }, - { - "epoch": 2.4327937026585476, - "grad_norm": 2.718137264251709, - "learning_rate": 1.5403237784048715e-05, - "loss": 0.0761, - "step": 32760 - }, - { - "epoch": 2.433536313678895, - "grad_norm": 1.776787281036377, - "learning_rate": 1.539878211792663e-05, - "loss": 0.0838, - "step": 32770 - }, - { - "epoch": 2.4342789246992425, - "grad_norm": 0.9153753519058228, - "learning_rate": 1.5394326451804545e-05, - "loss": 0.065, - "step": 32780 - }, - { - "epoch": 2.43502153571959, - "grad_norm": 1.0639044046401978, - "learning_rate": 1.5389870785682457e-05, - "loss": 0.0608, - "step": 32790 - }, - { - "epoch": 2.435764146739938, - "grad_norm": 1.5037258863449097, - "learning_rate": 1.5385415119560375e-05, - "loss": 0.067, - "step": 32800 - }, - { - "epoch": 2.4365067577602852, - "grad_norm": 1.892593502998352, - "learning_rate": 1.538095945343829e-05, - "loss": 0.061, - "step": 32810 - }, - { - "epoch": 2.4372493687806327, - "grad_norm": 3.2514467239379883, - "learning_rate": 1.5376503787316202e-05, - "loss": 0.0801, - "step": 32820 - }, - { - "epoch": 2.43799197980098, - "grad_norm": 1.7820117473602295, - "learning_rate": 1.537204812119412e-05, - "loss": 0.0887, - "step": 32830 - }, - { - "epoch": 2.4387345908213276, - "grad_norm": 1.159784197807312, - "learning_rate": 1.5367592455072035e-05, - "loss": 0.0719, - "step": 32840 - }, - { - "epoch": 2.4394772018416755, - "grad_norm": 2.0374605655670166, - "learning_rate": 1.5363136788949947e-05, - "loss": 0.0854, - "step": 32850 - }, - { - "epoch": 2.440219812862023, - "grad_norm": 2.429708957672119, - "learning_rate": 1.5358681122827862e-05, - "loss": 0.0552, - "step": 32860 - }, - { - "epoch": 2.4409624238823704, - "grad_norm": 2.368227243423462, - "learning_rate": 1.535422545670578e-05, - "loss": 0.07, - "step": 32870 - }, - { - "epoch": 2.441705034902718, - "grad_norm": 0.7669575214385986, - "learning_rate": 1.5349769790583692e-05, - "loss": 0.0622, - "step": 32880 - }, - { - "epoch": 2.4424476459230657, - "grad_norm": 0.9094696044921875, - "learning_rate": 1.5345314124461607e-05, - "loss": 0.0522, - "step": 32890 - }, - { - "epoch": 2.443190256943413, - "grad_norm": 1.134655237197876, - "learning_rate": 1.5340858458339522e-05, - "loss": 0.0789, - "step": 32900 - }, - { - "epoch": 2.4439328679637606, - "grad_norm": 1.5787122249603271, - "learning_rate": 1.5336402792217437e-05, - "loss": 0.0823, - "step": 32910 - }, - { - "epoch": 2.444675478984108, - "grad_norm": 1.5267248153686523, - "learning_rate": 1.5331947126095352e-05, - "loss": 0.0557, - "step": 32920 - }, - { - "epoch": 2.4454180900044555, - "grad_norm": 1.0408943891525269, - "learning_rate": 1.5327491459973264e-05, - "loss": 0.0481, - "step": 32930 - }, - { - "epoch": 2.4461607010248034, - "grad_norm": 0.36609914898872375, - "learning_rate": 1.5323035793851182e-05, - "loss": 0.0744, - "step": 32940 - }, - { - "epoch": 2.446903312045151, - "grad_norm": 0.8200104832649231, - "learning_rate": 1.5318580127729097e-05, - "loss": 0.087, - "step": 32950 - }, - { - "epoch": 2.4476459230654983, - "grad_norm": 1.5445940494537354, - "learning_rate": 1.531412446160701e-05, - "loss": 0.0407, - "step": 32960 - }, - { - "epoch": 2.448388534085846, - "grad_norm": 1.621883749961853, - "learning_rate": 1.5309668795484927e-05, - "loss": 0.0679, - "step": 32970 - }, - { - "epoch": 2.4491311451061932, - "grad_norm": 0.8579855561256409, - "learning_rate": 1.5305213129362842e-05, - "loss": 0.0799, - "step": 32980 - }, - { - "epoch": 2.449873756126541, - "grad_norm": 1.6563255786895752, - "learning_rate": 1.5300757463240754e-05, - "loss": 0.0634, - "step": 32990 - }, - { - "epoch": 2.4506163671468886, - "grad_norm": 0.7070105671882629, - "learning_rate": 1.5296301797118672e-05, - "loss": 0.0956, - "step": 33000 - }, - { - "epoch": 2.451358978167236, - "grad_norm": 2.0392887592315674, - "learning_rate": 1.5291846130996584e-05, - "loss": 0.0828, - "step": 33010 - }, - { - "epoch": 2.4521015891875835, - "grad_norm": 1.0714646577835083, - "learning_rate": 1.52873904648745e-05, - "loss": 0.0737, - "step": 33020 - }, - { - "epoch": 2.452844200207931, - "grad_norm": 1.0105502605438232, - "learning_rate": 1.5282934798752413e-05, - "loss": 0.0637, - "step": 33030 - }, - { - "epoch": 2.453586811228279, - "grad_norm": 2.0838091373443604, - "learning_rate": 1.527847913263033e-05, - "loss": 0.0807, - "step": 33040 - }, - { - "epoch": 2.4543294222486263, - "grad_norm": 3.914405107498169, - "learning_rate": 1.5274023466508243e-05, - "loss": 0.0905, - "step": 33050 - }, - { - "epoch": 2.4550720332689737, - "grad_norm": 0.8861109018325806, - "learning_rate": 1.526956780038616e-05, - "loss": 0.0531, - "step": 33060 - }, - { - "epoch": 2.455814644289321, - "grad_norm": 1.2929595708847046, - "learning_rate": 1.5265112134264073e-05, - "loss": 0.068, - "step": 33070 - }, - { - "epoch": 2.4565572553096686, - "grad_norm": 1.6953091621398926, - "learning_rate": 1.526065646814199e-05, - "loss": 0.0674, - "step": 33080 - }, - { - "epoch": 2.4572998663300165, - "grad_norm": 1.0414247512817383, - "learning_rate": 1.5256200802019903e-05, - "loss": 0.0852, - "step": 33090 - }, - { - "epoch": 2.458042477350364, - "grad_norm": 1.8523513078689575, - "learning_rate": 1.5251745135897817e-05, - "loss": 0.0707, - "step": 33100 - }, - { - "epoch": 2.4587850883707114, - "grad_norm": 1.1955831050872803, - "learning_rate": 1.5247289469775732e-05, - "loss": 0.0472, - "step": 33110 - }, - { - "epoch": 2.459527699391059, - "grad_norm": 2.124166250228882, - "learning_rate": 1.5242833803653645e-05, - "loss": 0.0604, - "step": 33120 - }, - { - "epoch": 2.4602703104114063, - "grad_norm": 2.213921070098877, - "learning_rate": 1.5238378137531562e-05, - "loss": 0.0677, - "step": 33130 - }, - { - "epoch": 2.461012921431754, - "grad_norm": 0.6650950312614441, - "learning_rate": 1.5233922471409477e-05, - "loss": 0.0743, - "step": 33140 - }, - { - "epoch": 2.4617555324521017, - "grad_norm": 0.972344160079956, - "learning_rate": 1.522946680528739e-05, - "loss": 0.0534, - "step": 33150 - }, - { - "epoch": 2.462498143472449, - "grad_norm": 0.7517353296279907, - "learning_rate": 1.5225011139165305e-05, - "loss": 0.0736, - "step": 33160 - }, - { - "epoch": 2.4632407544927966, - "grad_norm": 2.8627500534057617, - "learning_rate": 1.5220555473043222e-05, - "loss": 0.0417, - "step": 33170 - }, - { - "epoch": 2.463983365513144, - "grad_norm": 0.2295779585838318, - "learning_rate": 1.5216099806921135e-05, - "loss": 0.0424, - "step": 33180 - }, - { - "epoch": 2.464725976533492, - "grad_norm": 1.0014530420303345, - "learning_rate": 1.521164414079905e-05, - "loss": 0.0509, - "step": 33190 - }, - { - "epoch": 2.4654685875538394, - "grad_norm": 0.8116422295570374, - "learning_rate": 1.5207188474676963e-05, - "loss": 0.0773, - "step": 33200 - }, - { - "epoch": 2.466211198574187, - "grad_norm": 2.368131399154663, - "learning_rate": 1.5202732808554878e-05, - "loss": 0.0643, - "step": 33210 - }, - { - "epoch": 2.4669538095945343, - "grad_norm": 0.573657751083374, - "learning_rate": 1.5198277142432795e-05, - "loss": 0.0482, - "step": 33220 - }, - { - "epoch": 2.4676964206148817, - "grad_norm": 2.1423192024230957, - "learning_rate": 1.5193821476310708e-05, - "loss": 0.0896, - "step": 33230 - }, - { - "epoch": 2.4684390316352296, - "grad_norm": 0.7535884976387024, - "learning_rate": 1.5189365810188623e-05, - "loss": 0.0625, - "step": 33240 - }, - { - "epoch": 2.469181642655577, - "grad_norm": 1.2931323051452637, - "learning_rate": 1.518491014406654e-05, - "loss": 0.053, - "step": 33250 - }, - { - "epoch": 2.4699242536759245, - "grad_norm": 2.7069478034973145, - "learning_rate": 1.5180454477944452e-05, - "loss": 0.0806, - "step": 33260 - }, - { - "epoch": 2.470666864696272, - "grad_norm": 0.8650771379470825, - "learning_rate": 1.5175998811822368e-05, - "loss": 0.0689, - "step": 33270 - }, - { - "epoch": 2.4714094757166194, - "grad_norm": 3.503204345703125, - "learning_rate": 1.5171543145700283e-05, - "loss": 0.0681, - "step": 33280 - }, - { - "epoch": 2.4721520867369673, - "grad_norm": 0.5587957501411438, - "learning_rate": 1.5167087479578197e-05, - "loss": 0.0648, - "step": 33290 - }, - { - "epoch": 2.4728946977573147, - "grad_norm": 2.2347841262817383, - "learning_rate": 1.5162631813456113e-05, - "loss": 0.0624, - "step": 33300 - }, - { - "epoch": 2.473637308777662, - "grad_norm": 3.54778790473938, - "learning_rate": 1.5158176147334025e-05, - "loss": 0.0632, - "step": 33310 - }, - { - "epoch": 2.4743799197980096, - "grad_norm": 1.227449893951416, - "learning_rate": 1.5153720481211942e-05, - "loss": 0.0717, - "step": 33320 - }, - { - "epoch": 2.4751225308183575, - "grad_norm": 1.61305570602417, - "learning_rate": 1.5149264815089857e-05, - "loss": 0.0751, - "step": 33330 - }, - { - "epoch": 2.475865141838705, - "grad_norm": 0.8405054807662964, - "learning_rate": 1.514480914896777e-05, - "loss": 0.0638, - "step": 33340 - }, - { - "epoch": 2.4766077528590524, - "grad_norm": 0.8352612257003784, - "learning_rate": 1.5140353482845687e-05, - "loss": 0.0644, - "step": 33350 - }, - { - "epoch": 2.4773503638794, - "grad_norm": 3.4174530506134033, - "learning_rate": 1.5135897816723602e-05, - "loss": 0.0562, - "step": 33360 - }, - { - "epoch": 2.4780929748997473, - "grad_norm": 1.8341305255889893, - "learning_rate": 1.5131442150601515e-05, - "loss": 0.0558, - "step": 33370 - }, - { - "epoch": 2.4788355859200952, - "grad_norm": 2.0367071628570557, - "learning_rate": 1.512698648447943e-05, - "loss": 0.0765, - "step": 33380 - }, - { - "epoch": 2.4795781969404427, - "grad_norm": 2.6080222129821777, - "learning_rate": 1.5122530818357347e-05, - "loss": 0.0549, - "step": 33390 - }, - { - "epoch": 2.48032080796079, - "grad_norm": 2.060661792755127, - "learning_rate": 1.5118075152235258e-05, - "loss": 0.0727, - "step": 33400 - }, - { - "epoch": 2.4810634189811376, - "grad_norm": 0.36137092113494873, - "learning_rate": 1.5113619486113175e-05, - "loss": 0.0718, - "step": 33410 - }, - { - "epoch": 2.481806030001485, - "grad_norm": 0.67535001039505, - "learning_rate": 1.5109163819991088e-05, - "loss": 0.0842, - "step": 33420 - }, - { - "epoch": 2.482548641021833, - "grad_norm": 1.2539730072021484, - "learning_rate": 1.5104708153869003e-05, - "loss": 0.107, - "step": 33430 - }, - { - "epoch": 2.4832912520421804, - "grad_norm": 1.4725462198257446, - "learning_rate": 1.510025248774692e-05, - "loss": 0.0611, - "step": 33440 - }, - { - "epoch": 2.484033863062528, - "grad_norm": 2.846672534942627, - "learning_rate": 1.5095796821624831e-05, - "loss": 0.1161, - "step": 33450 - }, - { - "epoch": 2.4847764740828753, - "grad_norm": 3.4515435695648193, - "learning_rate": 1.5091341155502748e-05, - "loss": 0.0771, - "step": 33460 - }, - { - "epoch": 2.485519085103223, - "grad_norm": 2.5990917682647705, - "learning_rate": 1.5086885489380665e-05, - "loss": 0.0759, - "step": 33470 - }, - { - "epoch": 2.4862616961235706, - "grad_norm": 0.4841431975364685, - "learning_rate": 1.5082429823258576e-05, - "loss": 0.0585, - "step": 33480 - }, - { - "epoch": 2.487004307143918, - "grad_norm": 1.1416738033294678, - "learning_rate": 1.5077974157136493e-05, - "loss": 0.0593, - "step": 33490 - }, - { - "epoch": 2.4877469181642655, - "grad_norm": 1.2249414920806885, - "learning_rate": 1.5073518491014408e-05, - "loss": 0.079, - "step": 33500 - }, - { - "epoch": 2.488489529184613, - "grad_norm": 2.8479864597320557, - "learning_rate": 1.5069062824892321e-05, - "loss": 0.0688, - "step": 33510 - }, - { - "epoch": 2.489232140204961, - "grad_norm": 0.494975745677948, - "learning_rate": 1.5064607158770236e-05, - "loss": 0.0848, - "step": 33520 - }, - { - "epoch": 2.4899747512253083, - "grad_norm": 0.8648329973220825, - "learning_rate": 1.506015149264815e-05, - "loss": 0.079, - "step": 33530 - }, - { - "epoch": 2.4907173622456558, - "grad_norm": 2.451526641845703, - "learning_rate": 1.5055695826526066e-05, - "loss": 0.0679, - "step": 33540 - }, - { - "epoch": 2.491459973266003, - "grad_norm": 0.8754955530166626, - "learning_rate": 1.5051240160403981e-05, - "loss": 0.0766, - "step": 33550 - }, - { - "epoch": 2.4922025842863507, - "grad_norm": 2.0408942699432373, - "learning_rate": 1.5046784494281895e-05, - "loss": 0.0696, - "step": 33560 - }, - { - "epoch": 2.4929451953066986, - "grad_norm": 3.6392688751220703, - "learning_rate": 1.504232882815981e-05, - "loss": 0.0703, - "step": 33570 - }, - { - "epoch": 2.493687806327046, - "grad_norm": 1.6540186405181885, - "learning_rate": 1.5037873162037726e-05, - "loss": 0.0852, - "step": 33580 - }, - { - "epoch": 2.4944304173473935, - "grad_norm": 1.6503866910934448, - "learning_rate": 1.503341749591564e-05, - "loss": 0.0865, - "step": 33590 - }, - { - "epoch": 2.495173028367741, - "grad_norm": 0.8147965669631958, - "learning_rate": 1.5028961829793555e-05, - "loss": 0.0757, - "step": 33600 - }, - { - "epoch": 2.4959156393880884, - "grad_norm": 2.5200459957122803, - "learning_rate": 1.5024506163671471e-05, - "loss": 0.0582, - "step": 33610 - }, - { - "epoch": 2.4966582504084363, - "grad_norm": 2.8182950019836426, - "learning_rate": 1.5020050497549383e-05, - "loss": 0.0855, - "step": 33620 - }, - { - "epoch": 2.4974008614287837, - "grad_norm": 0.40025582909584045, - "learning_rate": 1.50155948314273e-05, - "loss": 0.0717, - "step": 33630 - }, - { - "epoch": 2.498143472449131, - "grad_norm": 1.4286984205245972, - "learning_rate": 1.5011139165305213e-05, - "loss": 0.0483, - "step": 33640 - }, - { - "epoch": 2.4988860834694786, - "grad_norm": 0.4235764145851135, - "learning_rate": 1.5006683499183128e-05, - "loss": 0.0372, - "step": 33650 - }, - { - "epoch": 2.499628694489826, - "grad_norm": 1.5366827249526978, - "learning_rate": 1.5002227833061045e-05, - "loss": 0.0574, - "step": 33660 - }, - { - "epoch": 2.500371305510174, - "grad_norm": 1.9339724779129028, - "learning_rate": 1.4997772166938958e-05, - "loss": 0.0612, - "step": 33670 - }, - { - "epoch": 2.5011139165305214, - "grad_norm": 2.4985029697418213, - "learning_rate": 1.4993316500816873e-05, - "loss": 0.0724, - "step": 33680 - }, - { - "epoch": 2.501856527550869, - "grad_norm": 1.3384310007095337, - "learning_rate": 1.4988860834694786e-05, - "loss": 0.0636, - "step": 33690 - }, - { - "epoch": 2.5025991385712163, - "grad_norm": 0.9728517532348633, - "learning_rate": 1.4984405168572701e-05, - "loss": 0.0611, - "step": 33700 - }, - { - "epoch": 2.5033417495915637, - "grad_norm": 1.3432775735855103, - "learning_rate": 1.4979949502450618e-05, - "loss": 0.0541, - "step": 33710 - }, - { - "epoch": 2.5040843606119116, - "grad_norm": 0.8654043674468994, - "learning_rate": 1.4975493836328531e-05, - "loss": 0.065, - "step": 33720 - }, - { - "epoch": 2.504826971632259, - "grad_norm": 0.9931495785713196, - "learning_rate": 1.4971038170206446e-05, - "loss": 0.0447, - "step": 33730 - }, - { - "epoch": 2.5055695826526065, - "grad_norm": 0.5981758832931519, - "learning_rate": 1.4966582504084361e-05, - "loss": 0.0511, - "step": 33740 - }, - { - "epoch": 2.506312193672954, - "grad_norm": 1.3194613456726074, - "learning_rate": 1.4962126837962276e-05, - "loss": 0.0902, - "step": 33750 - }, - { - "epoch": 2.5070548046933014, - "grad_norm": 1.7330251932144165, - "learning_rate": 1.4957671171840191e-05, - "loss": 0.0522, - "step": 33760 - }, - { - "epoch": 2.5077974157136493, - "grad_norm": 0.47713515162467957, - "learning_rate": 1.4953215505718104e-05, - "loss": 0.0676, - "step": 33770 - }, - { - "epoch": 2.508540026733997, - "grad_norm": 2.3502461910247803, - "learning_rate": 1.4948759839596021e-05, - "loss": 0.0537, - "step": 33780 - }, - { - "epoch": 2.5092826377543442, - "grad_norm": 0.9015212655067444, - "learning_rate": 1.4944304173473934e-05, - "loss": 0.0861, - "step": 33790 - }, - { - "epoch": 2.5100252487746917, - "grad_norm": 0.7821179628372192, - "learning_rate": 1.493984850735185e-05, - "loss": 0.05, - "step": 33800 - }, - { - "epoch": 2.510767859795039, - "grad_norm": 0.5906331539154053, - "learning_rate": 1.4935392841229764e-05, - "loss": 0.0414, - "step": 33810 - }, - { - "epoch": 2.511510470815387, - "grad_norm": 3.766472101211548, - "learning_rate": 1.493093717510768e-05, - "loss": 0.087, - "step": 33820 - }, - { - "epoch": 2.5122530818357345, - "grad_norm": 0.8629242777824402, - "learning_rate": 1.4926481508985594e-05, - "loss": 0.0452, - "step": 33830 - }, - { - "epoch": 2.512995692856082, - "grad_norm": 0.4955524504184723, - "learning_rate": 1.4922025842863508e-05, - "loss": 0.0487, - "step": 33840 - }, - { - "epoch": 2.5137383038764294, - "grad_norm": 1.7956591844558716, - "learning_rate": 1.4917570176741423e-05, - "loss": 0.0838, - "step": 33850 - }, - { - "epoch": 2.514480914896777, - "grad_norm": 1.9371930360794067, - "learning_rate": 1.4913114510619338e-05, - "loss": 0.0987, - "step": 33860 - }, - { - "epoch": 2.5152235259171247, - "grad_norm": 0.846228837966919, - "learning_rate": 1.4908658844497253e-05, - "loss": 0.0762, - "step": 33870 - }, - { - "epoch": 2.515966136937472, - "grad_norm": 2.3206639289855957, - "learning_rate": 1.4904203178375168e-05, - "loss": 0.0978, - "step": 33880 - }, - { - "epoch": 2.5167087479578196, - "grad_norm": 1.545559048652649, - "learning_rate": 1.4899747512253083e-05, - "loss": 0.0446, - "step": 33890 - }, - { - "epoch": 2.517451358978167, - "grad_norm": 1.571700930595398, - "learning_rate": 1.4895291846130998e-05, - "loss": 0.0786, - "step": 33900 - }, - { - "epoch": 2.5181939699985145, - "grad_norm": 1.5507023334503174, - "learning_rate": 1.4890836180008911e-05, - "loss": 0.0737, - "step": 33910 - }, - { - "epoch": 2.5189365810188624, - "grad_norm": 1.5131269693374634, - "learning_rate": 1.4886380513886826e-05, - "loss": 0.0549, - "step": 33920 - }, - { - "epoch": 2.51967919203921, - "grad_norm": 0.8395630121231079, - "learning_rate": 1.4881924847764743e-05, - "loss": 0.0665, - "step": 33930 - }, - { - "epoch": 2.5204218030595573, - "grad_norm": 2.2483623027801514, - "learning_rate": 1.4877469181642656e-05, - "loss": 0.0329, - "step": 33940 - }, - { - "epoch": 2.521164414079905, - "grad_norm": 1.996645212173462, - "learning_rate": 1.4873013515520571e-05, - "loss": 0.0658, - "step": 33950 - }, - { - "epoch": 2.521907025100252, - "grad_norm": 1.3301950693130493, - "learning_rate": 1.4868557849398484e-05, - "loss": 0.0595, - "step": 33960 - }, - { - "epoch": 2.5226496361206, - "grad_norm": 2.5241854190826416, - "learning_rate": 1.4864102183276401e-05, - "loss": 0.0837, - "step": 33970 - }, - { - "epoch": 2.5233922471409476, - "grad_norm": 0.841391384601593, - "learning_rate": 1.4859646517154314e-05, - "loss": 0.0789, - "step": 33980 - }, - { - "epoch": 2.524134858161295, - "grad_norm": 1.6445497274398804, - "learning_rate": 1.485519085103223e-05, - "loss": 0.0841, - "step": 33990 - }, - { - "epoch": 2.524877469181643, - "grad_norm": 1.169519305229187, - "learning_rate": 1.4850735184910146e-05, - "loss": 0.0483, - "step": 34000 - }, - { - "epoch": 2.5256200802019904, - "grad_norm": 1.069122076034546, - "learning_rate": 1.484627951878806e-05, - "loss": 0.0683, - "step": 34010 - }, - { - "epoch": 2.526362691222338, - "grad_norm": 2.5198476314544678, - "learning_rate": 1.4841823852665974e-05, - "loss": 0.0837, - "step": 34020 - }, - { - "epoch": 2.5271053022426853, - "grad_norm": 1.8398845195770264, - "learning_rate": 1.4837368186543888e-05, - "loss": 0.0589, - "step": 34030 - }, - { - "epoch": 2.5278479132630327, - "grad_norm": 1.1466633081436157, - "learning_rate": 1.4832912520421804e-05, - "loss": 0.0439, - "step": 34040 - }, - { - "epoch": 2.5285905242833806, - "grad_norm": 1.7048221826553345, - "learning_rate": 1.482845685429972e-05, - "loss": 0.0828, - "step": 34050 - }, - { - "epoch": 2.529333135303728, - "grad_norm": 1.1695541143417358, - "learning_rate": 1.4824001188177632e-05, - "loss": 0.0756, - "step": 34060 - }, - { - "epoch": 2.5300757463240755, - "grad_norm": 0.7336133718490601, - "learning_rate": 1.4819545522055547e-05, - "loss": 0.0519, - "step": 34070 - }, - { - "epoch": 2.530818357344423, - "grad_norm": 3.3735294342041016, - "learning_rate": 1.4815089855933462e-05, - "loss": 0.0542, - "step": 34080 - }, - { - "epoch": 2.5315609683647704, - "grad_norm": 4.34521484375, - "learning_rate": 1.4810634189811377e-05, - "loss": 0.0878, - "step": 34090 - }, - { - "epoch": 2.5323035793851183, - "grad_norm": 2.667654037475586, - "learning_rate": 1.480617852368929e-05, - "loss": 0.0794, - "step": 34100 - }, - { - "epoch": 2.5330461904054657, - "grad_norm": 3.387084722518921, - "learning_rate": 1.4801722857567206e-05, - "loss": 0.0941, - "step": 34110 - }, - { - "epoch": 2.533788801425813, - "grad_norm": 2.6281583309173584, - "learning_rate": 1.4797267191445122e-05, - "loss": 0.0624, - "step": 34120 - }, - { - "epoch": 2.5345314124461606, - "grad_norm": 1.0451610088348389, - "learning_rate": 1.4792811525323036e-05, - "loss": 0.0515, - "step": 34130 - }, - { - "epoch": 2.535274023466508, - "grad_norm": 1.5573267936706543, - "learning_rate": 1.478835585920095e-05, - "loss": 0.0878, - "step": 34140 - }, - { - "epoch": 2.536016634486856, - "grad_norm": 1.6244726181030273, - "learning_rate": 1.4783900193078866e-05, - "loss": 0.0647, - "step": 34150 - }, - { - "epoch": 2.5367592455072034, - "grad_norm": 2.02620005607605, - "learning_rate": 1.477944452695678e-05, - "loss": 0.0734, - "step": 34160 - }, - { - "epoch": 2.537501856527551, - "grad_norm": 1.4137933254241943, - "learning_rate": 1.4774988860834696e-05, - "loss": 0.0881, - "step": 34170 - }, - { - "epoch": 2.5382444675478983, - "grad_norm": 1.8512823581695557, - "learning_rate": 1.4770533194712609e-05, - "loss": 0.0635, - "step": 34180 - }, - { - "epoch": 2.538987078568246, - "grad_norm": 1.565675973892212, - "learning_rate": 1.4766077528590526e-05, - "loss": 0.0625, - "step": 34190 - }, - { - "epoch": 2.5397296895885937, - "grad_norm": 0.9743090271949768, - "learning_rate": 1.4761621862468439e-05, - "loss": 0.0686, - "step": 34200 - }, - { - "epoch": 2.540472300608941, - "grad_norm": 1.4764388799667358, - "learning_rate": 1.4757166196346354e-05, - "loss": 0.0563, - "step": 34210 - }, - { - "epoch": 2.5412149116292886, - "grad_norm": 1.5421714782714844, - "learning_rate": 1.4752710530224269e-05, - "loss": 0.0727, - "step": 34220 - }, - { - "epoch": 2.541957522649636, - "grad_norm": 0.654322624206543, - "learning_rate": 1.4748254864102184e-05, - "loss": 0.0523, - "step": 34230 - }, - { - "epoch": 2.5427001336699835, - "grad_norm": 1.733870029449463, - "learning_rate": 1.4743799197980099e-05, - "loss": 0.0854, - "step": 34240 - }, - { - "epoch": 2.5434427446903314, - "grad_norm": 2.0312111377716064, - "learning_rate": 1.4739343531858012e-05, - "loss": 0.0809, - "step": 34250 - }, - { - "epoch": 2.544185355710679, - "grad_norm": 1.3134266138076782, - "learning_rate": 1.4734887865735929e-05, - "loss": 0.0722, - "step": 34260 - }, - { - "epoch": 2.5449279667310263, - "grad_norm": 2.5417449474334717, - "learning_rate": 1.4730432199613842e-05, - "loss": 0.0819, - "step": 34270 - }, - { - "epoch": 2.5456705777513737, - "grad_norm": 1.3097118139266968, - "learning_rate": 1.4725976533491757e-05, - "loss": 0.0818, - "step": 34280 - }, - { - "epoch": 2.546413188771721, - "grad_norm": 1.2188619375228882, - "learning_rate": 1.4721520867369672e-05, - "loss": 0.0692, - "step": 34290 - }, - { - "epoch": 2.547155799792069, - "grad_norm": 2.3822097778320312, - "learning_rate": 1.4717065201247587e-05, - "loss": 0.0847, - "step": 34300 - }, - { - "epoch": 2.5478984108124165, - "grad_norm": 1.0267736911773682, - "learning_rate": 1.4712609535125502e-05, - "loss": 0.0844, - "step": 34310 - }, - { - "epoch": 2.548641021832764, - "grad_norm": 1.53618323802948, - "learning_rate": 1.4708153869003416e-05, - "loss": 0.0804, - "step": 34320 - }, - { - "epoch": 2.5493836328531114, - "grad_norm": 1.5292679071426392, - "learning_rate": 1.470369820288133e-05, - "loss": 0.0752, - "step": 34330 - }, - { - "epoch": 2.550126243873459, - "grad_norm": 0.5811061263084412, - "learning_rate": 1.4699242536759247e-05, - "loss": 0.0684, - "step": 34340 - }, - { - "epoch": 2.5508688548938068, - "grad_norm": 0.8153356313705444, - "learning_rate": 1.469478687063716e-05, - "loss": 0.048, - "step": 34350 - }, - { - "epoch": 2.551611465914154, - "grad_norm": 1.236395001411438, - "learning_rate": 1.4690331204515076e-05, - "loss": 0.0614, - "step": 34360 - }, - { - "epoch": 2.5523540769345017, - "grad_norm": 1.525625467300415, - "learning_rate": 1.4685875538392989e-05, - "loss": 0.0802, - "step": 34370 - }, - { - "epoch": 2.553096687954849, - "grad_norm": 0.4339189827442169, - "learning_rate": 1.4681419872270906e-05, - "loss": 0.041, - "step": 34380 - }, - { - "epoch": 2.5538392989751966, - "grad_norm": 1.1005926132202148, - "learning_rate": 1.4676964206148819e-05, - "loss": 0.0557, - "step": 34390 - }, - { - "epoch": 2.5545819099955445, - "grad_norm": 2.0460987091064453, - "learning_rate": 1.4672508540026734e-05, - "loss": 0.0777, - "step": 34400 - }, - { - "epoch": 2.555324521015892, - "grad_norm": 1.1774736642837524, - "learning_rate": 1.466805287390465e-05, - "loss": 0.0499, - "step": 34410 - }, - { - "epoch": 2.5560671320362394, - "grad_norm": 1.0830439329147339, - "learning_rate": 1.4663597207782564e-05, - "loss": 0.0378, - "step": 34420 - }, - { - "epoch": 2.556809743056587, - "grad_norm": 3.122680187225342, - "learning_rate": 1.4659141541660479e-05, - "loss": 0.0711, - "step": 34430 - }, - { - "epoch": 2.5575523540769343, - "grad_norm": 1.451540231704712, - "learning_rate": 1.4654685875538392e-05, - "loss": 0.0909, - "step": 34440 - }, - { - "epoch": 2.558294965097282, - "grad_norm": 2.591353416442871, - "learning_rate": 1.4650230209416309e-05, - "loss": 0.0714, - "step": 34450 - }, - { - "epoch": 2.5590375761176296, - "grad_norm": 1.4591681957244873, - "learning_rate": 1.4645774543294224e-05, - "loss": 0.0777, - "step": 34460 - }, - { - "epoch": 2.559780187137977, - "grad_norm": 0.7905107736587524, - "learning_rate": 1.4641318877172137e-05, - "loss": 0.056, - "step": 34470 - }, - { - "epoch": 2.5605227981583245, - "grad_norm": 2.1354310512542725, - "learning_rate": 1.4636863211050052e-05, - "loss": 0.0647, - "step": 34480 - }, - { - "epoch": 2.561265409178672, - "grad_norm": 0.35180187225341797, - "learning_rate": 1.4632407544927967e-05, - "loss": 0.056, - "step": 34490 - }, - { - "epoch": 2.56200802019902, - "grad_norm": 1.2425521612167358, - "learning_rate": 1.4627951878805882e-05, - "loss": 0.0533, - "step": 34500 - }, - { - "epoch": 2.5627506312193673, - "grad_norm": 2.0177273750305176, - "learning_rate": 1.4623496212683797e-05, - "loss": 0.039, - "step": 34510 - }, - { - "epoch": 2.5634932422397148, - "grad_norm": 0.6943618059158325, - "learning_rate": 1.461904054656171e-05, - "loss": 0.0622, - "step": 34520 - }, - { - "epoch": 2.5642358532600626, - "grad_norm": 2.4245269298553467, - "learning_rate": 1.4614584880439627e-05, - "loss": 0.0611, - "step": 34530 - }, - { - "epoch": 2.5649784642804097, - "grad_norm": 0.730995237827301, - "learning_rate": 1.461012921431754e-05, - "loss": 0.0834, - "step": 34540 - }, - { - "epoch": 2.5657210753007575, - "grad_norm": 0.8295930027961731, - "learning_rate": 1.4605673548195455e-05, - "loss": 0.0567, - "step": 34550 - }, - { - "epoch": 2.566463686321105, - "grad_norm": 2.3141775131225586, - "learning_rate": 1.460121788207337e-05, - "loss": 0.0663, - "step": 34560 - }, - { - "epoch": 2.5672062973414524, - "grad_norm": 1.6702011823654175, - "learning_rate": 1.4596762215951285e-05, - "loss": 0.1019, - "step": 34570 - }, - { - "epoch": 2.5679489083618003, - "grad_norm": 1.9209109544754028, - "learning_rate": 1.45923065498292e-05, - "loss": 0.0763, - "step": 34580 - }, - { - "epoch": 2.568691519382148, - "grad_norm": 1.5046935081481934, - "learning_rate": 1.4587850883707114e-05, - "loss": 0.0805, - "step": 34590 - }, - { - "epoch": 2.5694341304024952, - "grad_norm": 0.8070915937423706, - "learning_rate": 1.458339521758503e-05, - "loss": 0.0334, - "step": 34600 - }, - { - "epoch": 2.5701767414228427, - "grad_norm": 0.2428605705499649, - "learning_rate": 1.4578939551462944e-05, - "loss": 0.0457, - "step": 34610 - }, - { - "epoch": 2.57091935244319, - "grad_norm": 0.3563167452812195, - "learning_rate": 1.4574483885340859e-05, - "loss": 0.0598, - "step": 34620 - }, - { - "epoch": 2.571661963463538, - "grad_norm": 0.9736761450767517, - "learning_rate": 1.4570028219218774e-05, - "loss": 0.058, - "step": 34630 - }, - { - "epoch": 2.5724045744838855, - "grad_norm": 1.3607254028320312, - "learning_rate": 1.4565572553096689e-05, - "loss": 0.069, - "step": 34640 - }, - { - "epoch": 2.573147185504233, - "grad_norm": 0.6492049694061279, - "learning_rate": 1.4561116886974604e-05, - "loss": 0.0406, - "step": 34650 - }, - { - "epoch": 2.5738897965245804, - "grad_norm": 1.3823007345199585, - "learning_rate": 1.4556661220852517e-05, - "loss": 0.062, - "step": 34660 - }, - { - "epoch": 2.574632407544928, - "grad_norm": 3.305699110031128, - "learning_rate": 1.4552205554730434e-05, - "loss": 0.1082, - "step": 34670 - }, - { - "epoch": 2.5753750185652757, - "grad_norm": 1.8386292457580566, - "learning_rate": 1.4547749888608347e-05, - "loss": 0.0667, - "step": 34680 - }, - { - "epoch": 2.576117629585623, - "grad_norm": 0.8886379599571228, - "learning_rate": 1.4543294222486262e-05, - "loss": 0.0873, - "step": 34690 - }, - { - "epoch": 2.5768602406059706, - "grad_norm": 1.2363057136535645, - "learning_rate": 1.4538838556364177e-05, - "loss": 0.0978, - "step": 34700 - }, - { - "epoch": 2.577602851626318, - "grad_norm": 1.2348647117614746, - "learning_rate": 1.4534382890242092e-05, - "loss": 0.0801, - "step": 34710 - }, - { - "epoch": 2.5783454626466655, - "grad_norm": 1.7312116622924805, - "learning_rate": 1.4529927224120007e-05, - "loss": 0.0794, - "step": 34720 - }, - { - "epoch": 2.5790880736670134, - "grad_norm": 1.0014289617538452, - "learning_rate": 1.452547155799792e-05, - "loss": 0.061, - "step": 34730 - }, - { - "epoch": 2.579830684687361, - "grad_norm": 0.8576075434684753, - "learning_rate": 1.4521015891875835e-05, - "loss": 0.0756, - "step": 34740 - }, - { - "epoch": 2.5805732957077083, - "grad_norm": 2.0893824100494385, - "learning_rate": 1.4516560225753752e-05, - "loss": 0.0672, - "step": 34750 - }, - { - "epoch": 2.5813159067280558, - "grad_norm": 0.5497003197669983, - "learning_rate": 1.4512104559631665e-05, - "loss": 0.037, - "step": 34760 - }, - { - "epoch": 2.5820585177484032, - "grad_norm": 3.0224320888519287, - "learning_rate": 1.450764889350958e-05, - "loss": 0.0525, - "step": 34770 - }, - { - "epoch": 2.582801128768751, - "grad_norm": 0.8441876173019409, - "learning_rate": 1.4503193227387493e-05, - "loss": 0.0875, - "step": 34780 - }, - { - "epoch": 2.5835437397890986, - "grad_norm": 2.5795955657958984, - "learning_rate": 1.449873756126541e-05, - "loss": 0.0817, - "step": 34790 - }, - { - "epoch": 2.584286350809446, - "grad_norm": 0.8166103363037109, - "learning_rate": 1.4494281895143323e-05, - "loss": 0.0503, - "step": 34800 - }, - { - "epoch": 2.5850289618297935, - "grad_norm": 1.8198186159133911, - "learning_rate": 1.4489826229021238e-05, - "loss": 0.0388, - "step": 34810 - }, - { - "epoch": 2.585771572850141, - "grad_norm": 4.351510524749756, - "learning_rate": 1.4485370562899155e-05, - "loss": 0.0762, - "step": 34820 - }, - { - "epoch": 2.586514183870489, - "grad_norm": 0.9294065833091736, - "learning_rate": 1.4480914896777068e-05, - "loss": 0.0678, - "step": 34830 - }, - { - "epoch": 2.5872567948908363, - "grad_norm": 1.0012507438659668, - "learning_rate": 1.4476459230654983e-05, - "loss": 0.0536, - "step": 34840 - }, - { - "epoch": 2.5879994059111837, - "grad_norm": 0.6957378387451172, - "learning_rate": 1.4472003564532897e-05, - "loss": 0.0958, - "step": 34850 - }, - { - "epoch": 2.588742016931531, - "grad_norm": 0.9014194011688232, - "learning_rate": 1.4467547898410813e-05, - "loss": 0.0647, - "step": 34860 - }, - { - "epoch": 2.5894846279518786, - "grad_norm": 1.4799509048461914, - "learning_rate": 1.4463092232288728e-05, - "loss": 0.0757, - "step": 34870 - }, - { - "epoch": 2.5902272389722265, - "grad_norm": 1.020585060119629, - "learning_rate": 1.4458636566166642e-05, - "loss": 0.0537, - "step": 34880 - }, - { - "epoch": 2.590969849992574, - "grad_norm": 3.012230396270752, - "learning_rate": 1.4454180900044557e-05, - "loss": 0.0807, - "step": 34890 - }, - { - "epoch": 2.5917124610129214, - "grad_norm": 1.4325774908065796, - "learning_rate": 1.4449725233922472e-05, - "loss": 0.0746, - "step": 34900 - }, - { - "epoch": 2.592455072033269, - "grad_norm": 0.7965050339698792, - "learning_rate": 1.4445269567800387e-05, - "loss": 0.0488, - "step": 34910 - }, - { - "epoch": 2.5931976830536163, - "grad_norm": 0.29672083258628845, - "learning_rate": 1.4440813901678302e-05, - "loss": 0.0621, - "step": 34920 - }, - { - "epoch": 2.593940294073964, - "grad_norm": 1.9577540159225464, - "learning_rate": 1.4436358235556217e-05, - "loss": 0.0845, - "step": 34930 - }, - { - "epoch": 2.5946829050943117, - "grad_norm": 2.028249502182007, - "learning_rate": 1.4431902569434132e-05, - "loss": 0.0853, - "step": 34940 - }, - { - "epoch": 2.595425516114659, - "grad_norm": 0.9768528342247009, - "learning_rate": 1.4427446903312045e-05, - "loss": 0.0762, - "step": 34950 - }, - { - "epoch": 2.5961681271350066, - "grad_norm": 0.36251920461654663, - "learning_rate": 1.442299123718996e-05, - "loss": 0.0513, - "step": 34960 - }, - { - "epoch": 2.596910738155354, - "grad_norm": 1.7192022800445557, - "learning_rate": 1.4418535571067875e-05, - "loss": 0.0643, - "step": 34970 - }, - { - "epoch": 2.597653349175702, - "grad_norm": 1.3157782554626465, - "learning_rate": 1.441407990494579e-05, - "loss": 0.0863, - "step": 34980 - }, - { - "epoch": 2.5983959601960493, - "grad_norm": 1.921720027923584, - "learning_rate": 1.4409624238823705e-05, - "loss": 0.0806, - "step": 34990 - }, - { - "epoch": 2.599138571216397, - "grad_norm": 0.9074265360832214, - "learning_rate": 1.4405168572701618e-05, - "loss": 0.0916, - "step": 35000 - }, - { - "epoch": 2.5998811822367442, - "grad_norm": 1.1961455345153809, - "learning_rate": 1.4400712906579535e-05, - "loss": 0.0818, - "step": 35010 - }, - { - "epoch": 2.6006237932570917, - "grad_norm": 1.6636606454849243, - "learning_rate": 1.4396257240457448e-05, - "loss": 0.0947, - "step": 35020 - }, - { - "epoch": 2.6013664042774396, - "grad_norm": 1.7410112619400024, - "learning_rate": 1.4391801574335363e-05, - "loss": 0.0614, - "step": 35030 - }, - { - "epoch": 2.602109015297787, - "grad_norm": 4.592065811157227, - "learning_rate": 1.4387345908213278e-05, - "loss": 0.0652, - "step": 35040 - }, - { - "epoch": 2.6028516263181345, - "grad_norm": 2.8058197498321533, - "learning_rate": 1.4382890242091193e-05, - "loss": 0.0731, - "step": 35050 - }, - { - "epoch": 2.603594237338482, - "grad_norm": 1.1537928581237793, - "learning_rate": 1.4378434575969108e-05, - "loss": 0.0789, - "step": 35060 - }, - { - "epoch": 2.6043368483588294, - "grad_norm": 1.5462356805801392, - "learning_rate": 1.4373978909847021e-05, - "loss": 0.0735, - "step": 35070 - }, - { - "epoch": 2.6050794593791773, - "grad_norm": 2.397684097290039, - "learning_rate": 1.4369523243724938e-05, - "loss": 0.0812, - "step": 35080 - }, - { - "epoch": 2.6058220703995247, - "grad_norm": 0.8381139039993286, - "learning_rate": 1.4365067577602851e-05, - "loss": 0.0472, - "step": 35090 - }, - { - "epoch": 2.606564681419872, - "grad_norm": 0.911646842956543, - "learning_rate": 1.4360611911480766e-05, - "loss": 0.0668, - "step": 35100 - }, - { - "epoch": 2.60730729244022, - "grad_norm": 1.0542629957199097, - "learning_rate": 1.4356156245358681e-05, - "loss": 0.0711, - "step": 35110 - }, - { - "epoch": 2.608049903460567, - "grad_norm": 0.4654415547847748, - "learning_rate": 1.4351700579236596e-05, - "loss": 0.0561, - "step": 35120 - }, - { - "epoch": 2.608792514480915, - "grad_norm": 1.3107812404632568, - "learning_rate": 1.4347244913114511e-05, - "loss": 0.0691, - "step": 35130 - }, - { - "epoch": 2.6095351255012624, - "grad_norm": 2.2061398029327393, - "learning_rate": 1.4342789246992425e-05, - "loss": 0.1093, - "step": 35140 - }, - { - "epoch": 2.61027773652161, - "grad_norm": 1.198928952217102, - "learning_rate": 1.433833358087034e-05, - "loss": 0.0445, - "step": 35150 - }, - { - "epoch": 2.6110203475419578, - "grad_norm": 1.055016279220581, - "learning_rate": 1.4333877914748256e-05, - "loss": 0.057, - "step": 35160 - }, - { - "epoch": 2.6117629585623052, - "grad_norm": 1.0568102598190308, - "learning_rate": 1.432942224862617e-05, - "loss": 0.0598, - "step": 35170 - }, - { - "epoch": 2.6125055695826527, - "grad_norm": 1.305461049079895, - "learning_rate": 1.4324966582504085e-05, - "loss": 0.0859, - "step": 35180 - }, - { - "epoch": 2.613248180603, - "grad_norm": 1.0581294298171997, - "learning_rate": 1.4320510916382e-05, - "loss": 0.0662, - "step": 35190 - }, - { - "epoch": 2.6139907916233476, - "grad_norm": 0.9426524639129639, - "learning_rate": 1.4316055250259915e-05, - "loss": 0.0559, - "step": 35200 - }, - { - "epoch": 2.6147334026436955, - "grad_norm": 0.5946950316429138, - "learning_rate": 1.431159958413783e-05, - "loss": 0.0954, - "step": 35210 - }, - { - "epoch": 2.615476013664043, - "grad_norm": 1.6754854917526245, - "learning_rate": 1.4307143918015743e-05, - "loss": 0.0866, - "step": 35220 - }, - { - "epoch": 2.6162186246843904, - "grad_norm": 1.6336374282836914, - "learning_rate": 1.430268825189366e-05, - "loss": 0.0815, - "step": 35230 - }, - { - "epoch": 2.616961235704738, - "grad_norm": 2.549908399581909, - "learning_rate": 1.4298232585771573e-05, - "loss": 0.0529, - "step": 35240 - }, - { - "epoch": 2.6177038467250853, - "grad_norm": 1.3991200923919678, - "learning_rate": 1.4293776919649488e-05, - "loss": 0.0483, - "step": 35250 - }, - { - "epoch": 2.618446457745433, - "grad_norm": 0.7398178577423096, - "learning_rate": 1.4289321253527401e-05, - "loss": 0.0677, - "step": 35260 - }, - { - "epoch": 2.6191890687657806, - "grad_norm": 1.8208078145980835, - "learning_rate": 1.4284865587405318e-05, - "loss": 0.0539, - "step": 35270 - }, - { - "epoch": 2.619931679786128, - "grad_norm": 1.7018234729766846, - "learning_rate": 1.4280409921283233e-05, - "loss": 0.0529, - "step": 35280 - }, - { - "epoch": 2.6206742908064755, - "grad_norm": 2.4244110584259033, - "learning_rate": 1.4275954255161146e-05, - "loss": 0.0595, - "step": 35290 - }, - { - "epoch": 2.621416901826823, - "grad_norm": 0.505042314529419, - "learning_rate": 1.4271498589039061e-05, - "loss": 0.0865, - "step": 35300 - }, - { - "epoch": 2.622159512847171, - "grad_norm": 1.206248164176941, - "learning_rate": 1.4267042922916976e-05, - "loss": 0.0662, - "step": 35310 - }, - { - "epoch": 2.6229021238675183, - "grad_norm": 1.3556511402130127, - "learning_rate": 1.4262587256794891e-05, - "loss": 0.0575, - "step": 35320 - }, - { - "epoch": 2.6236447348878658, - "grad_norm": 0.7870049476623535, - "learning_rate": 1.4258131590672806e-05, - "loss": 0.0614, - "step": 35330 - }, - { - "epoch": 2.624387345908213, - "grad_norm": 1.794494867324829, - "learning_rate": 1.4253675924550721e-05, - "loss": 0.097, - "step": 35340 - }, - { - "epoch": 2.6251299569285607, - "grad_norm": 0.7083643078804016, - "learning_rate": 1.4249220258428636e-05, - "loss": 0.0648, - "step": 35350 - }, - { - "epoch": 2.6258725679489086, - "grad_norm": 0.7302588224411011, - "learning_rate": 1.424476459230655e-05, - "loss": 0.0578, - "step": 35360 - }, - { - "epoch": 2.626615178969256, - "grad_norm": 3.079280376434326, - "learning_rate": 1.4240308926184465e-05, - "loss": 0.0897, - "step": 35370 - }, - { - "epoch": 2.6273577899896035, - "grad_norm": 0.824223518371582, - "learning_rate": 1.423585326006238e-05, - "loss": 0.08, - "step": 35380 - }, - { - "epoch": 2.628100401009951, - "grad_norm": 1.3617218732833862, - "learning_rate": 1.4231397593940295e-05, - "loss": 0.0633, - "step": 35390 - }, - { - "epoch": 2.6288430120302984, - "grad_norm": 0.5929654240608215, - "learning_rate": 1.422694192781821e-05, - "loss": 0.0467, - "step": 35400 - }, - { - "epoch": 2.6295856230506462, - "grad_norm": 2.259077310562134, - "learning_rate": 1.4222486261696123e-05, - "loss": 0.0735, - "step": 35410 - }, - { - "epoch": 2.6303282340709937, - "grad_norm": 1.6033036708831787, - "learning_rate": 1.421803059557404e-05, - "loss": 0.0972, - "step": 35420 - }, - { - "epoch": 2.631070845091341, - "grad_norm": 1.9454528093338013, - "learning_rate": 1.4213574929451953e-05, - "loss": 0.0736, - "step": 35430 - }, - { - "epoch": 2.6318134561116886, - "grad_norm": 0.5253037810325623, - "learning_rate": 1.4209119263329868e-05, - "loss": 0.0651, - "step": 35440 - }, - { - "epoch": 2.632556067132036, - "grad_norm": 0.5184679627418518, - "learning_rate": 1.4204663597207784e-05, - "loss": 0.0403, - "step": 35450 - }, - { - "epoch": 2.633298678152384, - "grad_norm": 0.7617425918579102, - "learning_rate": 1.4200207931085698e-05, - "loss": 0.0699, - "step": 35460 - }, - { - "epoch": 2.6340412891727314, - "grad_norm": 1.4257124662399292, - "learning_rate": 1.4195752264963613e-05, - "loss": 0.0725, - "step": 35470 - }, - { - "epoch": 2.634783900193079, - "grad_norm": 2.0177693367004395, - "learning_rate": 1.4191296598841526e-05, - "loss": 0.0693, - "step": 35480 - }, - { - "epoch": 2.6355265112134263, - "grad_norm": 1.3134448528289795, - "learning_rate": 1.4186840932719443e-05, - "loss": 0.0739, - "step": 35490 - }, - { - "epoch": 2.6362691222337737, - "grad_norm": 2.646014928817749, - "learning_rate": 1.4182385266597356e-05, - "loss": 0.0605, - "step": 35500 - }, - { - "epoch": 2.6370117332541216, - "grad_norm": 1.8550797700881958, - "learning_rate": 1.4177929600475271e-05, - "loss": 0.0577, - "step": 35510 - }, - { - "epoch": 2.637754344274469, - "grad_norm": 1.6561418771743774, - "learning_rate": 1.4173473934353186e-05, - "loss": 0.0798, - "step": 35520 - }, - { - "epoch": 2.6384969552948165, - "grad_norm": 1.4984925985336304, - "learning_rate": 1.4169018268231101e-05, - "loss": 0.0764, - "step": 35530 - }, - { - "epoch": 2.639239566315164, - "grad_norm": 2.3113274574279785, - "learning_rate": 1.4164562602109016e-05, - "loss": 0.0465, - "step": 35540 - }, - { - "epoch": 2.6399821773355114, - "grad_norm": 2.2579538822174072, - "learning_rate": 1.416010693598693e-05, - "loss": 0.0767, - "step": 35550 - }, - { - "epoch": 2.6407247883558593, - "grad_norm": 1.6482024192810059, - "learning_rate": 1.4155651269864844e-05, - "loss": 0.0514, - "step": 35560 - }, - { - "epoch": 2.641467399376207, - "grad_norm": 2.0257744789123535, - "learning_rate": 1.4151195603742761e-05, - "loss": 0.0452, - "step": 35570 - }, - { - "epoch": 2.6422100103965542, - "grad_norm": 1.083173394203186, - "learning_rate": 1.4146739937620674e-05, - "loss": 0.059, - "step": 35580 - }, - { - "epoch": 2.6429526214169017, - "grad_norm": 1.5998643636703491, - "learning_rate": 1.414228427149859e-05, - "loss": 0.0557, - "step": 35590 - }, - { - "epoch": 2.643695232437249, - "grad_norm": 0.8439221382141113, - "learning_rate": 1.4137828605376504e-05, - "loss": 0.0582, - "step": 35600 - }, - { - "epoch": 2.644437843457597, - "grad_norm": 2.156799793243408, - "learning_rate": 1.413337293925442e-05, - "loss": 0.0562, - "step": 35610 - }, - { - "epoch": 2.6451804544779445, - "grad_norm": 0.5263361930847168, - "learning_rate": 1.4128917273132334e-05, - "loss": 0.0844, - "step": 35620 - }, - { - "epoch": 2.645923065498292, - "grad_norm": 2.5201919078826904, - "learning_rate": 1.4124461607010248e-05, - "loss": 0.0867, - "step": 35630 - }, - { - "epoch": 2.6466656765186394, - "grad_norm": 1.7519117593765259, - "learning_rate": 1.4120005940888164e-05, - "loss": 0.0578, - "step": 35640 - }, - { - "epoch": 2.647408287538987, - "grad_norm": 1.5524243116378784, - "learning_rate": 1.4115550274766078e-05, - "loss": 0.0662, - "step": 35650 - }, - { - "epoch": 2.6481508985593347, - "grad_norm": 0.8685120344161987, - "learning_rate": 1.4111094608643993e-05, - "loss": 0.0509, - "step": 35660 - }, - { - "epoch": 2.648893509579682, - "grad_norm": 2.201120376586914, - "learning_rate": 1.4106638942521906e-05, - "loss": 0.057, - "step": 35670 - }, - { - "epoch": 2.6496361206000296, - "grad_norm": 0.4596274197101593, - "learning_rate": 1.4102183276399823e-05, - "loss": 0.0714, - "step": 35680 - }, - { - "epoch": 2.6503787316203775, - "grad_norm": 2.369061231613159, - "learning_rate": 1.4097727610277738e-05, - "loss": 0.05, - "step": 35690 - }, - { - "epoch": 2.6511213426407245, - "grad_norm": 0.30310168862342834, - "learning_rate": 1.4093271944155651e-05, - "loss": 0.073, - "step": 35700 - }, - { - "epoch": 2.6518639536610724, - "grad_norm": 0.8447324633598328, - "learning_rate": 1.4088816278033568e-05, - "loss": 0.0769, - "step": 35710 - }, - { - "epoch": 2.65260656468142, - "grad_norm": 1.5331531763076782, - "learning_rate": 1.4084360611911481e-05, - "loss": 0.0837, - "step": 35720 - }, - { - "epoch": 2.6533491757017673, - "grad_norm": 0.9283561110496521, - "learning_rate": 1.4079904945789396e-05, - "loss": 0.0608, - "step": 35730 - }, - { - "epoch": 2.654091786722115, - "grad_norm": 1.556794285774231, - "learning_rate": 1.4075449279667311e-05, - "loss": 0.0839, - "step": 35740 - }, - { - "epoch": 2.6548343977424627, - "grad_norm": 2.0326085090637207, - "learning_rate": 1.4070993613545226e-05, - "loss": 0.079, - "step": 35750 - }, - { - "epoch": 2.65557700876281, - "grad_norm": 1.1070688962936401, - "learning_rate": 1.406653794742314e-05, - "loss": 0.0598, - "step": 35760 - }, - { - "epoch": 2.6563196197831576, - "grad_norm": 2.687786817550659, - "learning_rate": 1.4062082281301054e-05, - "loss": 0.0528, - "step": 35770 - }, - { - "epoch": 2.657062230803505, - "grad_norm": 2.0500941276550293, - "learning_rate": 1.4057626615178969e-05, - "loss": 0.0586, - "step": 35780 - }, - { - "epoch": 2.657804841823853, - "grad_norm": 1.7865089178085327, - "learning_rate": 1.4053170949056884e-05, - "loss": 0.0816, - "step": 35790 - }, - { - "epoch": 2.6585474528442004, - "grad_norm": 1.018984317779541, - "learning_rate": 1.4048715282934799e-05, - "loss": 0.0338, - "step": 35800 - }, - { - "epoch": 2.659290063864548, - "grad_norm": 0.3521486520767212, - "learning_rate": 1.4044259616812714e-05, - "loss": 0.0549, - "step": 35810 - }, - { - "epoch": 2.6600326748848953, - "grad_norm": 2.4242541790008545, - "learning_rate": 1.4039803950690627e-05, - "loss": 0.1002, - "step": 35820 - }, - { - "epoch": 2.6607752859052427, - "grad_norm": 1.1004574298858643, - "learning_rate": 1.4035348284568544e-05, - "loss": 0.0457, - "step": 35830 - }, - { - "epoch": 2.6615178969255906, - "grad_norm": 1.469435214996338, - "learning_rate": 1.4030892618446457e-05, - "loss": 0.0819, - "step": 35840 - }, - { - "epoch": 2.662260507945938, - "grad_norm": 0.20133927464485168, - "learning_rate": 1.4026436952324372e-05, - "loss": 0.0591, - "step": 35850 - }, - { - "epoch": 2.6630031189662855, - "grad_norm": 1.4048292636871338, - "learning_rate": 1.4021981286202289e-05, - "loss": 0.0562, - "step": 35860 - }, - { - "epoch": 2.663745729986633, - "grad_norm": 5.874467849731445, - "learning_rate": 1.4017525620080202e-05, - "loss": 0.0712, - "step": 35870 - }, - { - "epoch": 2.6644883410069804, - "grad_norm": 1.0913431644439697, - "learning_rate": 1.4013069953958117e-05, - "loss": 0.0703, - "step": 35880 - }, - { - "epoch": 2.6652309520273283, - "grad_norm": 0.3893365263938904, - "learning_rate": 1.400861428783603e-05, - "loss": 0.0593, - "step": 35890 - }, - { - "epoch": 2.6659735630476757, - "grad_norm": 1.0970251560211182, - "learning_rate": 1.4004158621713947e-05, - "loss": 0.0716, - "step": 35900 - }, - { - "epoch": 2.666716174068023, - "grad_norm": 1.392922282218933, - "learning_rate": 1.3999702955591862e-05, - "loss": 0.0589, - "step": 35910 - }, - { - "epoch": 2.6674587850883706, - "grad_norm": 0.6275796890258789, - "learning_rate": 1.3995247289469776e-05, - "loss": 0.037, - "step": 35920 - }, - { - "epoch": 2.668201396108718, - "grad_norm": 1.6049987077713013, - "learning_rate": 1.399079162334769e-05, - "loss": 0.0566, - "step": 35930 - }, - { - "epoch": 2.668944007129066, - "grad_norm": 1.582099199295044, - "learning_rate": 1.3986335957225606e-05, - "loss": 0.0689, - "step": 35940 - }, - { - "epoch": 2.6696866181494134, - "grad_norm": 0.8754908442497253, - "learning_rate": 1.398188029110352e-05, - "loss": 0.0926, - "step": 35950 - }, - { - "epoch": 2.670429229169761, - "grad_norm": 2.45027756690979, - "learning_rate": 1.3977424624981434e-05, - "loss": 0.0468, - "step": 35960 - }, - { - "epoch": 2.6711718401901083, - "grad_norm": 1.8053902387619019, - "learning_rate": 1.3972968958859349e-05, - "loss": 0.0489, - "step": 35970 - }, - { - "epoch": 2.671914451210456, - "grad_norm": 3.0964303016662598, - "learning_rate": 1.3968513292737266e-05, - "loss": 0.0834, - "step": 35980 - }, - { - "epoch": 2.6726570622308037, - "grad_norm": 2.1275410652160645, - "learning_rate": 1.3964057626615179e-05, - "loss": 0.0938, - "step": 35990 - }, - { - "epoch": 2.673399673251151, - "grad_norm": 0.8171222805976868, - "learning_rate": 1.3959601960493094e-05, - "loss": 0.0669, - "step": 36000 - }, - { - "epoch": 2.6741422842714986, - "grad_norm": 3.5696773529052734, - "learning_rate": 1.3955146294371009e-05, - "loss": 0.0935, - "step": 36010 - }, - { - "epoch": 2.674884895291846, - "grad_norm": 1.2515684366226196, - "learning_rate": 1.3950690628248924e-05, - "loss": 0.0902, - "step": 36020 - }, - { - "epoch": 2.6756275063121935, - "grad_norm": 3.313480854034424, - "learning_rate": 1.3946234962126839e-05, - "loss": 0.0592, - "step": 36030 - }, - { - "epoch": 2.6763701173325414, - "grad_norm": 0.7134093046188354, - "learning_rate": 1.3941779296004752e-05, - "loss": 0.0455, - "step": 36040 - }, - { - "epoch": 2.677112728352889, - "grad_norm": 0.5225452184677124, - "learning_rate": 1.3937323629882669e-05, - "loss": 0.0487, - "step": 36050 - }, - { - "epoch": 2.6778553393732363, - "grad_norm": 0.6673758625984192, - "learning_rate": 1.3932867963760582e-05, - "loss": 0.079, - "step": 36060 - }, - { - "epoch": 2.6785979503935837, - "grad_norm": 2.7382137775421143, - "learning_rate": 1.3928412297638497e-05, - "loss": 0.0757, - "step": 36070 - }, - { - "epoch": 2.679340561413931, - "grad_norm": 0.9488750696182251, - "learning_rate": 1.392395663151641e-05, - "loss": 0.0542, - "step": 36080 - }, - { - "epoch": 2.680083172434279, - "grad_norm": 4.239487648010254, - "learning_rate": 1.3919500965394327e-05, - "loss": 0.0986, - "step": 36090 - }, - { - "epoch": 2.6808257834546265, - "grad_norm": 1.1247403621673584, - "learning_rate": 1.3915045299272242e-05, - "loss": 0.0535, - "step": 36100 - }, - { - "epoch": 2.681568394474974, - "grad_norm": 1.4115970134735107, - "learning_rate": 1.3910589633150155e-05, - "loss": 0.0712, - "step": 36110 - }, - { - "epoch": 2.6823110054953214, - "grad_norm": 1.4354156255722046, - "learning_rate": 1.3906133967028072e-05, - "loss": 0.0612, - "step": 36120 - }, - { - "epoch": 2.683053616515669, - "grad_norm": 0.6323860287666321, - "learning_rate": 1.3901678300905985e-05, - "loss": 0.0839, - "step": 36130 - }, - { - "epoch": 2.6837962275360168, - "grad_norm": 1.8889610767364502, - "learning_rate": 1.38972226347839e-05, - "loss": 0.0528, - "step": 36140 - }, - { - "epoch": 2.684538838556364, - "grad_norm": 1.292384386062622, - "learning_rate": 1.3892766968661815e-05, - "loss": 0.0699, - "step": 36150 - }, - { - "epoch": 2.6852814495767117, - "grad_norm": 1.048690676689148, - "learning_rate": 1.388831130253973e-05, - "loss": 0.0679, - "step": 36160 - }, - { - "epoch": 2.686024060597059, - "grad_norm": 0.9815926551818848, - "learning_rate": 1.3883855636417645e-05, - "loss": 0.0747, - "step": 36170 - }, - { - "epoch": 2.6867666716174066, - "grad_norm": 0.6208893060684204, - "learning_rate": 1.3879399970295559e-05, - "loss": 0.0517, - "step": 36180 - }, - { - "epoch": 2.6875092826377545, - "grad_norm": 1.4446412324905396, - "learning_rate": 1.3874944304173474e-05, - "loss": 0.0549, - "step": 36190 - }, - { - "epoch": 2.688251893658102, - "grad_norm": 0.39226940274238586, - "learning_rate": 1.3870488638051389e-05, - "loss": 0.0788, - "step": 36200 - }, - { - "epoch": 2.6889945046784494, - "grad_norm": 1.446395754814148, - "learning_rate": 1.3866032971929304e-05, - "loss": 0.1031, - "step": 36210 - }, - { - "epoch": 2.689737115698797, - "grad_norm": 1.849453330039978, - "learning_rate": 1.3861577305807219e-05, - "loss": 0.0683, - "step": 36220 - }, - { - "epoch": 2.6904797267191443, - "grad_norm": 1.7892287969589233, - "learning_rate": 1.3857121639685132e-05, - "loss": 0.0647, - "step": 36230 - }, - { - "epoch": 2.691222337739492, - "grad_norm": 0.9724948406219482, - "learning_rate": 1.3852665973563049e-05, - "loss": 0.0798, - "step": 36240 - }, - { - "epoch": 2.6919649487598396, - "grad_norm": 1.3628493547439575, - "learning_rate": 1.3848210307440962e-05, - "loss": 0.0556, - "step": 36250 - }, - { - "epoch": 2.692707559780187, - "grad_norm": 1.9433894157409668, - "learning_rate": 1.3843754641318877e-05, - "loss": 0.0985, - "step": 36260 - }, - { - "epoch": 2.693450170800535, - "grad_norm": 1.5981539487838745, - "learning_rate": 1.3839298975196794e-05, - "loss": 0.0582, - "step": 36270 - }, - { - "epoch": 2.694192781820882, - "grad_norm": 0.5563480257987976, - "learning_rate": 1.3834843309074707e-05, - "loss": 0.0417, - "step": 36280 - }, - { - "epoch": 2.69493539284123, - "grad_norm": 0.9371748566627502, - "learning_rate": 1.3830387642952622e-05, - "loss": 0.0775, - "step": 36290 - }, - { - "epoch": 2.6956780038615773, - "grad_norm": 1.239343523979187, - "learning_rate": 1.3825931976830535e-05, - "loss": 0.0469, - "step": 36300 - }, - { - "epoch": 2.6964206148819247, - "grad_norm": 0.48741811513900757, - "learning_rate": 1.3821476310708452e-05, - "loss": 0.0756, - "step": 36310 - }, - { - "epoch": 2.6971632259022726, - "grad_norm": 3.158456802368164, - "learning_rate": 1.3817020644586367e-05, - "loss": 0.0276, - "step": 36320 - }, - { - "epoch": 2.69790583692262, - "grad_norm": 1.2816053628921509, - "learning_rate": 1.381256497846428e-05, - "loss": 0.0553, - "step": 36330 - }, - { - "epoch": 2.6986484479429675, - "grad_norm": 0.4111814796924591, - "learning_rate": 1.3808109312342195e-05, - "loss": 0.0452, - "step": 36340 - }, - { - "epoch": 2.699391058963315, - "grad_norm": 1.7918380498886108, - "learning_rate": 1.380365364622011e-05, - "loss": 0.0432, - "step": 36350 - }, - { - "epoch": 2.7001336699836624, - "grad_norm": 1.718360424041748, - "learning_rate": 1.3799197980098025e-05, - "loss": 0.1107, - "step": 36360 - }, - { - "epoch": 2.7008762810040103, - "grad_norm": 0.892610490322113, - "learning_rate": 1.3794742313975939e-05, - "loss": 0.0709, - "step": 36370 - }, - { - "epoch": 2.701618892024358, - "grad_norm": 0.5579351186752319, - "learning_rate": 1.3790286647853855e-05, - "loss": 0.048, - "step": 36380 - }, - { - "epoch": 2.7023615030447052, - "grad_norm": 0.5769586563110352, - "learning_rate": 1.378583098173177e-05, - "loss": 0.0572, - "step": 36390 - }, - { - "epoch": 2.7031041140650527, - "grad_norm": 2.853304624557495, - "learning_rate": 1.3781375315609684e-05, - "loss": 0.0766, - "step": 36400 - }, - { - "epoch": 2.7038467250854, - "grad_norm": 3.345918655395508, - "learning_rate": 1.3776919649487599e-05, - "loss": 0.0761, - "step": 36410 - }, - { - "epoch": 2.704589336105748, - "grad_norm": 1.423073649406433, - "learning_rate": 1.3772463983365514e-05, - "loss": 0.0698, - "step": 36420 - }, - { - "epoch": 2.7053319471260955, - "grad_norm": 2.362412929534912, - "learning_rate": 1.3768008317243429e-05, - "loss": 0.08, - "step": 36430 - }, - { - "epoch": 2.706074558146443, - "grad_norm": 2.3598601818084717, - "learning_rate": 1.3763552651121344e-05, - "loss": 0.0829, - "step": 36440 - }, - { - "epoch": 2.7068171691667904, - "grad_norm": 1.5622998476028442, - "learning_rate": 1.3759096984999257e-05, - "loss": 0.0447, - "step": 36450 - }, - { - "epoch": 2.707559780187138, - "grad_norm": 0.6126532554626465, - "learning_rate": 1.3754641318877173e-05, - "loss": 0.0703, - "step": 36460 - }, - { - "epoch": 2.7083023912074857, - "grad_norm": 1.302445411682129, - "learning_rate": 1.3750185652755087e-05, - "loss": 0.0636, - "step": 36470 - }, - { - "epoch": 2.709045002227833, - "grad_norm": 0.8420956134796143, - "learning_rate": 1.3745729986633002e-05, - "loss": 0.0789, - "step": 36480 - }, - { - "epoch": 2.7097876132481806, - "grad_norm": 1.0522314310073853, - "learning_rate": 1.3741274320510915e-05, - "loss": 0.0619, - "step": 36490 - }, - { - "epoch": 2.710530224268528, - "grad_norm": 1.1721168756484985, - "learning_rate": 1.3736818654388832e-05, - "loss": 0.047, - "step": 36500 - }, - { - "epoch": 2.7112728352888755, - "grad_norm": 1.1313562393188477, - "learning_rate": 1.3732362988266747e-05, - "loss": 0.0793, - "step": 36510 - }, - { - "epoch": 2.7120154463092234, - "grad_norm": 0.656134307384491, - "learning_rate": 1.372790732214466e-05, - "loss": 0.0709, - "step": 36520 - }, - { - "epoch": 2.712758057329571, - "grad_norm": 0.298880934715271, - "learning_rate": 1.3723451656022577e-05, - "loss": 0.0516, - "step": 36530 - }, - { - "epoch": 2.7135006683499183, - "grad_norm": 0.7972229719161987, - "learning_rate": 1.371899598990049e-05, - "loss": 0.0448, - "step": 36540 - }, - { - "epoch": 2.7142432793702658, - "grad_norm": 1.8355180025100708, - "learning_rate": 1.3714540323778405e-05, - "loss": 0.0938, - "step": 36550 - }, - { - "epoch": 2.714985890390613, - "grad_norm": 1.5195986032485962, - "learning_rate": 1.371008465765632e-05, - "loss": 0.0733, - "step": 36560 - }, - { - "epoch": 2.715728501410961, - "grad_norm": 0.950968861579895, - "learning_rate": 1.3705628991534235e-05, - "loss": 0.038, - "step": 36570 - }, - { - "epoch": 2.7164711124313086, - "grad_norm": 2.1783084869384766, - "learning_rate": 1.370117332541215e-05, - "loss": 0.0732, - "step": 36580 - }, - { - "epoch": 2.717213723451656, - "grad_norm": 4.514249801635742, - "learning_rate": 1.3696717659290063e-05, - "loss": 0.0534, - "step": 36590 - }, - { - "epoch": 2.7179563344720035, - "grad_norm": 1.7248497009277344, - "learning_rate": 1.3692261993167978e-05, - "loss": 0.0703, - "step": 36600 - }, - { - "epoch": 2.718698945492351, - "grad_norm": 2.7249276638031006, - "learning_rate": 1.3687806327045895e-05, - "loss": 0.0967, - "step": 36610 - }, - { - "epoch": 2.719441556512699, - "grad_norm": 0.6530225276947021, - "learning_rate": 1.3683350660923808e-05, - "loss": 0.0476, - "step": 36620 - }, - { - "epoch": 2.7201841675330463, - "grad_norm": 0.6490178108215332, - "learning_rate": 1.3678894994801723e-05, - "loss": 0.0502, - "step": 36630 - }, - { - "epoch": 2.7209267785533937, - "grad_norm": 2.1632354259490967, - "learning_rate": 1.3674439328679638e-05, - "loss": 0.0634, - "step": 36640 - }, - { - "epoch": 2.721669389573741, - "grad_norm": 0.618598997592926, - "learning_rate": 1.3669983662557553e-05, - "loss": 0.0631, - "step": 36650 - }, - { - "epoch": 2.7224120005940886, - "grad_norm": 0.579268753528595, - "learning_rate": 1.3665527996435467e-05, - "loss": 0.0632, - "step": 36660 - }, - { - "epoch": 2.7231546116144365, - "grad_norm": 0.623193085193634, - "learning_rate": 1.3661072330313382e-05, - "loss": 0.074, - "step": 36670 - }, - { - "epoch": 2.723897222634784, - "grad_norm": 0.6630807518959045, - "learning_rate": 1.3656616664191298e-05, - "loss": 0.0574, - "step": 36680 - }, - { - "epoch": 2.7246398336551314, - "grad_norm": 1.1906079053878784, - "learning_rate": 1.3652160998069212e-05, - "loss": 0.0833, - "step": 36690 - }, - { - "epoch": 2.725382444675479, - "grad_norm": 0.7799108624458313, - "learning_rate": 1.3647705331947127e-05, - "loss": 0.0696, - "step": 36700 - }, - { - "epoch": 2.7261250556958263, - "grad_norm": 0.28752097487449646, - "learning_rate": 1.364324966582504e-05, - "loss": 0.0523, - "step": 36710 - }, - { - "epoch": 2.726867666716174, - "grad_norm": 1.7490395307540894, - "learning_rate": 1.3638793999702957e-05, - "loss": 0.0729, - "step": 36720 - }, - { - "epoch": 2.7276102777365216, - "grad_norm": 0.7951035499572754, - "learning_rate": 1.3634338333580872e-05, - "loss": 0.0777, - "step": 36730 - }, - { - "epoch": 2.728352888756869, - "grad_norm": 1.3298048973083496, - "learning_rate": 1.3629882667458785e-05, - "loss": 0.0631, - "step": 36740 - }, - { - "epoch": 2.7290954997772165, - "grad_norm": 0.4356074929237366, - "learning_rate": 1.36254270013367e-05, - "loss": 0.0439, - "step": 36750 - }, - { - "epoch": 2.729838110797564, - "grad_norm": 0.5063789486885071, - "learning_rate": 1.3620971335214615e-05, - "loss": 0.0666, - "step": 36760 - }, - { - "epoch": 2.730580721817912, - "grad_norm": 2.056678533554077, - "learning_rate": 1.361651566909253e-05, - "loss": 0.078, - "step": 36770 - }, - { - "epoch": 2.7313233328382593, - "grad_norm": 1.4419987201690674, - "learning_rate": 1.3612060002970443e-05, - "loss": 0.0662, - "step": 36780 - }, - { - "epoch": 2.732065943858607, - "grad_norm": 1.547361969947815, - "learning_rate": 1.360760433684836e-05, - "loss": 0.046, - "step": 36790 - }, - { - "epoch": 2.7328085548789542, - "grad_norm": 2.8562614917755127, - "learning_rate": 1.3603148670726275e-05, - "loss": 0.0744, - "step": 36800 - }, - { - "epoch": 2.7335511658993017, - "grad_norm": 3.6716110706329346, - "learning_rate": 1.3598693004604188e-05, - "loss": 0.0709, - "step": 36810 - }, - { - "epoch": 2.7342937769196496, - "grad_norm": 1.2131446599960327, - "learning_rate": 1.3594237338482103e-05, - "loss": 0.0557, - "step": 36820 - }, - { - "epoch": 2.735036387939997, - "grad_norm": 0.5412776470184326, - "learning_rate": 1.3589781672360018e-05, - "loss": 0.0738, - "step": 36830 - }, - { - "epoch": 2.7357789989603445, - "grad_norm": 1.8566441535949707, - "learning_rate": 1.3585326006237933e-05, - "loss": 0.0771, - "step": 36840 - }, - { - "epoch": 2.7365216099806924, - "grad_norm": 1.2451858520507812, - "learning_rate": 1.3580870340115848e-05, - "loss": 0.0773, - "step": 36850 - }, - { - "epoch": 2.7372642210010394, - "grad_norm": 1.140763759613037, - "learning_rate": 1.3576414673993761e-05, - "loss": 0.0661, - "step": 36860 - }, - { - "epoch": 2.7380068320213873, - "grad_norm": 1.0305567979812622, - "learning_rate": 1.3571959007871678e-05, - "loss": 0.0349, - "step": 36870 - }, - { - "epoch": 2.7387494430417347, - "grad_norm": 1.3327600955963135, - "learning_rate": 1.3567503341749591e-05, - "loss": 0.0824, - "step": 36880 - }, - { - "epoch": 2.739492054062082, - "grad_norm": 1.6335985660552979, - "learning_rate": 1.3563047675627506e-05, - "loss": 0.0727, - "step": 36890 - }, - { - "epoch": 2.74023466508243, - "grad_norm": 1.2704558372497559, - "learning_rate": 1.3558592009505421e-05, - "loss": 0.0589, - "step": 36900 - }, - { - "epoch": 2.7409772761027775, - "grad_norm": 1.6128848791122437, - "learning_rate": 1.3554136343383336e-05, - "loss": 0.0584, - "step": 36910 - }, - { - "epoch": 2.741719887123125, - "grad_norm": 1.1986207962036133, - "learning_rate": 1.3549680677261251e-05, - "loss": 0.0891, - "step": 36920 - }, - { - "epoch": 2.7424624981434724, - "grad_norm": 0.5783780813217163, - "learning_rate": 1.3545225011139165e-05, - "loss": 0.0748, - "step": 36930 - }, - { - "epoch": 2.74320510916382, - "grad_norm": 2.1520473957061768, - "learning_rate": 1.3540769345017081e-05, - "loss": 0.0562, - "step": 36940 - }, - { - "epoch": 2.7439477201841678, - "grad_norm": 1.8193594217300415, - "learning_rate": 1.3536313678894995e-05, - "loss": 0.0797, - "step": 36950 - }, - { - "epoch": 2.744690331204515, - "grad_norm": 2.4824233055114746, - "learning_rate": 1.353185801277291e-05, - "loss": 0.0791, - "step": 36960 - }, - { - "epoch": 2.7454329422248627, - "grad_norm": 1.889683723449707, - "learning_rate": 1.3527402346650825e-05, - "loss": 0.0832, - "step": 36970 - }, - { - "epoch": 2.74617555324521, - "grad_norm": 1.3686025142669678, - "learning_rate": 1.352294668052874e-05, - "loss": 0.0714, - "step": 36980 - }, - { - "epoch": 2.7469181642655576, - "grad_norm": 1.2818831205368042, - "learning_rate": 1.3518491014406655e-05, - "loss": 0.0581, - "step": 36990 - }, - { - "epoch": 2.7476607752859055, - "grad_norm": 1.8120100498199463, - "learning_rate": 1.3514035348284568e-05, - "loss": 0.0515, - "step": 37000 - }, - { - "epoch": 2.748403386306253, - "grad_norm": 2.3514275550842285, - "learning_rate": 1.3509579682162483e-05, - "loss": 0.0766, - "step": 37010 - }, - { - "epoch": 2.7491459973266004, - "grad_norm": 0.7103281617164612, - "learning_rate": 1.35051240160404e-05, - "loss": 0.06, - "step": 37020 - }, - { - "epoch": 2.749888608346948, - "grad_norm": 0.8013458251953125, - "learning_rate": 1.3500668349918313e-05, - "loss": 0.042, - "step": 37030 - }, - { - "epoch": 2.7506312193672953, - "grad_norm": 0.9104951024055481, - "learning_rate": 1.3496212683796228e-05, - "loss": 0.0657, - "step": 37040 - }, - { - "epoch": 2.751373830387643, - "grad_norm": 3.061896324157715, - "learning_rate": 1.3491757017674143e-05, - "loss": 0.0995, - "step": 37050 - }, - { - "epoch": 2.7521164414079906, - "grad_norm": 2.806757688522339, - "learning_rate": 1.3487301351552058e-05, - "loss": 0.0623, - "step": 37060 - }, - { - "epoch": 2.752859052428338, - "grad_norm": 0.8061108589172363, - "learning_rate": 1.3482845685429971e-05, - "loss": 0.0434, - "step": 37070 - }, - { - "epoch": 2.7536016634486855, - "grad_norm": 0.7972543835639954, - "learning_rate": 1.3478390019307886e-05, - "loss": 0.0756, - "step": 37080 - }, - { - "epoch": 2.754344274469033, - "grad_norm": 1.5233701467514038, - "learning_rate": 1.3473934353185803e-05, - "loss": 0.0669, - "step": 37090 - }, - { - "epoch": 2.755086885489381, - "grad_norm": 3.342548370361328, - "learning_rate": 1.3469478687063716e-05, - "loss": 0.0706, - "step": 37100 - }, - { - "epoch": 2.7558294965097283, - "grad_norm": 0.6841835975646973, - "learning_rate": 1.3465023020941631e-05, - "loss": 0.0526, - "step": 37110 - }, - { - "epoch": 2.7565721075300758, - "grad_norm": 0.362078994512558, - "learning_rate": 1.3460567354819544e-05, - "loss": 0.0528, - "step": 37120 - }, - { - "epoch": 2.757314718550423, - "grad_norm": 2.367532968521118, - "learning_rate": 1.3456111688697461e-05, - "loss": 0.0709, - "step": 37130 - }, - { - "epoch": 2.7580573295707707, - "grad_norm": 1.0397535562515259, - "learning_rate": 1.3451656022575376e-05, - "loss": 0.0827, - "step": 37140 - }, - { - "epoch": 2.7587999405911185, - "grad_norm": 2.048051118850708, - "learning_rate": 1.344720035645329e-05, - "loss": 0.0707, - "step": 37150 - }, - { - "epoch": 2.759542551611466, - "grad_norm": 0.5190430283546448, - "learning_rate": 1.3442744690331206e-05, - "loss": 0.0711, - "step": 37160 - }, - { - "epoch": 2.7602851626318134, - "grad_norm": 2.5641369819641113, - "learning_rate": 1.343828902420912e-05, - "loss": 0.0677, - "step": 37170 - }, - { - "epoch": 2.761027773652161, - "grad_norm": 0.1991005390882492, - "learning_rate": 1.3433833358087034e-05, - "loss": 0.0298, - "step": 37180 - }, - { - "epoch": 2.7617703846725083, - "grad_norm": 1.7412118911743164, - "learning_rate": 1.3429377691964948e-05, - "loss": 0.061, - "step": 37190 - }, - { - "epoch": 2.7625129956928562, - "grad_norm": 1.8074331283569336, - "learning_rate": 1.3424922025842864e-05, - "loss": 0.0684, - "step": 37200 - }, - { - "epoch": 2.7632556067132037, - "grad_norm": 1.0141547918319702, - "learning_rate": 1.342046635972078e-05, - "loss": 0.0713, - "step": 37210 - }, - { - "epoch": 2.763998217733551, - "grad_norm": 1.6694709062576294, - "learning_rate": 1.3416010693598693e-05, - "loss": 0.0599, - "step": 37220 - }, - { - "epoch": 2.7647408287538986, - "grad_norm": 2.54500675201416, - "learning_rate": 1.3411555027476608e-05, - "loss": 0.054, - "step": 37230 - }, - { - "epoch": 2.765483439774246, - "grad_norm": 2.212883710861206, - "learning_rate": 1.3407099361354523e-05, - "loss": 0.0734, - "step": 37240 - }, - { - "epoch": 2.766226050794594, - "grad_norm": 1.2556638717651367, - "learning_rate": 1.3402643695232438e-05, - "loss": 0.0718, - "step": 37250 - }, - { - "epoch": 2.7669686618149414, - "grad_norm": 2.478182792663574, - "learning_rate": 1.3398188029110353e-05, - "loss": 0.0535, - "step": 37260 - }, - { - "epoch": 2.767711272835289, - "grad_norm": 1.532631516456604, - "learning_rate": 1.3393732362988266e-05, - "loss": 0.0807, - "step": 37270 - }, - { - "epoch": 2.7684538838556363, - "grad_norm": 1.9082090854644775, - "learning_rate": 1.3389276696866183e-05, - "loss": 0.0805, - "step": 37280 - }, - { - "epoch": 2.7691964948759837, - "grad_norm": 1.0164248943328857, - "learning_rate": 1.3384821030744096e-05, - "loss": 0.0843, - "step": 37290 - }, - { - "epoch": 2.7699391058963316, - "grad_norm": 0.967978298664093, - "learning_rate": 1.3380365364622011e-05, - "loss": 0.0693, - "step": 37300 - }, - { - "epoch": 2.770681716916679, - "grad_norm": 1.1831194162368774, - "learning_rate": 1.3375909698499928e-05, - "loss": 0.0716, - "step": 37310 - }, - { - "epoch": 2.7714243279370265, - "grad_norm": 2.0037786960601807, - "learning_rate": 1.3371454032377841e-05, - "loss": 0.0813, - "step": 37320 - }, - { - "epoch": 2.772166938957374, - "grad_norm": 1.3486874103546143, - "learning_rate": 1.3366998366255756e-05, - "loss": 0.0912, - "step": 37330 - }, - { - "epoch": 2.7729095499777214, - "grad_norm": 1.2542924880981445, - "learning_rate": 1.336254270013367e-05, - "loss": 0.067, - "step": 37340 - }, - { - "epoch": 2.7736521609980693, - "grad_norm": 0.595507025718689, - "learning_rate": 1.3358087034011586e-05, - "loss": 0.0758, - "step": 37350 - }, - { - "epoch": 2.7743947720184168, - "grad_norm": 1.162650465965271, - "learning_rate": 1.33536313678895e-05, - "loss": 0.0643, - "step": 37360 - }, - { - "epoch": 2.7751373830387642, - "grad_norm": 0.5855199098587036, - "learning_rate": 1.3349175701767414e-05, - "loss": 0.0572, - "step": 37370 - }, - { - "epoch": 2.7758799940591117, - "grad_norm": 3.25514554977417, - "learning_rate": 1.334472003564533e-05, - "loss": 0.0628, - "step": 37380 - }, - { - "epoch": 2.776622605079459, - "grad_norm": 2.44706392288208, - "learning_rate": 1.3340264369523244e-05, - "loss": 0.0798, - "step": 37390 - }, - { - "epoch": 2.777365216099807, - "grad_norm": 1.5468707084655762, - "learning_rate": 1.333580870340116e-05, - "loss": 0.0699, - "step": 37400 - }, - { - "epoch": 2.7781078271201545, - "grad_norm": 3.0609419345855713, - "learning_rate": 1.3331353037279073e-05, - "loss": 0.0741, - "step": 37410 - }, - { - "epoch": 2.778850438140502, - "grad_norm": 2.7245450019836426, - "learning_rate": 1.3326897371156988e-05, - "loss": 0.0528, - "step": 37420 - }, - { - "epoch": 2.77959304916085, - "grad_norm": 0.9166297912597656, - "learning_rate": 1.3322441705034904e-05, - "loss": 0.0744, - "step": 37430 - }, - { - "epoch": 2.780335660181197, - "grad_norm": 1.0476568937301636, - "learning_rate": 1.3317986038912818e-05, - "loss": 0.0556, - "step": 37440 - }, - { - "epoch": 2.7810782712015447, - "grad_norm": 2.8554935455322266, - "learning_rate": 1.3313530372790733e-05, - "loss": 0.0651, - "step": 37450 - }, - { - "epoch": 2.781820882221892, - "grad_norm": 1.7968850135803223, - "learning_rate": 1.3309074706668648e-05, - "loss": 0.0684, - "step": 37460 - }, - { - "epoch": 2.7825634932422396, - "grad_norm": 3.455589532852173, - "learning_rate": 1.3304619040546563e-05, - "loss": 0.0739, - "step": 37470 - }, - { - "epoch": 2.7833061042625875, - "grad_norm": 1.8191416263580322, - "learning_rate": 1.3300163374424476e-05, - "loss": 0.0527, - "step": 37480 - }, - { - "epoch": 2.784048715282935, - "grad_norm": 1.094232201576233, - "learning_rate": 1.329570770830239e-05, - "loss": 0.076, - "step": 37490 - }, - { - "epoch": 2.7847913263032824, - "grad_norm": 4.273893356323242, - "learning_rate": 1.3291252042180307e-05, - "loss": 0.086, - "step": 37500 - }, - { - "epoch": 2.78553393732363, - "grad_norm": 2.4333572387695312, - "learning_rate": 1.328679637605822e-05, - "loss": 0.102, - "step": 37510 - }, - { - "epoch": 2.7862765483439773, - "grad_norm": 1.4521609544754028, - "learning_rate": 1.3282340709936136e-05, - "loss": 0.0733, - "step": 37520 - }, - { - "epoch": 2.787019159364325, - "grad_norm": 1.7854300737380981, - "learning_rate": 1.3277885043814049e-05, - "loss": 0.0426, - "step": 37530 - }, - { - "epoch": 2.7877617703846727, - "grad_norm": 1.4833481311798096, - "learning_rate": 1.3273429377691966e-05, - "loss": 0.0501, - "step": 37540 - }, - { - "epoch": 2.78850438140502, - "grad_norm": 3.3596692085266113, - "learning_rate": 1.326897371156988e-05, - "loss": 0.0536, - "step": 37550 - }, - { - "epoch": 2.7892469924253676, - "grad_norm": 3.833606481552124, - "learning_rate": 1.3264518045447794e-05, - "loss": 0.0684, - "step": 37560 - }, - { - "epoch": 2.789989603445715, - "grad_norm": 0.862786054611206, - "learning_rate": 1.326006237932571e-05, - "loss": 0.0419, - "step": 37570 - }, - { - "epoch": 2.790732214466063, - "grad_norm": 0.7479864954948425, - "learning_rate": 1.3255606713203624e-05, - "loss": 0.062, - "step": 37580 - }, - { - "epoch": 2.7914748254864103, - "grad_norm": 1.0688323974609375, - "learning_rate": 1.3251151047081539e-05, - "loss": 0.0577, - "step": 37590 - }, - { - "epoch": 2.792217436506758, - "grad_norm": 1.409751057624817, - "learning_rate": 1.3246695380959454e-05, - "loss": 0.0587, - "step": 37600 - }, - { - "epoch": 2.7929600475271052, - "grad_norm": 0.3575490415096283, - "learning_rate": 1.3242239714837369e-05, - "loss": 0.0798, - "step": 37610 - }, - { - "epoch": 2.7937026585474527, - "grad_norm": 1.4559156894683838, - "learning_rate": 1.3237784048715284e-05, - "loss": 0.0729, - "step": 37620 - }, - { - "epoch": 2.7944452695678006, - "grad_norm": 1.0611257553100586, - "learning_rate": 1.3233328382593197e-05, - "loss": 0.0695, - "step": 37630 - }, - { - "epoch": 2.795187880588148, - "grad_norm": 1.5635493993759155, - "learning_rate": 1.3228872716471112e-05, - "loss": 0.0826, - "step": 37640 - }, - { - "epoch": 2.7959304916084955, - "grad_norm": 0.6104263663291931, - "learning_rate": 1.3224417050349027e-05, - "loss": 0.0548, - "step": 37650 - }, - { - "epoch": 2.796673102628843, - "grad_norm": 1.3987880945205688, - "learning_rate": 1.3219961384226942e-05, - "loss": 0.0678, - "step": 37660 - }, - { - "epoch": 2.7974157136491904, - "grad_norm": 0.8820175528526306, - "learning_rate": 1.3215505718104857e-05, - "loss": 0.0589, - "step": 37670 - }, - { - "epoch": 2.7981583246695383, - "grad_norm": 1.4698015451431274, - "learning_rate": 1.321105005198277e-05, - "loss": 0.0607, - "step": 37680 - }, - { - "epoch": 2.7989009356898857, - "grad_norm": 2.0641324520111084, - "learning_rate": 1.3206594385860687e-05, - "loss": 0.0633, - "step": 37690 - }, - { - "epoch": 2.799643546710233, - "grad_norm": 3.1822593212127686, - "learning_rate": 1.32021387197386e-05, - "loss": 0.0465, - "step": 37700 - }, - { - "epoch": 2.8003861577305806, - "grad_norm": 1.4389050006866455, - "learning_rate": 1.3197683053616516e-05, - "loss": 0.0879, - "step": 37710 - }, - { - "epoch": 2.801128768750928, - "grad_norm": 1.4606937170028687, - "learning_rate": 1.3193227387494432e-05, - "loss": 0.0548, - "step": 37720 - }, - { - "epoch": 2.801871379771276, - "grad_norm": 2.7403457164764404, - "learning_rate": 1.3188771721372346e-05, - "loss": 0.0507, - "step": 37730 - }, - { - "epoch": 2.8026139907916234, - "grad_norm": 2.382749080657959, - "learning_rate": 1.318431605525026e-05, - "loss": 0.0725, - "step": 37740 - }, - { - "epoch": 2.803356601811971, - "grad_norm": 1.1223398447036743, - "learning_rate": 1.3179860389128174e-05, - "loss": 0.0901, - "step": 37750 - }, - { - "epoch": 2.8040992128323183, - "grad_norm": 1.5770460367202759, - "learning_rate": 1.317540472300609e-05, - "loss": 0.0897, - "step": 37760 - }, - { - "epoch": 2.804841823852666, - "grad_norm": 1.267210602760315, - "learning_rate": 1.3170949056884004e-05, - "loss": 0.0532, - "step": 37770 - }, - { - "epoch": 2.8055844348730137, - "grad_norm": 0.7207576036453247, - "learning_rate": 1.3166493390761919e-05, - "loss": 0.0356, - "step": 37780 - }, - { - "epoch": 2.806327045893361, - "grad_norm": 0.6520107984542847, - "learning_rate": 1.3162037724639834e-05, - "loss": 0.0663, - "step": 37790 - }, - { - "epoch": 2.8070696569137086, - "grad_norm": 1.4991291761398315, - "learning_rate": 1.3157582058517749e-05, - "loss": 0.0732, - "step": 37800 - }, - { - "epoch": 2.807812267934056, - "grad_norm": 2.074842691421509, - "learning_rate": 1.3153126392395664e-05, - "loss": 0.0878, - "step": 37810 - }, - { - "epoch": 2.8085548789544035, - "grad_norm": 0.5263580083847046, - "learning_rate": 1.3148670726273577e-05, - "loss": 0.098, - "step": 37820 - }, - { - "epoch": 2.8092974899747514, - "grad_norm": 2.034339427947998, - "learning_rate": 1.3144215060151494e-05, - "loss": 0.0592, - "step": 37830 - }, - { - "epoch": 2.810040100995099, - "grad_norm": 1.7744560241699219, - "learning_rate": 1.3139759394029409e-05, - "loss": 0.0785, - "step": 37840 - }, - { - "epoch": 2.8107827120154463, - "grad_norm": 1.718765377998352, - "learning_rate": 1.3135303727907322e-05, - "loss": 0.0602, - "step": 37850 - }, - { - "epoch": 2.8115253230357937, - "grad_norm": 0.45636001229286194, - "learning_rate": 1.3130848061785237e-05, - "loss": 0.0625, - "step": 37860 - }, - { - "epoch": 2.812267934056141, - "grad_norm": 0.8540383577346802, - "learning_rate": 1.3126392395663152e-05, - "loss": 0.0749, - "step": 37870 - }, - { - "epoch": 2.813010545076489, - "grad_norm": 0.7994318604469299, - "learning_rate": 1.3121936729541067e-05, - "loss": 0.0738, - "step": 37880 - }, - { - "epoch": 2.8137531560968365, - "grad_norm": 2.370769739151001, - "learning_rate": 1.311748106341898e-05, - "loss": 0.0594, - "step": 37890 - }, - { - "epoch": 2.814495767117184, - "grad_norm": 3.0861258506774902, - "learning_rate": 1.3113025397296895e-05, - "loss": 0.0758, - "step": 37900 - }, - { - "epoch": 2.8152383781375314, - "grad_norm": 0.46465158462524414, - "learning_rate": 1.3108569731174812e-05, - "loss": 0.0581, - "step": 37910 - }, - { - "epoch": 2.815980989157879, - "grad_norm": 2.443127393722534, - "learning_rate": 1.3104114065052725e-05, - "loss": 0.0948, - "step": 37920 - }, - { - "epoch": 2.8167236001782268, - "grad_norm": 2.299797534942627, - "learning_rate": 1.309965839893064e-05, - "loss": 0.0565, - "step": 37930 - }, - { - "epoch": 2.817466211198574, - "grad_norm": 3.1325736045837402, - "learning_rate": 1.3095202732808554e-05, - "loss": 0.0834, - "step": 37940 - }, - { - "epoch": 2.8182088222189217, - "grad_norm": 1.0582780838012695, - "learning_rate": 1.309074706668647e-05, - "loss": 0.0739, - "step": 37950 - }, - { - "epoch": 2.818951433239269, - "grad_norm": 2.284137725830078, - "learning_rate": 1.3086291400564385e-05, - "loss": 0.0861, - "step": 37960 - }, - { - "epoch": 2.8196940442596166, - "grad_norm": 0.8023969531059265, - "learning_rate": 1.3081835734442299e-05, - "loss": 0.0615, - "step": 37970 - }, - { - "epoch": 2.8204366552799645, - "grad_norm": 1.1526970863342285, - "learning_rate": 1.3077380068320215e-05, - "loss": 0.0686, - "step": 37980 - }, - { - "epoch": 2.821179266300312, - "grad_norm": 2.1727919578552246, - "learning_rate": 1.3072924402198129e-05, - "loss": 0.0874, - "step": 37990 - }, - { - "epoch": 2.8219218773206594, - "grad_norm": 0.8734510540962219, - "learning_rate": 1.3068468736076044e-05, - "loss": 0.0577, - "step": 38000 - }, - { - "epoch": 2.8226644883410072, - "grad_norm": 0.24533693492412567, - "learning_rate": 1.3064013069953959e-05, - "loss": 0.0887, - "step": 38010 - }, - { - "epoch": 2.8234070993613543, - "grad_norm": 2.325021982192993, - "learning_rate": 1.3059557403831874e-05, - "loss": 0.0681, - "step": 38020 - }, - { - "epoch": 2.824149710381702, - "grad_norm": 1.5730549097061157, - "learning_rate": 1.3055101737709789e-05, - "loss": 0.0522, - "step": 38030 - }, - { - "epoch": 2.8248923214020496, - "grad_norm": 1.1065586805343628, - "learning_rate": 1.3050646071587702e-05, - "loss": 0.0581, - "step": 38040 - }, - { - "epoch": 2.825634932422397, - "grad_norm": 0.48450005054473877, - "learning_rate": 1.3046190405465617e-05, - "loss": 0.0715, - "step": 38050 - }, - { - "epoch": 2.826377543442745, - "grad_norm": 1.2192469835281372, - "learning_rate": 1.3041734739343532e-05, - "loss": 0.0557, - "step": 38060 - }, - { - "epoch": 2.8271201544630924, - "grad_norm": 1.8304122686386108, - "learning_rate": 1.3037279073221447e-05, - "loss": 0.0832, - "step": 38070 - }, - { - "epoch": 2.82786276548344, - "grad_norm": 1.7451564073562622, - "learning_rate": 1.3032823407099362e-05, - "loss": 0.0839, - "step": 38080 - }, - { - "epoch": 2.8286053765037873, - "grad_norm": 1.190588355064392, - "learning_rate": 1.3028367740977277e-05, - "loss": 0.0659, - "step": 38090 - }, - { - "epoch": 2.8293479875241347, - "grad_norm": 0.46794483065605164, - "learning_rate": 1.3023912074855192e-05, - "loss": 0.0554, - "step": 38100 - }, - { - "epoch": 2.8300905985444826, - "grad_norm": 1.307004451751709, - "learning_rate": 1.3019456408733105e-05, - "loss": 0.0637, - "step": 38110 - }, - { - "epoch": 2.83083320956483, - "grad_norm": 1.7343428134918213, - "learning_rate": 1.301500074261102e-05, - "loss": 0.0469, - "step": 38120 - }, - { - "epoch": 2.8315758205851775, - "grad_norm": 1.2779510021209717, - "learning_rate": 1.3010545076488937e-05, - "loss": 0.0376, - "step": 38130 - }, - { - "epoch": 2.832318431605525, - "grad_norm": 0.3328961431980133, - "learning_rate": 1.300608941036685e-05, - "loss": 0.0531, - "step": 38140 - }, - { - "epoch": 2.8330610426258724, - "grad_norm": 1.039929986000061, - "learning_rate": 1.3001633744244765e-05, - "loss": 0.0729, - "step": 38150 - }, - { - "epoch": 2.8338036536462203, - "grad_norm": 1.5998934507369995, - "learning_rate": 1.2997178078122678e-05, - "loss": 0.0736, - "step": 38160 - }, - { - "epoch": 2.834546264666568, - "grad_norm": 0.7443707585334778, - "learning_rate": 1.2992722412000595e-05, - "loss": 0.0618, - "step": 38170 - }, - { - "epoch": 2.8352888756869152, - "grad_norm": 1.4436475038528442, - "learning_rate": 1.2988266745878508e-05, - "loss": 0.0762, - "step": 38180 - }, - { - "epoch": 2.8360314867072627, - "grad_norm": 1.3913630247116089, - "learning_rate": 1.2983811079756423e-05, - "loss": 0.0825, - "step": 38190 - }, - { - "epoch": 2.83677409772761, - "grad_norm": 1.0317375659942627, - "learning_rate": 1.2979355413634338e-05, - "loss": 0.0693, - "step": 38200 - }, - { - "epoch": 2.837516708747958, - "grad_norm": 1.5049179792404175, - "learning_rate": 1.2974899747512253e-05, - "loss": 0.0597, - "step": 38210 - }, - { - "epoch": 2.8382593197683055, - "grad_norm": 1.5254199504852295, - "learning_rate": 1.2970444081390168e-05, - "loss": 0.0452, - "step": 38220 - }, - { - "epoch": 2.839001930788653, - "grad_norm": 2.0400617122650146, - "learning_rate": 1.2965988415268082e-05, - "loss": 0.0726, - "step": 38230 - }, - { - "epoch": 2.8397445418090004, - "grad_norm": 1.2715054750442505, - "learning_rate": 1.2961532749145998e-05, - "loss": 0.0657, - "step": 38240 - }, - { - "epoch": 2.840487152829348, - "grad_norm": 1.5253748893737793, - "learning_rate": 1.2957077083023913e-05, - "loss": 0.093, - "step": 38250 - }, - { - "epoch": 2.8412297638496957, - "grad_norm": 1.2937556505203247, - "learning_rate": 1.2952621416901827e-05, - "loss": 0.0773, - "step": 38260 - }, - { - "epoch": 2.841972374870043, - "grad_norm": 0.9976204633712769, - "learning_rate": 1.2948165750779742e-05, - "loss": 0.0774, - "step": 38270 - }, - { - "epoch": 2.8427149858903906, - "grad_norm": 0.886090874671936, - "learning_rate": 1.2943710084657657e-05, - "loss": 0.051, - "step": 38280 - }, - { - "epoch": 2.843457596910738, - "grad_norm": 1.4611785411834717, - "learning_rate": 1.2939254418535572e-05, - "loss": 0.0612, - "step": 38290 - }, - { - "epoch": 2.8442002079310855, - "grad_norm": 0.9807224869728088, - "learning_rate": 1.2934798752413487e-05, - "loss": 0.0654, - "step": 38300 - }, - { - "epoch": 2.8449428189514334, - "grad_norm": 1.1847294569015503, - "learning_rate": 1.29303430862914e-05, - "loss": 0.0605, - "step": 38310 - }, - { - "epoch": 2.845685429971781, - "grad_norm": 0.535963237285614, - "learning_rate": 1.2925887420169317e-05, - "loss": 0.0763, - "step": 38320 - }, - { - "epoch": 2.8464280409921283, - "grad_norm": 2.856031894683838, - "learning_rate": 1.292143175404723e-05, - "loss": 0.064, - "step": 38330 - }, - { - "epoch": 2.8471706520124758, - "grad_norm": 0.5636598467826843, - "learning_rate": 1.2916976087925145e-05, - "loss": 0.0675, - "step": 38340 - }, - { - "epoch": 2.847913263032823, - "grad_norm": 1.67021644115448, - "learning_rate": 1.291252042180306e-05, - "loss": 0.0784, - "step": 38350 - }, - { - "epoch": 2.848655874053171, - "grad_norm": 1.2641798257827759, - "learning_rate": 1.2908064755680975e-05, - "loss": 0.063, - "step": 38360 - }, - { - "epoch": 2.8493984850735186, - "grad_norm": 0.549030601978302, - "learning_rate": 1.290360908955889e-05, - "loss": 0.0584, - "step": 38370 - }, - { - "epoch": 2.850141096093866, - "grad_norm": 2.4092066287994385, - "learning_rate": 1.2899153423436803e-05, - "loss": 0.0935, - "step": 38380 - }, - { - "epoch": 2.8508837071142135, - "grad_norm": 0.9181311726570129, - "learning_rate": 1.289469775731472e-05, - "loss": 0.0586, - "step": 38390 - }, - { - "epoch": 2.851626318134561, - "grad_norm": 1.6261708736419678, - "learning_rate": 1.2890242091192633e-05, - "loss": 0.0871, - "step": 38400 - }, - { - "epoch": 2.852368929154909, - "grad_norm": 1.0663944482803345, - "learning_rate": 1.2885786425070548e-05, - "loss": 0.0464, - "step": 38410 - }, - { - "epoch": 2.8531115401752563, - "grad_norm": 1.0980523824691772, - "learning_rate": 1.2881330758948463e-05, - "loss": 0.0805, - "step": 38420 - }, - { - "epoch": 2.8538541511956037, - "grad_norm": 1.2846684455871582, - "learning_rate": 1.2876875092826378e-05, - "loss": 0.1012, - "step": 38430 - }, - { - "epoch": 2.854596762215951, - "grad_norm": 2.4955661296844482, - "learning_rate": 1.2872419426704293e-05, - "loss": 0.0585, - "step": 38440 - }, - { - "epoch": 2.8553393732362986, - "grad_norm": 1.1014326810836792, - "learning_rate": 1.2867963760582207e-05, - "loss": 0.0656, - "step": 38450 - }, - { - "epoch": 2.8560819842566465, - "grad_norm": 0.6860207915306091, - "learning_rate": 1.2863508094460122e-05, - "loss": 0.0522, - "step": 38460 - }, - { - "epoch": 2.856824595276994, - "grad_norm": 1.087835669517517, - "learning_rate": 1.2859052428338037e-05, - "loss": 0.0673, - "step": 38470 - }, - { - "epoch": 2.8575672062973414, - "grad_norm": 2.119745969772339, - "learning_rate": 1.2854596762215952e-05, - "loss": 0.0697, - "step": 38480 - }, - { - "epoch": 2.858309817317689, - "grad_norm": 1.239503026008606, - "learning_rate": 1.2850141096093867e-05, - "loss": 0.0857, - "step": 38490 - }, - { - "epoch": 2.8590524283380363, - "grad_norm": 4.281582832336426, - "learning_rate": 1.2845685429971781e-05, - "loss": 0.0598, - "step": 38500 - }, - { - "epoch": 2.859795039358384, - "grad_norm": 2.14847993850708, - "learning_rate": 1.2841229763849696e-05, - "loss": 0.068, - "step": 38510 - }, - { - "epoch": 2.8605376503787316, - "grad_norm": 4.647385120391846, - "learning_rate": 1.283677409772761e-05, - "loss": 0.0738, - "step": 38520 - }, - { - "epoch": 2.861280261399079, - "grad_norm": 0.7696908116340637, - "learning_rate": 1.2832318431605525e-05, - "loss": 0.0458, - "step": 38530 - }, - { - "epoch": 2.8620228724194265, - "grad_norm": 0.9217883348464966, - "learning_rate": 1.2827862765483441e-05, - "loss": 0.0427, - "step": 38540 - }, - { - "epoch": 2.862765483439774, - "grad_norm": 2.1366024017333984, - "learning_rate": 1.2823407099361355e-05, - "loss": 0.0735, - "step": 38550 - }, - { - "epoch": 2.863508094460122, - "grad_norm": 1.3464000225067139, - "learning_rate": 1.281895143323927e-05, - "loss": 0.0712, - "step": 38560 - }, - { - "epoch": 2.8642507054804693, - "grad_norm": 0.7203729152679443, - "learning_rate": 1.2814495767117183e-05, - "loss": 0.0611, - "step": 38570 - }, - { - "epoch": 2.864993316500817, - "grad_norm": 1.2216914892196655, - "learning_rate": 1.28100401009951e-05, - "loss": 0.0697, - "step": 38580 - }, - { - "epoch": 2.8657359275211647, - "grad_norm": 1.4661186933517456, - "learning_rate": 1.2805584434873013e-05, - "loss": 0.0771, - "step": 38590 - }, - { - "epoch": 2.8664785385415117, - "grad_norm": 0.9744288325309753, - "learning_rate": 1.2801128768750928e-05, - "loss": 0.0693, - "step": 38600 - }, - { - "epoch": 2.8672211495618596, - "grad_norm": 0.961794912815094, - "learning_rate": 1.2796673102628845e-05, - "loss": 0.0956, - "step": 38610 - }, - { - "epoch": 2.867963760582207, - "grad_norm": 2.7422969341278076, - "learning_rate": 1.2792217436506758e-05, - "loss": 0.0994, - "step": 38620 - }, - { - "epoch": 2.8687063716025545, - "grad_norm": 1.6554310321807861, - "learning_rate": 1.2787761770384673e-05, - "loss": 0.0521, - "step": 38630 - }, - { - "epoch": 2.8694489826229024, - "grad_norm": 2.223524808883667, - "learning_rate": 1.2783306104262586e-05, - "loss": 0.0773, - "step": 38640 - }, - { - "epoch": 2.87019159364325, - "grad_norm": 0.8952299356460571, - "learning_rate": 1.2778850438140503e-05, - "loss": 0.0592, - "step": 38650 - }, - { - "epoch": 2.8709342046635973, - "grad_norm": 0.7390848398208618, - "learning_rate": 1.2774394772018418e-05, - "loss": 0.0638, - "step": 38660 - }, - { - "epoch": 2.8716768156839447, - "grad_norm": 1.8188756704330444, - "learning_rate": 1.2769939105896331e-05, - "loss": 0.0533, - "step": 38670 - }, - { - "epoch": 2.872419426704292, - "grad_norm": 1.062387466430664, - "learning_rate": 1.2765483439774246e-05, - "loss": 0.0603, - "step": 38680 - }, - { - "epoch": 2.87316203772464, - "grad_norm": 2.831735134124756, - "learning_rate": 1.2761027773652161e-05, - "loss": 0.0383, - "step": 38690 - }, - { - "epoch": 2.8739046487449875, - "grad_norm": 0.8888131976127625, - "learning_rate": 1.2756572107530076e-05, - "loss": 0.0578, - "step": 38700 - }, - { - "epoch": 2.874647259765335, - "grad_norm": 2.9310948848724365, - "learning_rate": 1.2752116441407991e-05, - "loss": 0.068, - "step": 38710 - }, - { - "epoch": 2.8753898707856824, - "grad_norm": 0.4100227952003479, - "learning_rate": 1.2747660775285905e-05, - "loss": 0.0827, - "step": 38720 - }, - { - "epoch": 2.87613248180603, - "grad_norm": 1.1365009546279907, - "learning_rate": 1.2743205109163821e-05, - "loss": 0.0718, - "step": 38730 - }, - { - "epoch": 2.8768750928263778, - "grad_norm": 1.4055360555648804, - "learning_rate": 1.2738749443041735e-05, - "loss": 0.0788, - "step": 38740 - }, - { - "epoch": 2.877617703846725, - "grad_norm": 1.0839377641677856, - "learning_rate": 1.273429377691965e-05, - "loss": 0.0649, - "step": 38750 - }, - { - "epoch": 2.8783603148670727, - "grad_norm": 1.1226552724838257, - "learning_rate": 1.2729838110797565e-05, - "loss": 0.0717, - "step": 38760 - }, - { - "epoch": 2.87910292588742, - "grad_norm": 0.4335779845714569, - "learning_rate": 1.272538244467548e-05, - "loss": 0.0535, - "step": 38770 - }, - { - "epoch": 2.8798455369077676, - "grad_norm": 0.9374495148658752, - "learning_rate": 1.2720926778553395e-05, - "loss": 0.069, - "step": 38780 - }, - { - "epoch": 2.8805881479281155, - "grad_norm": 1.4652955532073975, - "learning_rate": 1.2716471112431308e-05, - "loss": 0.0836, - "step": 38790 - }, - { - "epoch": 2.881330758948463, - "grad_norm": 1.3489465713500977, - "learning_rate": 1.2712015446309225e-05, - "loss": 0.0757, - "step": 38800 - }, - { - "epoch": 2.8820733699688104, - "grad_norm": 3.2947144508361816, - "learning_rate": 1.2707559780187138e-05, - "loss": 0.0454, - "step": 38810 - }, - { - "epoch": 2.882815980989158, - "grad_norm": 1.3606735467910767, - "learning_rate": 1.2703104114065053e-05, - "loss": 0.0732, - "step": 38820 - }, - { - "epoch": 2.8835585920095053, - "grad_norm": 0.7241372466087341, - "learning_rate": 1.2698648447942968e-05, - "loss": 0.0727, - "step": 38830 - }, - { - "epoch": 2.884301203029853, - "grad_norm": 1.2209150791168213, - "learning_rate": 1.2694192781820883e-05, - "loss": 0.0625, - "step": 38840 - }, - { - "epoch": 2.8850438140502006, - "grad_norm": 3.2763359546661377, - "learning_rate": 1.2689737115698798e-05, - "loss": 0.0619, - "step": 38850 - }, - { - "epoch": 2.885786425070548, - "grad_norm": 0.7242000699043274, - "learning_rate": 1.2685281449576711e-05, - "loss": 0.0428, - "step": 38860 - }, - { - "epoch": 2.8865290360908955, - "grad_norm": 1.276310682296753, - "learning_rate": 1.2680825783454628e-05, - "loss": 0.0873, - "step": 38870 - }, - { - "epoch": 2.887271647111243, - "grad_norm": 0.70942223072052, - "learning_rate": 1.2676370117332541e-05, - "loss": 0.0563, - "step": 38880 - }, - { - "epoch": 2.888014258131591, - "grad_norm": 1.4560551643371582, - "learning_rate": 1.2671914451210456e-05, - "loss": 0.0754, - "step": 38890 - }, - { - "epoch": 2.8887568691519383, - "grad_norm": 3.525283098220825, - "learning_rate": 1.2667458785088371e-05, - "loss": 0.1017, - "step": 38900 - }, - { - "epoch": 2.8894994801722858, - "grad_norm": 2.5498082637786865, - "learning_rate": 1.2663003118966286e-05, - "loss": 0.0777, - "step": 38910 - }, - { - "epoch": 2.890242091192633, - "grad_norm": 1.0680961608886719, - "learning_rate": 1.2658547452844201e-05, - "loss": 0.058, - "step": 38920 - }, - { - "epoch": 2.8909847022129807, - "grad_norm": 0.860130786895752, - "learning_rate": 1.2654091786722114e-05, - "loss": 0.0519, - "step": 38930 - }, - { - "epoch": 2.8917273132333285, - "grad_norm": 0.4104064106941223, - "learning_rate": 1.264963612060003e-05, - "loss": 0.0721, - "step": 38940 - }, - { - "epoch": 2.892469924253676, - "grad_norm": 1.9882882833480835, - "learning_rate": 1.2645180454477946e-05, - "loss": 0.0761, - "step": 38950 - }, - { - "epoch": 2.8932125352740234, - "grad_norm": 0.7212990522384644, - "learning_rate": 1.264072478835586e-05, - "loss": 0.0602, - "step": 38960 - }, - { - "epoch": 2.893955146294371, - "grad_norm": 0.5965471863746643, - "learning_rate": 1.2636269122233774e-05, - "loss": 0.0764, - "step": 38970 - }, - { - "epoch": 2.8946977573147183, - "grad_norm": 2.7292585372924805, - "learning_rate": 1.2631813456111688e-05, - "loss": 0.0523, - "step": 38980 - }, - { - "epoch": 2.8954403683350662, - "grad_norm": 0.5721240043640137, - "learning_rate": 1.2627357789989604e-05, - "loss": 0.0278, - "step": 38990 - }, - { - "epoch": 2.8961829793554137, - "grad_norm": 2.3758206367492676, - "learning_rate": 1.262290212386752e-05, - "loss": 0.0495, - "step": 39000 - }, - { - "epoch": 2.896925590375761, - "grad_norm": 0.5098469257354736, - "learning_rate": 1.2618446457745433e-05, - "loss": 0.0734, - "step": 39010 - }, - { - "epoch": 2.8976682013961086, - "grad_norm": 1.3292839527130127, - "learning_rate": 1.261399079162335e-05, - "loss": 0.0594, - "step": 39020 - }, - { - "epoch": 2.898410812416456, - "grad_norm": 0.4254518747329712, - "learning_rate": 1.2609535125501263e-05, - "loss": 0.0818, - "step": 39030 - }, - { - "epoch": 2.899153423436804, - "grad_norm": 1.5062333345413208, - "learning_rate": 1.2605079459379178e-05, - "loss": 0.058, - "step": 39040 - }, - { - "epoch": 2.8998960344571514, - "grad_norm": 1.215062141418457, - "learning_rate": 1.2600623793257091e-05, - "loss": 0.0422, - "step": 39050 - }, - { - "epoch": 2.900638645477499, - "grad_norm": 1.1051884889602661, - "learning_rate": 1.2596168127135008e-05, - "loss": 0.0726, - "step": 39060 - }, - { - "epoch": 2.9013812564978463, - "grad_norm": 0.9767510294914246, - "learning_rate": 1.2591712461012923e-05, - "loss": 0.0557, - "step": 39070 - }, - { - "epoch": 2.9021238675181937, - "grad_norm": 3.3917346000671387, - "learning_rate": 1.2587256794890836e-05, - "loss": 0.0595, - "step": 39080 - }, - { - "epoch": 2.9028664785385416, - "grad_norm": 2.1821186542510986, - "learning_rate": 1.2582801128768751e-05, - "loss": 0.0616, - "step": 39090 - }, - { - "epoch": 2.903609089558889, - "grad_norm": 3.1181464195251465, - "learning_rate": 1.2578345462646666e-05, - "loss": 0.0614, - "step": 39100 - }, - { - "epoch": 2.9043517005792365, - "grad_norm": 1.3670252561569214, - "learning_rate": 1.2573889796524581e-05, - "loss": 0.0571, - "step": 39110 - }, - { - "epoch": 2.905094311599584, - "grad_norm": 2.3952903747558594, - "learning_rate": 1.2569434130402496e-05, - "loss": 0.0576, - "step": 39120 - }, - { - "epoch": 2.9058369226199314, - "grad_norm": 3.1629419326782227, - "learning_rate": 1.256497846428041e-05, - "loss": 0.0895, - "step": 39130 - }, - { - "epoch": 2.9065795336402793, - "grad_norm": 2.0828757286071777, - "learning_rate": 1.2560522798158326e-05, - "loss": 0.0773, - "step": 39140 - }, - { - "epoch": 2.9073221446606268, - "grad_norm": 0.463571161031723, - "learning_rate": 1.255606713203624e-05, - "loss": 0.053, - "step": 39150 - }, - { - "epoch": 2.9080647556809742, - "grad_norm": 1.1848664283752441, - "learning_rate": 1.2551611465914154e-05, - "loss": 0.072, - "step": 39160 - }, - { - "epoch": 2.908807366701322, - "grad_norm": 0.9706199765205383, - "learning_rate": 1.254715579979207e-05, - "loss": 0.0496, - "step": 39170 - }, - { - "epoch": 2.909549977721669, - "grad_norm": 2.834559440612793, - "learning_rate": 1.2542700133669984e-05, - "loss": 0.0794, - "step": 39180 - }, - { - "epoch": 2.910292588742017, - "grad_norm": 2.5755867958068848, - "learning_rate": 1.25382444675479e-05, - "loss": 0.0941, - "step": 39190 - }, - { - "epoch": 2.9110351997623645, - "grad_norm": 0.6742496490478516, - "learning_rate": 1.2533788801425812e-05, - "loss": 0.0551, - "step": 39200 - }, - { - "epoch": 2.911777810782712, - "grad_norm": 1.3099998235702515, - "learning_rate": 1.2529333135303729e-05, - "loss": 0.0438, - "step": 39210 - }, - { - "epoch": 2.91252042180306, - "grad_norm": 1.9794929027557373, - "learning_rate": 1.2524877469181642e-05, - "loss": 0.0746, - "step": 39220 - }, - { - "epoch": 2.9132630328234073, - "grad_norm": 1.8509886264801025, - "learning_rate": 1.2520421803059557e-05, - "loss": 0.0832, - "step": 39230 - }, - { - "epoch": 2.9140056438437547, - "grad_norm": 1.6332608461380005, - "learning_rate": 1.2515966136937472e-05, - "loss": 0.0653, - "step": 39240 - }, - { - "epoch": 2.914748254864102, - "grad_norm": 1.8351725339889526, - "learning_rate": 1.2511510470815387e-05, - "loss": 0.075, - "step": 39250 - }, - { - "epoch": 2.9154908658844496, - "grad_norm": 2.058716058731079, - "learning_rate": 1.2507054804693302e-05, - "loss": 0.0559, - "step": 39260 - }, - { - "epoch": 2.9162334769047975, - "grad_norm": 0.9268501996994019, - "learning_rate": 1.2502599138571216e-05, - "loss": 0.0599, - "step": 39270 - }, - { - "epoch": 2.916976087925145, - "grad_norm": 1.5155894756317139, - "learning_rate": 1.2498143472449132e-05, - "loss": 0.0994, - "step": 39280 - }, - { - "epoch": 2.9177186989454924, - "grad_norm": 0.5851942896842957, - "learning_rate": 1.2493687806327046e-05, - "loss": 0.0485, - "step": 39290 - }, - { - "epoch": 2.91846130996584, - "grad_norm": 3.38140606880188, - "learning_rate": 1.248923214020496e-05, - "loss": 0.0576, - "step": 39300 - }, - { - "epoch": 2.9192039209861873, - "grad_norm": 1.3043699264526367, - "learning_rate": 1.2484776474082876e-05, - "loss": 0.0433, - "step": 39310 - }, - { - "epoch": 2.919946532006535, - "grad_norm": 2.2233948707580566, - "learning_rate": 1.248032080796079e-05, - "loss": 0.0585, - "step": 39320 - }, - { - "epoch": 2.9206891430268827, - "grad_norm": 2.5557191371917725, - "learning_rate": 1.2475865141838706e-05, - "loss": 0.0506, - "step": 39330 - }, - { - "epoch": 2.92143175404723, - "grad_norm": 1.088620662689209, - "learning_rate": 1.2471409475716619e-05, - "loss": 0.0709, - "step": 39340 - }, - { - "epoch": 2.9221743650675776, - "grad_norm": 1.4036482572555542, - "learning_rate": 1.2466953809594534e-05, - "loss": 0.0976, - "step": 39350 - }, - { - "epoch": 2.922916976087925, - "grad_norm": 2.0633914470672607, - "learning_rate": 1.246249814347245e-05, - "loss": 0.0545, - "step": 39360 - }, - { - "epoch": 2.923659587108273, - "grad_norm": 1.7512578964233398, - "learning_rate": 1.2458042477350364e-05, - "loss": 0.0749, - "step": 39370 - }, - { - "epoch": 2.9244021981286203, - "grad_norm": 0.6414874792098999, - "learning_rate": 1.2453586811228279e-05, - "loss": 0.0569, - "step": 39380 - }, - { - "epoch": 2.925144809148968, - "grad_norm": 1.033894419670105, - "learning_rate": 1.2449131145106192e-05, - "loss": 0.0706, - "step": 39390 - }, - { - "epoch": 2.9258874201693152, - "grad_norm": 1.335205316543579, - "learning_rate": 1.2444675478984109e-05, - "loss": 0.085, - "step": 39400 - }, - { - "epoch": 2.9266300311896627, - "grad_norm": 1.1902940273284912, - "learning_rate": 1.2440219812862024e-05, - "loss": 0.0359, - "step": 39410 - }, - { - "epoch": 2.9273726422100106, - "grad_norm": 2.9166228771209717, - "learning_rate": 1.2435764146739937e-05, - "loss": 0.0584, - "step": 39420 - }, - { - "epoch": 2.928115253230358, - "grad_norm": 0.8290153741836548, - "learning_rate": 1.2431308480617854e-05, - "loss": 0.0669, - "step": 39430 - }, - { - "epoch": 2.9288578642507055, - "grad_norm": 0.8835294842720032, - "learning_rate": 1.2426852814495767e-05, - "loss": 0.0545, - "step": 39440 - }, - { - "epoch": 2.929600475271053, - "grad_norm": 0.4865367114543915, - "learning_rate": 1.2422397148373682e-05, - "loss": 0.0727, - "step": 39450 - }, - { - "epoch": 2.9303430862914004, - "grad_norm": 0.7041136622428894, - "learning_rate": 1.2417941482251596e-05, - "loss": 0.0369, - "step": 39460 - }, - { - "epoch": 2.9310856973117483, - "grad_norm": 1.066720724105835, - "learning_rate": 1.2413485816129512e-05, - "loss": 0.0801, - "step": 39470 - }, - { - "epoch": 2.9318283083320957, - "grad_norm": 0.8632923364639282, - "learning_rate": 1.2409030150007427e-05, - "loss": 0.0296, - "step": 39480 - }, - { - "epoch": 2.932570919352443, - "grad_norm": 0.2960319221019745, - "learning_rate": 1.240457448388534e-05, - "loss": 0.0488, - "step": 39490 - }, - { - "epoch": 2.9333135303727906, - "grad_norm": 2.5065932273864746, - "learning_rate": 1.2400118817763256e-05, - "loss": 0.068, - "step": 39500 - }, - { - "epoch": 2.934056141393138, - "grad_norm": 0.5187915563583374, - "learning_rate": 1.239566315164117e-05, - "loss": 0.0766, - "step": 39510 - }, - { - "epoch": 2.934798752413486, - "grad_norm": 1.234108805656433, - "learning_rate": 1.2391207485519085e-05, - "loss": 0.0883, - "step": 39520 - }, - { - "epoch": 2.9355413634338334, - "grad_norm": 1.0708197355270386, - "learning_rate": 1.2386751819397e-05, - "loss": 0.0661, - "step": 39530 - }, - { - "epoch": 2.936283974454181, - "grad_norm": 0.3668792247772217, - "learning_rate": 1.2382296153274915e-05, - "loss": 0.0474, - "step": 39540 - }, - { - "epoch": 2.9370265854745283, - "grad_norm": 0.8638660311698914, - "learning_rate": 1.237784048715283e-05, - "loss": 0.0541, - "step": 39550 - }, - { - "epoch": 2.937769196494876, - "grad_norm": 2.86773681640625, - "learning_rate": 1.2373384821030744e-05, - "loss": 0.0799, - "step": 39560 - }, - { - "epoch": 2.9385118075152237, - "grad_norm": 3.684232473373413, - "learning_rate": 1.2368929154908659e-05, - "loss": 0.0852, - "step": 39570 - }, - { - "epoch": 2.939254418535571, - "grad_norm": 1.3619695901870728, - "learning_rate": 1.2364473488786574e-05, - "loss": 0.0468, - "step": 39580 - }, - { - "epoch": 2.9399970295559186, - "grad_norm": 1.2432461977005005, - "learning_rate": 1.2360017822664489e-05, - "loss": 0.0836, - "step": 39590 - }, - { - "epoch": 2.940739640576266, - "grad_norm": 0.7977986335754395, - "learning_rate": 1.2355562156542404e-05, - "loss": 0.076, - "step": 39600 - }, - { - "epoch": 2.9414822515966135, - "grad_norm": 1.1581476926803589, - "learning_rate": 1.2351106490420317e-05, - "loss": 0.0622, - "step": 39610 - }, - { - "epoch": 2.9422248626169614, - "grad_norm": 3.3563334941864014, - "learning_rate": 1.2346650824298234e-05, - "loss": 0.0713, - "step": 39620 - }, - { - "epoch": 2.942967473637309, - "grad_norm": 1.305243968963623, - "learning_rate": 1.2342195158176147e-05, - "loss": 0.0886, - "step": 39630 - }, - { - "epoch": 2.9437100846576563, - "grad_norm": 0.5178385376930237, - "learning_rate": 1.2337739492054062e-05, - "loss": 0.0861, - "step": 39640 - }, - { - "epoch": 2.9444526956780037, - "grad_norm": 0.6791629195213318, - "learning_rate": 1.2333283825931977e-05, - "loss": 0.0655, - "step": 39650 - }, - { - "epoch": 2.945195306698351, - "grad_norm": 1.124177098274231, - "learning_rate": 1.2328828159809892e-05, - "loss": 0.08, - "step": 39660 - }, - { - "epoch": 2.945937917718699, - "grad_norm": 1.458204984664917, - "learning_rate": 1.2324372493687807e-05, - "loss": 0.0777, - "step": 39670 - }, - { - "epoch": 2.9466805287390465, - "grad_norm": 2.084416389465332, - "learning_rate": 1.231991682756572e-05, - "loss": 0.0814, - "step": 39680 - }, - { - "epoch": 2.947423139759394, - "grad_norm": 1.0431878566741943, - "learning_rate": 1.2315461161443637e-05, - "loss": 0.0686, - "step": 39690 - }, - { - "epoch": 2.9481657507797414, - "grad_norm": 0.5064348578453064, - "learning_rate": 1.2311005495321552e-05, - "loss": 0.0676, - "step": 39700 - }, - { - "epoch": 2.948908361800089, - "grad_norm": 2.466647148132324, - "learning_rate": 1.2306549829199465e-05, - "loss": 0.072, - "step": 39710 - }, - { - "epoch": 2.9496509728204368, - "grad_norm": 0.9796644449234009, - "learning_rate": 1.230209416307738e-05, - "loss": 0.0493, - "step": 39720 - }, - { - "epoch": 2.950393583840784, - "grad_norm": 1.4864760637283325, - "learning_rate": 1.2297638496955295e-05, - "loss": 0.068, - "step": 39730 - }, - { - "epoch": 2.9511361948611317, - "grad_norm": 1.9162673950195312, - "learning_rate": 1.229318283083321e-05, - "loss": 0.0799, - "step": 39740 - }, - { - "epoch": 2.9518788058814796, - "grad_norm": 1.276904582977295, - "learning_rate": 1.2288727164711124e-05, - "loss": 0.0578, - "step": 39750 - }, - { - "epoch": 2.9526214169018266, - "grad_norm": 1.9684635400772095, - "learning_rate": 1.2284271498589039e-05, - "loss": 0.0608, - "step": 39760 - }, - { - "epoch": 2.9533640279221745, - "grad_norm": 1.3042725324630737, - "learning_rate": 1.2279815832466955e-05, - "loss": 0.0509, - "step": 39770 - }, - { - "epoch": 2.954106638942522, - "grad_norm": 1.1622141599655151, - "learning_rate": 1.2275360166344869e-05, - "loss": 0.0634, - "step": 39780 - }, - { - "epoch": 2.9548492499628694, - "grad_norm": 3.6292014122009277, - "learning_rate": 1.2270904500222784e-05, - "loss": 0.0781, - "step": 39790 - }, - { - "epoch": 2.9555918609832172, - "grad_norm": 0.8196433186531067, - "learning_rate": 1.2266448834100699e-05, - "loss": 0.0627, - "step": 39800 - }, - { - "epoch": 2.9563344720035647, - "grad_norm": 2.011394739151001, - "learning_rate": 1.2261993167978614e-05, - "loss": 0.0516, - "step": 39810 - }, - { - "epoch": 2.957077083023912, - "grad_norm": 0.2600031793117523, - "learning_rate": 1.2257537501856529e-05, - "loss": 0.0698, - "step": 39820 - }, - { - "epoch": 2.9578196940442596, - "grad_norm": 1.4970747232437134, - "learning_rate": 1.2253081835734442e-05, - "loss": 0.0725, - "step": 39830 - }, - { - "epoch": 2.958562305064607, - "grad_norm": 0.6336455941200256, - "learning_rate": 1.2248626169612359e-05, - "loss": 0.0471, - "step": 39840 - }, - { - "epoch": 2.959304916084955, - "grad_norm": 1.8660321235656738, - "learning_rate": 1.2244170503490272e-05, - "loss": 0.0905, - "step": 39850 - }, - { - "epoch": 2.9600475271053024, - "grad_norm": 2.4397964477539062, - "learning_rate": 1.2239714837368187e-05, - "loss": 0.0487, - "step": 39860 - }, - { - "epoch": 2.96079013812565, - "grad_norm": 2.365588426589966, - "learning_rate": 1.22352591712461e-05, - "loss": 0.0774, - "step": 39870 - }, - { - "epoch": 2.9615327491459973, - "grad_norm": 2.236955404281616, - "learning_rate": 1.2230803505124017e-05, - "loss": 0.0712, - "step": 39880 - }, - { - "epoch": 2.9622753601663447, - "grad_norm": 0.8604252338409424, - "learning_rate": 1.2226347839001932e-05, - "loss": 0.0643, - "step": 39890 - }, - { - "epoch": 2.9630179711866926, - "grad_norm": 2.3100244998931885, - "learning_rate": 1.2221892172879845e-05, - "loss": 0.062, - "step": 39900 - }, - { - "epoch": 2.96376058220704, - "grad_norm": 1.03587007522583, - "learning_rate": 1.221743650675776e-05, - "loss": 0.0636, - "step": 39910 - }, - { - "epoch": 2.9645031932273875, - "grad_norm": 0.37145113945007324, - "learning_rate": 1.2212980840635675e-05, - "loss": 0.0633, - "step": 39920 - }, - { - "epoch": 2.965245804247735, - "grad_norm": 2.6632423400878906, - "learning_rate": 1.220852517451359e-05, - "loss": 0.0567, - "step": 39930 - }, - { - "epoch": 2.9659884152680824, - "grad_norm": 1.9122623205184937, - "learning_rate": 1.2204069508391505e-05, - "loss": 0.0449, - "step": 39940 - }, - { - "epoch": 2.9667310262884303, - "grad_norm": 0.8057365417480469, - "learning_rate": 1.219961384226942e-05, - "loss": 0.0468, - "step": 39950 - }, - { - "epoch": 2.967473637308778, - "grad_norm": 0.6970472931861877, - "learning_rate": 1.2195158176147335e-05, - "loss": 0.0478, - "step": 39960 - }, - { - "epoch": 2.9682162483291252, - "grad_norm": 1.0414628982543945, - "learning_rate": 1.2190702510025248e-05, - "loss": 0.0621, - "step": 39970 - }, - { - "epoch": 2.9689588593494727, - "grad_norm": 1.28219735622406, - "learning_rate": 1.2186246843903163e-05, - "loss": 0.0494, - "step": 39980 - }, - { - "epoch": 2.96970147036982, - "grad_norm": 2.46976900100708, - "learning_rate": 1.2181791177781078e-05, - "loss": 0.0879, - "step": 39990 - }, - { - "epoch": 2.970444081390168, - "grad_norm": 1.4787884950637817, - "learning_rate": 1.2177335511658993e-05, - "loss": 0.0699, - "step": 40000 - }, - { - "epoch": 2.9711866924105155, - "grad_norm": 1.21670401096344, - "learning_rate": 1.2172879845536908e-05, - "loss": 0.073, - "step": 40010 - }, - { - "epoch": 2.971929303430863, - "grad_norm": 0.8801470398902893, - "learning_rate": 1.2168424179414822e-05, - "loss": 0.0288, - "step": 40020 - }, - { - "epoch": 2.9726719144512104, - "grad_norm": 1.1613928079605103, - "learning_rate": 1.2163968513292738e-05, - "loss": 0.0737, - "step": 40030 - }, - { - "epoch": 2.973414525471558, - "grad_norm": 1.54849112033844, - "learning_rate": 1.2159512847170652e-05, - "loss": 0.1064, - "step": 40040 - }, - { - "epoch": 2.9741571364919057, - "grad_norm": 0.6201350688934326, - "learning_rate": 1.2155057181048567e-05, - "loss": 0.0259, - "step": 40050 - }, - { - "epoch": 2.974899747512253, - "grad_norm": 1.4951953887939453, - "learning_rate": 1.2150601514926483e-05, - "loss": 0.0576, - "step": 40060 - }, - { - "epoch": 2.9756423585326006, - "grad_norm": 2.557687520980835, - "learning_rate": 1.2146145848804397e-05, - "loss": 0.0804, - "step": 40070 - }, - { - "epoch": 2.976384969552948, - "grad_norm": 1.9214116334915161, - "learning_rate": 1.2141690182682312e-05, - "loss": 0.0941, - "step": 40080 - }, - { - "epoch": 2.9771275805732955, - "grad_norm": 1.7784258127212524, - "learning_rate": 1.2137234516560225e-05, - "loss": 0.0808, - "step": 40090 - }, - { - "epoch": 2.9778701915936434, - "grad_norm": 2.0200514793395996, - "learning_rate": 1.2132778850438142e-05, - "loss": 0.0691, - "step": 40100 - }, - { - "epoch": 2.978612802613991, - "grad_norm": 2.5777747631073, - "learning_rate": 1.2128323184316057e-05, - "loss": 0.079, - "step": 40110 - }, - { - "epoch": 2.9793554136343383, - "grad_norm": 0.5303300619125366, - "learning_rate": 1.212386751819397e-05, - "loss": 0.0507, - "step": 40120 - }, - { - "epoch": 2.9800980246546858, - "grad_norm": 1.3025041818618774, - "learning_rate": 1.2119411852071885e-05, - "loss": 0.0579, - "step": 40130 - }, - { - "epoch": 2.980840635675033, - "grad_norm": 1.2491486072540283, - "learning_rate": 1.21149561859498e-05, - "loss": 0.1129, - "step": 40140 - }, - { - "epoch": 2.981583246695381, - "grad_norm": 1.2781689167022705, - "learning_rate": 1.2110500519827715e-05, - "loss": 0.0695, - "step": 40150 - }, - { - "epoch": 2.9823258577157286, - "grad_norm": 0.6310214400291443, - "learning_rate": 1.2106044853705628e-05, - "loss": 0.0625, - "step": 40160 - }, - { - "epoch": 2.983068468736076, - "grad_norm": 0.8008638024330139, - "learning_rate": 1.2101589187583543e-05, - "loss": 0.0615, - "step": 40170 - }, - { - "epoch": 2.9838110797564235, - "grad_norm": 1.496964454650879, - "learning_rate": 1.209713352146146e-05, - "loss": 0.0569, - "step": 40180 - }, - { - "epoch": 2.984553690776771, - "grad_norm": 1.446394681930542, - "learning_rate": 1.2092677855339373e-05, - "loss": 0.0606, - "step": 40190 - }, - { - "epoch": 2.985296301797119, - "grad_norm": 2.3738341331481934, - "learning_rate": 1.2088222189217288e-05, - "loss": 0.0918, - "step": 40200 - }, - { - "epoch": 2.9860389128174663, - "grad_norm": 2.0323574542999268, - "learning_rate": 1.2083766523095203e-05, - "loss": 0.0548, - "step": 40210 - }, - { - "epoch": 2.9867815238378137, - "grad_norm": 1.84878408908844, - "learning_rate": 1.2079310856973118e-05, - "loss": 0.0715, - "step": 40220 - }, - { - "epoch": 2.987524134858161, - "grad_norm": 3.0860447883605957, - "learning_rate": 1.2074855190851033e-05, - "loss": 0.0663, - "step": 40230 - }, - { - "epoch": 2.9882667458785086, - "grad_norm": 2.3522326946258545, - "learning_rate": 1.2070399524728946e-05, - "loss": 0.0642, - "step": 40240 - }, - { - "epoch": 2.9890093568988565, - "grad_norm": 3.3070790767669678, - "learning_rate": 1.2065943858606863e-05, - "loss": 0.0727, - "step": 40250 - }, - { - "epoch": 2.989751967919204, - "grad_norm": 1.6434651613235474, - "learning_rate": 1.2061488192484776e-05, - "loss": 0.0662, - "step": 40260 - }, - { - "epoch": 2.9904945789395514, - "grad_norm": 1.9186336994171143, - "learning_rate": 1.2057032526362691e-05, - "loss": 0.0882, - "step": 40270 - }, - { - "epoch": 2.991237189959899, - "grad_norm": 2.3194291591644287, - "learning_rate": 1.2052576860240605e-05, - "loss": 0.0749, - "step": 40280 - }, - { - "epoch": 2.9919798009802463, - "grad_norm": 1.5101096630096436, - "learning_rate": 1.2048121194118521e-05, - "loss": 0.0563, - "step": 40290 - }, - { - "epoch": 2.992722412000594, - "grad_norm": 0.4890212118625641, - "learning_rate": 1.2043665527996436e-05, - "loss": 0.0693, - "step": 40300 - }, - { - "epoch": 2.9934650230209416, - "grad_norm": 2.9807426929473877, - "learning_rate": 1.203920986187435e-05, - "loss": 0.0661, - "step": 40310 - }, - { - "epoch": 2.994207634041289, - "grad_norm": 2.466383934020996, - "learning_rate": 1.2034754195752266e-05, - "loss": 0.0578, - "step": 40320 - }, - { - "epoch": 2.994950245061637, - "grad_norm": 2.2628114223480225, - "learning_rate": 1.203029852963018e-05, - "loss": 0.0521, - "step": 40330 - }, - { - "epoch": 2.995692856081984, - "grad_norm": 0.5551472306251526, - "learning_rate": 1.2025842863508095e-05, - "loss": 0.0203, - "step": 40340 - }, - { - "epoch": 2.996435467102332, - "grad_norm": 0.6742831468582153, - "learning_rate": 1.202138719738601e-05, - "loss": 0.0729, - "step": 40350 - }, - { - "epoch": 2.9971780781226793, - "grad_norm": 2.0665626525878906, - "learning_rate": 1.2016931531263925e-05, - "loss": 0.0778, - "step": 40360 - }, - { - "epoch": 2.997920689143027, - "grad_norm": 1.7712310552597046, - "learning_rate": 1.201247586514184e-05, - "loss": 0.0635, - "step": 40370 - }, - { - "epoch": 2.9986633001633747, - "grad_norm": 0.27435049414634705, - "learning_rate": 1.2008020199019753e-05, - "loss": 0.051, - "step": 40380 - }, - { - "epoch": 2.999405911183722, - "grad_norm": 2.0611684322357178, - "learning_rate": 1.2003564532897668e-05, - "loss": 0.0496, - "step": 40390 - }, - { - "epoch": 3.0, - "eval_f1": 0.0, - "eval_loss": 0.055565182119607925, - "eval_runtime": 795.9474, - "eval_samples_per_second": 47.766, - "eval_steps_per_second": 2.986, - "step": 40398 - }, - { - "epoch": 3.0001485222040696, - "grad_norm": 2.1062331199645996, - "learning_rate": 1.1999108866775585e-05, - "loss": 0.0856, - "step": 40400 - }, - { - "epoch": 3.000891133224417, - "grad_norm": 0.7246180772781372, - "learning_rate": 1.1994653200653498e-05, - "loss": 0.0521, - "step": 40410 - }, - { - "epoch": 3.0016337442447645, - "grad_norm": 0.7559748888015747, - "learning_rate": 1.1990197534531413e-05, - "loss": 0.0641, - "step": 40420 - }, - { - "epoch": 3.002376355265112, - "grad_norm": 1.5347598791122437, - "learning_rate": 1.1985741868409326e-05, - "loss": 0.0732, - "step": 40430 - }, - { - "epoch": 3.00311896628546, - "grad_norm": 0.6457234025001526, - "learning_rate": 1.1981286202287243e-05, - "loss": 0.058, - "step": 40440 - }, - { - "epoch": 3.0038615773058073, - "grad_norm": 0.716690719127655, - "learning_rate": 1.1976830536165156e-05, - "loss": 0.0528, - "step": 40450 - }, - { - "epoch": 3.0046041883261547, - "grad_norm": 3.154327630996704, - "learning_rate": 1.1972374870043071e-05, - "loss": 0.09, - "step": 40460 - }, - { - "epoch": 3.005346799346502, - "grad_norm": 0.7682334780693054, - "learning_rate": 1.1967919203920988e-05, - "loss": 0.0764, - "step": 40470 - }, - { - "epoch": 3.0060894103668496, - "grad_norm": 0.4691618084907532, - "learning_rate": 1.1963463537798901e-05, - "loss": 0.0437, - "step": 40480 - }, - { - "epoch": 3.0068320213871975, - "grad_norm": 1.4054096937179565, - "learning_rate": 1.1959007871676816e-05, - "loss": 0.065, - "step": 40490 - }, - { - "epoch": 3.007574632407545, - "grad_norm": 2.800178050994873, - "learning_rate": 1.195455220555473e-05, - "loss": 0.1019, - "step": 40500 - }, - { - "epoch": 3.0083172434278924, - "grad_norm": 1.4787908792495728, - "learning_rate": 1.1950096539432646e-05, - "loss": 0.0959, - "step": 40510 - }, - { - "epoch": 3.00905985444824, - "grad_norm": 0.8632726073265076, - "learning_rate": 1.1945640873310561e-05, - "loss": 0.062, - "step": 40520 - }, - { - "epoch": 3.0098024654685878, - "grad_norm": 0.41822558641433716, - "learning_rate": 1.1941185207188475e-05, - "loss": 0.0676, - "step": 40530 - }, - { - "epoch": 3.010545076488935, - "grad_norm": 1.6045604944229126, - "learning_rate": 1.193672954106639e-05, - "loss": 0.0548, - "step": 40540 - }, - { - "epoch": 3.0112876875092827, - "grad_norm": 4.851860523223877, - "learning_rate": 1.1932273874944304e-05, - "loss": 0.0509, - "step": 40550 - }, - { - "epoch": 3.01203029852963, - "grad_norm": 1.4133330583572388, - "learning_rate": 1.192781820882222e-05, - "loss": 0.0475, - "step": 40560 - }, - { - "epoch": 3.0127729095499776, - "grad_norm": 1.6727176904678345, - "learning_rate": 1.1923362542700133e-05, - "loss": 0.0687, - "step": 40570 - }, - { - "epoch": 3.0135155205703255, - "grad_norm": 1.9212983846664429, - "learning_rate": 1.1918906876578048e-05, - "loss": 0.0982, - "step": 40580 - }, - { - "epoch": 3.014258131590673, - "grad_norm": 2.5236656665802, - "learning_rate": 1.1914451210455964e-05, - "loss": 0.0831, - "step": 40590 - }, - { - "epoch": 3.0150007426110204, - "grad_norm": 0.8461244106292725, - "learning_rate": 1.1909995544333878e-05, - "loss": 0.0635, - "step": 40600 - }, - { - "epoch": 3.015743353631368, - "grad_norm": 1.5794192552566528, - "learning_rate": 1.1905539878211793e-05, - "loss": 0.0718, - "step": 40610 - }, - { - "epoch": 3.0164859646517153, - "grad_norm": 0.7722788453102112, - "learning_rate": 1.1901084212089708e-05, - "loss": 0.0726, - "step": 40620 - }, - { - "epoch": 3.017228575672063, - "grad_norm": 1.9075877666473389, - "learning_rate": 1.1896628545967623e-05, - "loss": 0.0537, - "step": 40630 - }, - { - "epoch": 3.0179711866924106, - "grad_norm": 1.057822585105896, - "learning_rate": 1.1892172879845538e-05, - "loss": 0.0839, - "step": 40640 - }, - { - "epoch": 3.018713797712758, - "grad_norm": 2.4780874252319336, - "learning_rate": 1.1887717213723451e-05, - "loss": 0.0492, - "step": 40650 - }, - { - "epoch": 3.0194564087331055, - "grad_norm": 1.614044189453125, - "learning_rate": 1.1883261547601368e-05, - "loss": 0.0488, - "step": 40660 - }, - { - "epoch": 3.020199019753453, - "grad_norm": 0.8229920268058777, - "learning_rate": 1.1878805881479281e-05, - "loss": 0.0803, - "step": 40670 - }, - { - "epoch": 3.020941630773801, - "grad_norm": 0.7119345664978027, - "learning_rate": 1.1874350215357196e-05, - "loss": 0.048, - "step": 40680 - }, - { - "epoch": 3.0216842417941483, - "grad_norm": 1.8598228693008423, - "learning_rate": 1.1869894549235111e-05, - "loss": 0.0658, - "step": 40690 - }, - { - "epoch": 3.0224268528144957, - "grad_norm": 0.4016118347644806, - "learning_rate": 1.1865438883113026e-05, - "loss": 0.0351, - "step": 40700 - }, - { - "epoch": 3.023169463834843, - "grad_norm": 1.3350958824157715, - "learning_rate": 1.1860983216990941e-05, - "loss": 0.0553, - "step": 40710 - }, - { - "epoch": 3.0239120748551906, - "grad_norm": 0.8724972605705261, - "learning_rate": 1.1856527550868854e-05, - "loss": 0.0677, - "step": 40720 - }, - { - "epoch": 3.0246546858755385, - "grad_norm": 0.5378849506378174, - "learning_rate": 1.1852071884746771e-05, - "loss": 0.0623, - "step": 40730 - }, - { - "epoch": 3.025397296895886, - "grad_norm": 0.3792591094970703, - "learning_rate": 1.1847616218624684e-05, - "loss": 0.0654, - "step": 40740 - }, - { - "epoch": 3.0261399079162334, - "grad_norm": 0.3281194567680359, - "learning_rate": 1.18431605525026e-05, - "loss": 0.0862, - "step": 40750 - }, - { - "epoch": 3.026882518936581, - "grad_norm": 1.0597580671310425, - "learning_rate": 1.1838704886380514e-05, - "loss": 0.0651, - "step": 40760 - }, - { - "epoch": 3.0276251299569283, - "grad_norm": 0.7202780842781067, - "learning_rate": 1.183424922025843e-05, - "loss": 0.0742, - "step": 40770 - }, - { - "epoch": 3.0283677409772762, - "grad_norm": 0.5692980885505676, - "learning_rate": 1.1829793554136344e-05, - "loss": 0.0685, - "step": 40780 - }, - { - "epoch": 3.0291103519976237, - "grad_norm": 0.774348258972168, - "learning_rate": 1.1825337888014258e-05, - "loss": 0.0549, - "step": 40790 - }, - { - "epoch": 3.029852963017971, - "grad_norm": 1.022932529449463, - "learning_rate": 1.1820882221892173e-05, - "loss": 0.0687, - "step": 40800 - }, - { - "epoch": 3.0305955740383186, - "grad_norm": 0.4947283864021301, - "learning_rate": 1.181642655577009e-05, - "loss": 0.056, - "step": 40810 - }, - { - "epoch": 3.0313381850586665, - "grad_norm": 1.4515385627746582, - "learning_rate": 1.1811970889648003e-05, - "loss": 0.0671, - "step": 40820 - }, - { - "epoch": 3.032080796079014, - "grad_norm": 2.044039487838745, - "learning_rate": 1.1807515223525918e-05, - "loss": 0.0713, - "step": 40830 - }, - { - "epoch": 3.0328234070993614, - "grad_norm": 0.6720436215400696, - "learning_rate": 1.1803059557403831e-05, - "loss": 0.0685, - "step": 40840 - }, - { - "epoch": 3.033566018119709, - "grad_norm": 1.2701702117919922, - "learning_rate": 1.1798603891281748e-05, - "loss": 0.0871, - "step": 40850 - }, - { - "epoch": 3.0343086291400563, - "grad_norm": 0.6899190545082092, - "learning_rate": 1.179414822515966e-05, - "loss": 0.0761, - "step": 40860 - }, - { - "epoch": 3.035051240160404, - "grad_norm": 2.306637763977051, - "learning_rate": 1.1789692559037576e-05, - "loss": 0.0697, - "step": 40870 - }, - { - "epoch": 3.0357938511807516, - "grad_norm": 0.6029354333877563, - "learning_rate": 1.1785236892915493e-05, - "loss": 0.0677, - "step": 40880 - }, - { - "epoch": 3.036536462201099, - "grad_norm": 2.0085299015045166, - "learning_rate": 1.1780781226793406e-05, - "loss": 0.0726, - "step": 40890 - }, - { - "epoch": 3.0372790732214465, - "grad_norm": 1.0071104764938354, - "learning_rate": 1.177632556067132e-05, - "loss": 0.0556, - "step": 40900 - }, - { - "epoch": 3.038021684241794, - "grad_norm": 1.3485918045043945, - "learning_rate": 1.1771869894549234e-05, - "loss": 0.0403, - "step": 40910 - }, - { - "epoch": 3.038764295262142, - "grad_norm": 0.6336653232574463, - "learning_rate": 1.176741422842715e-05, - "loss": 0.0556, - "step": 40920 - }, - { - "epoch": 3.0395069062824893, - "grad_norm": 0.9142085909843445, - "learning_rate": 1.1762958562305066e-05, - "loss": 0.0506, - "step": 40930 - }, - { - "epoch": 3.0402495173028368, - "grad_norm": 2.9091007709503174, - "learning_rate": 1.1758502896182979e-05, - "loss": 0.075, - "step": 40940 - }, - { - "epoch": 3.040992128323184, - "grad_norm": 1.0477584600448608, - "learning_rate": 1.1754047230060894e-05, - "loss": 0.0697, - "step": 40950 - }, - { - "epoch": 3.0417347393435317, - "grad_norm": 0.5181246995925903, - "learning_rate": 1.1749591563938809e-05, - "loss": 0.0808, - "step": 40960 - }, - { - "epoch": 3.0424773503638796, - "grad_norm": 2.0747780799865723, - "learning_rate": 1.1745135897816724e-05, - "loss": 0.0793, - "step": 40970 - }, - { - "epoch": 3.043219961384227, - "grad_norm": 1.4035779237747192, - "learning_rate": 1.1740680231694637e-05, - "loss": 0.0503, - "step": 40980 - }, - { - "epoch": 3.0439625724045745, - "grad_norm": 1.336506962776184, - "learning_rate": 1.1736224565572554e-05, - "loss": 0.027, - "step": 40990 - }, - { - "epoch": 3.044705183424922, - "grad_norm": 1.5096478462219238, - "learning_rate": 1.1731768899450469e-05, - "loss": 0.0526, - "step": 41000 - }, - { - "epoch": 3.0454477944452694, - "grad_norm": 0.9661771655082703, - "learning_rate": 1.1727313233328382e-05, - "loss": 0.0593, - "step": 41010 - }, - { - "epoch": 3.0461904054656173, - "grad_norm": 2.019800901412964, - "learning_rate": 1.1722857567206297e-05, - "loss": 0.0584, - "step": 41020 - }, - { - "epoch": 3.0469330164859647, - "grad_norm": 1.9931749105453491, - "learning_rate": 1.1718401901084212e-05, - "loss": 0.0661, - "step": 41030 - }, - { - "epoch": 3.047675627506312, - "grad_norm": 1.6220228672027588, - "learning_rate": 1.1713946234962127e-05, - "loss": 0.0753, - "step": 41040 - }, - { - "epoch": 3.0484182385266596, - "grad_norm": 0.8533129692077637, - "learning_rate": 1.1709490568840042e-05, - "loss": 0.0538, - "step": 41050 - }, - { - "epoch": 3.0491608495470075, - "grad_norm": 2.1048035621643066, - "learning_rate": 1.1705034902717956e-05, - "loss": 0.0699, - "step": 41060 - }, - { - "epoch": 3.049903460567355, - "grad_norm": 1.1397531032562256, - "learning_rate": 1.1700579236595872e-05, - "loss": 0.0385, - "step": 41070 - }, - { - "epoch": 3.0506460715877024, - "grad_norm": 1.218272089958191, - "learning_rate": 1.1696123570473786e-05, - "loss": 0.0511, - "step": 41080 - }, - { - "epoch": 3.05138868260805, - "grad_norm": 1.6609820127487183, - "learning_rate": 1.16916679043517e-05, - "loss": 0.0693, - "step": 41090 - }, - { - "epoch": 3.0521312936283973, - "grad_norm": 0.7362266778945923, - "learning_rate": 1.1687212238229616e-05, - "loss": 0.0834, - "step": 41100 - }, - { - "epoch": 3.052873904648745, - "grad_norm": 1.8617407083511353, - "learning_rate": 1.168275657210753e-05, - "loss": 0.0672, - "step": 41110 - }, - { - "epoch": 3.0536165156690926, - "grad_norm": 1.8097994327545166, - "learning_rate": 1.1678300905985446e-05, - "loss": 0.0693, - "step": 41120 - }, - { - "epoch": 3.05435912668944, - "grad_norm": 0.9621978402137756, - "learning_rate": 1.1673845239863359e-05, - "loss": 0.0697, - "step": 41130 - }, - { - "epoch": 3.0551017377097875, - "grad_norm": 0.6268504858016968, - "learning_rate": 1.1669389573741276e-05, - "loss": 0.0801, - "step": 41140 - }, - { - "epoch": 3.055844348730135, - "grad_norm": 0.5146762132644653, - "learning_rate": 1.1664933907619189e-05, - "loss": 0.0516, - "step": 41150 - }, - { - "epoch": 3.056586959750483, - "grad_norm": 1.8789063692092896, - "learning_rate": 1.1660478241497104e-05, - "loss": 0.0604, - "step": 41160 - }, - { - "epoch": 3.0573295707708303, - "grad_norm": 1.16319739818573, - "learning_rate": 1.1656022575375019e-05, - "loss": 0.0819, - "step": 41170 - }, - { - "epoch": 3.058072181791178, - "grad_norm": 1.3086446523666382, - "learning_rate": 1.1651566909252934e-05, - "loss": 0.0805, - "step": 41180 - }, - { - "epoch": 3.0588147928115252, - "grad_norm": 1.2489312887191772, - "learning_rate": 1.1647111243130849e-05, - "loss": 0.07, - "step": 41190 - }, - { - "epoch": 3.0595574038318727, - "grad_norm": 1.8217955827713013, - "learning_rate": 1.1642655577008762e-05, - "loss": 0.0798, - "step": 41200 - }, - { - "epoch": 3.0603000148522206, - "grad_norm": 1.6634607315063477, - "learning_rate": 1.1638199910886677e-05, - "loss": 0.0426, - "step": 41210 - }, - { - "epoch": 3.061042625872568, - "grad_norm": 2.512523889541626, - "learning_rate": 1.1633744244764594e-05, - "loss": 0.0639, - "step": 41220 - }, - { - "epoch": 3.0617852368929155, - "grad_norm": 1.9372293949127197, - "learning_rate": 1.1629288578642507e-05, - "loss": 0.1061, - "step": 41230 - }, - { - "epoch": 3.062527847913263, - "grad_norm": 1.2639974355697632, - "learning_rate": 1.1624832912520422e-05, - "loss": 0.0663, - "step": 41240 - }, - { - "epoch": 3.0632704589336104, - "grad_norm": 1.19036066532135, - "learning_rate": 1.1620377246398337e-05, - "loss": 0.0901, - "step": 41250 - }, - { - "epoch": 3.0640130699539583, - "grad_norm": 2.261476993560791, - "learning_rate": 1.1615921580276252e-05, - "loss": 0.1083, - "step": 41260 - }, - { - "epoch": 3.0647556809743057, - "grad_norm": 1.9970070123672485, - "learning_rate": 1.1611465914154165e-05, - "loss": 0.0546, - "step": 41270 - }, - { - "epoch": 3.065498291994653, - "grad_norm": 1.31303870677948, - "learning_rate": 1.160701024803208e-05, - "loss": 0.0756, - "step": 41280 - }, - { - "epoch": 3.0662409030150006, - "grad_norm": 1.9843600988388062, - "learning_rate": 1.1602554581909997e-05, - "loss": 0.0427, - "step": 41290 - }, - { - "epoch": 3.066983514035348, - "grad_norm": 2.369338035583496, - "learning_rate": 1.159809891578791e-05, - "loss": 0.0688, - "step": 41300 - }, - { - "epoch": 3.067726125055696, - "grad_norm": 1.3789310455322266, - "learning_rate": 1.1593643249665825e-05, - "loss": 0.0755, - "step": 41310 - }, - { - "epoch": 3.0684687360760434, - "grad_norm": 2.209085702896118, - "learning_rate": 1.1589187583543739e-05, - "loss": 0.08, - "step": 41320 - }, - { - "epoch": 3.069211347096391, - "grad_norm": 1.1442301273345947, - "learning_rate": 1.1584731917421655e-05, - "loss": 0.0652, - "step": 41330 - }, - { - "epoch": 3.0699539581167383, - "grad_norm": 1.0715082883834839, - "learning_rate": 1.158027625129957e-05, - "loss": 0.0595, - "step": 41340 - }, - { - "epoch": 3.0706965691370858, - "grad_norm": 2.274426221847534, - "learning_rate": 1.1575820585177484e-05, - "loss": 0.0624, - "step": 41350 - }, - { - "epoch": 3.0714391801574337, - "grad_norm": 2.2110979557037354, - "learning_rate": 1.1571364919055399e-05, - "loss": 0.0696, - "step": 41360 - }, - { - "epoch": 3.072181791177781, - "grad_norm": 0.7689408659934998, - "learning_rate": 1.1566909252933314e-05, - "loss": 0.0498, - "step": 41370 - }, - { - "epoch": 3.0729244021981286, - "grad_norm": 1.7183451652526855, - "learning_rate": 1.1562453586811229e-05, - "loss": 0.0641, - "step": 41380 - }, - { - "epoch": 3.073667013218476, - "grad_norm": 0.8684793710708618, - "learning_rate": 1.1557997920689144e-05, - "loss": 0.0747, - "step": 41390 - }, - { - "epoch": 3.074409624238824, - "grad_norm": 1.1358157396316528, - "learning_rate": 1.1553542254567059e-05, - "loss": 0.0625, - "step": 41400 - }, - { - "epoch": 3.0751522352591714, - "grad_norm": 1.4486446380615234, - "learning_rate": 1.1549086588444974e-05, - "loss": 0.0625, - "step": 41410 - }, - { - "epoch": 3.075894846279519, - "grad_norm": 0.8137945532798767, - "learning_rate": 1.1544630922322887e-05, - "loss": 0.0589, - "step": 41420 - }, - { - "epoch": 3.0766374572998663, - "grad_norm": 2.614501476287842, - "learning_rate": 1.1540175256200802e-05, - "loss": 0.0617, - "step": 41430 - }, - { - "epoch": 3.0773800683202137, - "grad_norm": 1.8231887817382812, - "learning_rate": 1.1535719590078717e-05, - "loss": 0.0626, - "step": 41440 - }, - { - "epoch": 3.0781226793405616, - "grad_norm": 1.6901496648788452, - "learning_rate": 1.1531263923956632e-05, - "loss": 0.0533, - "step": 41450 - }, - { - "epoch": 3.078865290360909, - "grad_norm": 2.780428171157837, - "learning_rate": 1.1526808257834547e-05, - "loss": 0.0599, - "step": 41460 - }, - { - "epoch": 3.0796079013812565, - "grad_norm": 0.7014702558517456, - "learning_rate": 1.152235259171246e-05, - "loss": 0.0325, - "step": 41470 - }, - { - "epoch": 3.080350512401604, - "grad_norm": 1.5613539218902588, - "learning_rate": 1.1517896925590377e-05, - "loss": 0.0723, - "step": 41480 - }, - { - "epoch": 3.0810931234219514, - "grad_norm": 0.42727401852607727, - "learning_rate": 1.151344125946829e-05, - "loss": 0.0387, - "step": 41490 - }, - { - "epoch": 3.0818357344422993, - "grad_norm": 0.7476786971092224, - "learning_rate": 1.1508985593346205e-05, - "loss": 0.0964, - "step": 41500 - }, - { - "epoch": 3.0825783454626468, - "grad_norm": 0.780316948890686, - "learning_rate": 1.1504529927224122e-05, - "loss": 0.0734, - "step": 41510 - }, - { - "epoch": 3.083320956482994, - "grad_norm": 1.4033887386322021, - "learning_rate": 1.1500074261102035e-05, - "loss": 0.068, - "step": 41520 - }, - { - "epoch": 3.0840635675033417, - "grad_norm": 2.3569159507751465, - "learning_rate": 1.149561859497995e-05, - "loss": 0.0773, - "step": 41530 - }, - { - "epoch": 3.084806178523689, - "grad_norm": 2.889099359512329, - "learning_rate": 1.1491162928857864e-05, - "loss": 0.0708, - "step": 41540 - }, - { - "epoch": 3.085548789544037, - "grad_norm": 0.9511964321136475, - "learning_rate": 1.148670726273578e-05, - "loss": 0.0688, - "step": 41550 - }, - { - "epoch": 3.0862914005643844, - "grad_norm": 0.7817783951759338, - "learning_rate": 1.1482251596613693e-05, - "loss": 0.0624, - "step": 41560 - }, - { - "epoch": 3.087034011584732, - "grad_norm": 1.0421441793441772, - "learning_rate": 1.1477795930491608e-05, - "loss": 0.0506, - "step": 41570 - }, - { - "epoch": 3.0877766226050793, - "grad_norm": 1.8338831663131714, - "learning_rate": 1.1473340264369523e-05, - "loss": 0.0341, - "step": 41580 - }, - { - "epoch": 3.088519233625427, - "grad_norm": 0.8225168585777283, - "learning_rate": 1.1468884598247438e-05, - "loss": 0.0541, - "step": 41590 - }, - { - "epoch": 3.0892618446457747, - "grad_norm": 0.7224980592727661, - "learning_rate": 1.1464428932125353e-05, - "loss": 0.0588, - "step": 41600 - }, - { - "epoch": 3.090004455666122, - "grad_norm": 1.8559728860855103, - "learning_rate": 1.1459973266003267e-05, - "loss": 0.0867, - "step": 41610 - }, - { - "epoch": 3.0907470666864696, - "grad_norm": 2.965693712234497, - "learning_rate": 1.1455517599881182e-05, - "loss": 0.0735, - "step": 41620 - }, - { - "epoch": 3.091489677706817, - "grad_norm": 1.150742530822754, - "learning_rate": 1.1451061933759098e-05, - "loss": 0.0847, - "step": 41630 - }, - { - "epoch": 3.092232288727165, - "grad_norm": 2.492800712585449, - "learning_rate": 1.1446606267637012e-05, - "loss": 0.0813, - "step": 41640 - }, - { - "epoch": 3.0929748997475124, - "grad_norm": 1.1938602924346924, - "learning_rate": 1.1442150601514927e-05, - "loss": 0.0843, - "step": 41650 - }, - { - "epoch": 3.09371751076786, - "grad_norm": 1.2840536832809448, - "learning_rate": 1.1437694935392842e-05, - "loss": 0.0537, - "step": 41660 - }, - { - "epoch": 3.0944601217882073, - "grad_norm": 0.45311644673347473, - "learning_rate": 1.1433239269270757e-05, - "loss": 0.0419, - "step": 41670 - }, - { - "epoch": 3.0952027328085547, - "grad_norm": 1.7243297100067139, - "learning_rate": 1.142878360314867e-05, - "loss": 0.0568, - "step": 41680 - }, - { - "epoch": 3.0959453438289026, - "grad_norm": 1.793365240097046, - "learning_rate": 1.1424327937026585e-05, - "loss": 0.0749, - "step": 41690 - }, - { - "epoch": 3.09668795484925, - "grad_norm": 3.0934557914733887, - "learning_rate": 1.1419872270904502e-05, - "loss": 0.0775, - "step": 41700 - }, - { - "epoch": 3.0974305658695975, - "grad_norm": 0.9718724489212036, - "learning_rate": 1.1415416604782415e-05, - "loss": 0.0614, - "step": 41710 - }, - { - "epoch": 3.098173176889945, - "grad_norm": 1.1722973585128784, - "learning_rate": 1.141096093866033e-05, - "loss": 0.0926, - "step": 41720 - }, - { - "epoch": 3.0989157879102924, - "grad_norm": 2.2873425483703613, - "learning_rate": 1.1406505272538243e-05, - "loss": 0.0894, - "step": 41730 - }, - { - "epoch": 3.0996583989306403, - "grad_norm": 0.9070118069648743, - "learning_rate": 1.140204960641616e-05, - "loss": 0.0558, - "step": 41740 - }, - { - "epoch": 3.1004010099509878, - "grad_norm": 2.142220973968506, - "learning_rate": 1.1397593940294075e-05, - "loss": 0.0754, - "step": 41750 - }, - { - "epoch": 3.1011436209713352, - "grad_norm": 0.7095875144004822, - "learning_rate": 1.1393138274171988e-05, - "loss": 0.0474, - "step": 41760 - }, - { - "epoch": 3.1018862319916827, - "grad_norm": 0.9688817262649536, - "learning_rate": 1.1388682608049905e-05, - "loss": 0.0508, - "step": 41770 - }, - { - "epoch": 3.10262884301203, - "grad_norm": 1.3128563165664673, - "learning_rate": 1.1384226941927818e-05, - "loss": 0.0785, - "step": 41780 - }, - { - "epoch": 3.103371454032378, - "grad_norm": 1.9782556295394897, - "learning_rate": 1.1379771275805733e-05, - "loss": 0.0642, - "step": 41790 - }, - { - "epoch": 3.1041140650527255, - "grad_norm": 2.7178022861480713, - "learning_rate": 1.1375315609683648e-05, - "loss": 0.0747, - "step": 41800 - }, - { - "epoch": 3.104856676073073, - "grad_norm": 0.8021518588066101, - "learning_rate": 1.1370859943561563e-05, - "loss": 0.0784, - "step": 41810 - }, - { - "epoch": 3.1055992870934204, - "grad_norm": 1.1395660638809204, - "learning_rate": 1.1366404277439478e-05, - "loss": 0.0693, - "step": 41820 - }, - { - "epoch": 3.106341898113768, - "grad_norm": 0.3719038963317871, - "learning_rate": 1.1361948611317392e-05, - "loss": 0.0576, - "step": 41830 - }, - { - "epoch": 3.1070845091341157, - "grad_norm": 3.742137908935547, - "learning_rate": 1.1357492945195307e-05, - "loss": 0.0891, - "step": 41840 - }, - { - "epoch": 3.107827120154463, - "grad_norm": 1.4700413942337036, - "learning_rate": 1.1353037279073222e-05, - "loss": 0.0648, - "step": 41850 - }, - { - "epoch": 3.1085697311748106, - "grad_norm": 1.799091100692749, - "learning_rate": 1.1348581612951137e-05, - "loss": 0.0811, - "step": 41860 - }, - { - "epoch": 3.109312342195158, - "grad_norm": 0.4236965477466583, - "learning_rate": 1.1344125946829052e-05, - "loss": 0.0569, - "step": 41870 - }, - { - "epoch": 3.1100549532155055, - "grad_norm": 0.9053602814674377, - "learning_rate": 1.1339670280706965e-05, - "loss": 0.0476, - "step": 41880 - }, - { - "epoch": 3.1107975642358534, - "grad_norm": 1.6792991161346436, - "learning_rate": 1.1335214614584882e-05, - "loss": 0.0675, - "step": 41890 - }, - { - "epoch": 3.111540175256201, - "grad_norm": 0.48913243412971497, - "learning_rate": 1.1330758948462795e-05, - "loss": 0.0632, - "step": 41900 - }, - { - "epoch": 3.1122827862765483, - "grad_norm": 2.1380555629730225, - "learning_rate": 1.132630328234071e-05, - "loss": 0.0699, - "step": 41910 - }, - { - "epoch": 3.1130253972968958, - "grad_norm": 3.3821558952331543, - "learning_rate": 1.1321847616218627e-05, - "loss": 0.0529, - "step": 41920 - }, - { - "epoch": 3.113768008317243, - "grad_norm": 1.7938247919082642, - "learning_rate": 1.131739195009654e-05, - "loss": 0.0707, - "step": 41930 - }, - { - "epoch": 3.114510619337591, - "grad_norm": 1.2361664772033691, - "learning_rate": 1.1312936283974455e-05, - "loss": 0.0633, - "step": 41940 - }, - { - "epoch": 3.1152532303579386, - "grad_norm": 1.2739269733428955, - "learning_rate": 1.1308480617852368e-05, - "loss": 0.0466, - "step": 41950 - }, - { - "epoch": 3.115995841378286, - "grad_norm": 2.4915049076080322, - "learning_rate": 1.1304024951730285e-05, - "loss": 0.0761, - "step": 41960 - }, - { - "epoch": 3.1167384523986335, - "grad_norm": 1.7226744890213013, - "learning_rate": 1.1299569285608198e-05, - "loss": 0.0604, - "step": 41970 - }, - { - "epoch": 3.1174810634189813, - "grad_norm": 1.8600285053253174, - "learning_rate": 1.1295113619486113e-05, - "loss": 0.0596, - "step": 41980 - }, - { - "epoch": 3.118223674439329, - "grad_norm": 2.710045099258423, - "learning_rate": 1.1290657953364028e-05, - "loss": 0.0905, - "step": 41990 - }, - { - "epoch": 3.1189662854596762, - "grad_norm": 0.9297268390655518, - "learning_rate": 1.1286202287241943e-05, - "loss": 0.0513, - "step": 42000 - }, - { - "epoch": 3.1197088964800237, - "grad_norm": 2.148306131362915, - "learning_rate": 1.1281746621119858e-05, - "loss": 0.0729, - "step": 42010 - }, - { - "epoch": 3.120451507500371, - "grad_norm": 1.510986328125, - "learning_rate": 1.1277290954997771e-05, - "loss": 0.082, - "step": 42020 - }, - { - "epoch": 3.121194118520719, - "grad_norm": 2.6012394428253174, - "learning_rate": 1.1272835288875686e-05, - "loss": 0.0769, - "step": 42030 - }, - { - "epoch": 3.1219367295410665, - "grad_norm": 2.248591184616089, - "learning_rate": 1.1268379622753603e-05, - "loss": 0.0889, - "step": 42040 - }, - { - "epoch": 3.122679340561414, - "grad_norm": 1.4806914329528809, - "learning_rate": 1.1263923956631516e-05, - "loss": 0.0694, - "step": 42050 - }, - { - "epoch": 3.1234219515817614, - "grad_norm": 2.7519147396087646, - "learning_rate": 1.1259468290509431e-05, - "loss": 0.0503, - "step": 42060 - }, - { - "epoch": 3.124164562602109, - "grad_norm": 2.996624708175659, - "learning_rate": 1.1255012624387346e-05, - "loss": 0.0769, - "step": 42070 - }, - { - "epoch": 3.1249071736224567, - "grad_norm": 0.765396237373352, - "learning_rate": 1.1250556958265261e-05, - "loss": 0.0529, - "step": 42080 - }, - { - "epoch": 3.125649784642804, - "grad_norm": 0.4686828553676605, - "learning_rate": 1.1246101292143176e-05, - "loss": 0.0445, - "step": 42090 - }, - { - "epoch": 3.1263923956631516, - "grad_norm": 0.8828471899032593, - "learning_rate": 1.124164562602109e-05, - "loss": 0.0721, - "step": 42100 - }, - { - "epoch": 3.127135006683499, - "grad_norm": 3.2565178871154785, - "learning_rate": 1.1237189959899006e-05, - "loss": 0.0749, - "step": 42110 - }, - { - "epoch": 3.1278776177038465, - "grad_norm": 1.6550853252410889, - "learning_rate": 1.123273429377692e-05, - "loss": 0.0714, - "step": 42120 - }, - { - "epoch": 3.1286202287241944, - "grad_norm": 0.7988404631614685, - "learning_rate": 1.1228278627654835e-05, - "loss": 0.0889, - "step": 42130 - }, - { - "epoch": 3.129362839744542, - "grad_norm": 0.8032083511352539, - "learning_rate": 1.1223822961532748e-05, - "loss": 0.0761, - "step": 42140 - }, - { - "epoch": 3.1301054507648893, - "grad_norm": 1.479958176612854, - "learning_rate": 1.1219367295410665e-05, - "loss": 0.0505, - "step": 42150 - }, - { - "epoch": 3.130848061785237, - "grad_norm": 1.5908888578414917, - "learning_rate": 1.121491162928858e-05, - "loss": 0.0723, - "step": 42160 - }, - { - "epoch": 3.1315906728055842, - "grad_norm": 0.9829261302947998, - "learning_rate": 1.1210455963166493e-05, - "loss": 0.0565, - "step": 42170 - }, - { - "epoch": 3.132333283825932, - "grad_norm": 0.9932742118835449, - "learning_rate": 1.120600029704441e-05, - "loss": 0.0773, - "step": 42180 - }, - { - "epoch": 3.1330758948462796, - "grad_norm": 1.5291361808776855, - "learning_rate": 1.1201544630922323e-05, - "loss": 0.0945, - "step": 42190 - }, - { - "epoch": 3.133818505866627, - "grad_norm": 0.7759218215942383, - "learning_rate": 1.1197088964800238e-05, - "loss": 0.0567, - "step": 42200 - }, - { - "epoch": 3.1345611168869745, - "grad_norm": 1.5172669887542725, - "learning_rate": 1.1192633298678153e-05, - "loss": 0.041, - "step": 42210 - }, - { - "epoch": 3.1353037279073224, - "grad_norm": 2.5315630435943604, - "learning_rate": 1.1188177632556068e-05, - "loss": 0.0596, - "step": 42220 - }, - { - "epoch": 3.13604633892767, - "grad_norm": 1.723301649093628, - "learning_rate": 1.1183721966433983e-05, - "loss": 0.0474, - "step": 42230 - }, - { - "epoch": 3.1367889499480173, - "grad_norm": 1.322437047958374, - "learning_rate": 1.1179266300311896e-05, - "loss": 0.0513, - "step": 42240 - }, - { - "epoch": 3.1375315609683647, - "grad_norm": 1.4333685636520386, - "learning_rate": 1.1174810634189811e-05, - "loss": 0.0602, - "step": 42250 - }, - { - "epoch": 3.138274171988712, - "grad_norm": 1.3001521825790405, - "learning_rate": 1.1170354968067726e-05, - "loss": 0.0504, - "step": 42260 - }, - { - "epoch": 3.13901678300906, - "grad_norm": 0.24273203313350677, - "learning_rate": 1.1165899301945641e-05, - "loss": 0.033, - "step": 42270 - }, - { - "epoch": 3.1397593940294075, - "grad_norm": 2.059208393096924, - "learning_rate": 1.1161443635823556e-05, - "loss": 0.0636, - "step": 42280 - }, - { - "epoch": 3.140502005049755, - "grad_norm": 1.3567062616348267, - "learning_rate": 1.115698796970147e-05, - "loss": 0.0693, - "step": 42290 - }, - { - "epoch": 3.1412446160701024, - "grad_norm": 0.29561829566955566, - "learning_rate": 1.1152532303579386e-05, - "loss": 0.0617, - "step": 42300 - }, - { - "epoch": 3.14198722709045, - "grad_norm": 1.4350517988204956, - "learning_rate": 1.11480766374573e-05, - "loss": 0.0671, - "step": 42310 - }, - { - "epoch": 3.1427298381107978, - "grad_norm": 1.5640891790390015, - "learning_rate": 1.1143620971335214e-05, - "loss": 0.069, - "step": 42320 - }, - { - "epoch": 3.143472449131145, - "grad_norm": 1.763852596282959, - "learning_rate": 1.1139165305213131e-05, - "loss": 0.0703, - "step": 42330 - }, - { - "epoch": 3.1442150601514927, - "grad_norm": 2.7091476917266846, - "learning_rate": 1.1134709639091044e-05, - "loss": 0.0564, - "step": 42340 - }, - { - "epoch": 3.14495767117184, - "grad_norm": 1.8148163557052612, - "learning_rate": 1.113025397296896e-05, - "loss": 0.066, - "step": 42350 - }, - { - "epoch": 3.1457002821921876, - "grad_norm": 1.7284859418869019, - "learning_rate": 1.1125798306846873e-05, - "loss": 0.0552, - "step": 42360 - }, - { - "epoch": 3.1464428932125355, - "grad_norm": 0.8898233771324158, - "learning_rate": 1.112134264072479e-05, - "loss": 0.0448, - "step": 42370 - }, - { - "epoch": 3.147185504232883, - "grad_norm": 0.5898759365081787, - "learning_rate": 1.1116886974602703e-05, - "loss": 0.0547, - "step": 42380 - }, - { - "epoch": 3.1479281152532304, - "grad_norm": 0.21336278319358826, - "learning_rate": 1.1112431308480618e-05, - "loss": 0.0413, - "step": 42390 - }, - { - "epoch": 3.148670726273578, - "grad_norm": 1.7114509344100952, - "learning_rate": 1.1107975642358533e-05, - "loss": 0.099, - "step": 42400 - }, - { - "epoch": 3.1494133372939253, - "grad_norm": 0.577693521976471, - "learning_rate": 1.1103519976236448e-05, - "loss": 0.0631, - "step": 42410 - }, - { - "epoch": 3.150155948314273, - "grad_norm": 0.7681446075439453, - "learning_rate": 1.1099064310114363e-05, - "loss": 0.0605, - "step": 42420 - }, - { - "epoch": 3.1508985593346206, - "grad_norm": 0.7933735251426697, - "learning_rate": 1.1094608643992276e-05, - "loss": 0.0827, - "step": 42430 - }, - { - "epoch": 3.151641170354968, - "grad_norm": 0.5828210115432739, - "learning_rate": 1.1090152977870193e-05, - "loss": 0.0485, - "step": 42440 - }, - { - "epoch": 3.1523837813753155, - "grad_norm": 0.2343250811100006, - "learning_rate": 1.1085697311748108e-05, - "loss": 0.0362, - "step": 42450 - }, - { - "epoch": 3.153126392395663, - "grad_norm": 2.2434585094451904, - "learning_rate": 1.1081241645626021e-05, - "loss": 0.0569, - "step": 42460 - }, - { - "epoch": 3.153869003416011, - "grad_norm": 1.9830187559127808, - "learning_rate": 1.1076785979503936e-05, - "loss": 0.0823, - "step": 42470 - }, - { - "epoch": 3.1546116144363583, - "grad_norm": 1.5530307292938232, - "learning_rate": 1.1072330313381851e-05, - "loss": 0.0859, - "step": 42480 - }, - { - "epoch": 3.1553542254567057, - "grad_norm": 1.3739688396453857, - "learning_rate": 1.1067874647259766e-05, - "loss": 0.0796, - "step": 42490 - }, - { - "epoch": 3.156096836477053, - "grad_norm": 0.9068493247032166, - "learning_rate": 1.1063418981137681e-05, - "loss": 0.0461, - "step": 42500 - }, - { - "epoch": 3.1568394474974006, - "grad_norm": 1.6886134147644043, - "learning_rate": 1.1058963315015594e-05, - "loss": 0.093, - "step": 42510 - }, - { - "epoch": 3.1575820585177485, - "grad_norm": 2.079350233078003, - "learning_rate": 1.1054507648893511e-05, - "loss": 0.0642, - "step": 42520 - }, - { - "epoch": 3.158324669538096, - "grad_norm": 1.6049011945724487, - "learning_rate": 1.1050051982771424e-05, - "loss": 0.0613, - "step": 42530 - }, - { - "epoch": 3.1590672805584434, - "grad_norm": 0.36164718866348267, - "learning_rate": 1.104559631664934e-05, - "loss": 0.0669, - "step": 42540 - }, - { - "epoch": 3.159809891578791, - "grad_norm": 1.008207082748413, - "learning_rate": 1.1041140650527253e-05, - "loss": 0.0988, - "step": 42550 - }, - { - "epoch": 3.1605525025991383, - "grad_norm": 2.440133571624756, - "learning_rate": 1.103668498440517e-05, - "loss": 0.0465, - "step": 42560 - }, - { - "epoch": 3.1612951136194862, - "grad_norm": 0.5878588557243347, - "learning_rate": 1.1032229318283084e-05, - "loss": 0.04, - "step": 42570 - }, - { - "epoch": 3.1620377246398337, - "grad_norm": 1.898363709449768, - "learning_rate": 1.1027773652160997e-05, - "loss": 0.0494, - "step": 42580 - }, - { - "epoch": 3.162780335660181, - "grad_norm": 1.029447078704834, - "learning_rate": 1.1023317986038914e-05, - "loss": 0.0658, - "step": 42590 - }, - { - "epoch": 3.1635229466805286, - "grad_norm": 1.3765453100204468, - "learning_rate": 1.1018862319916827e-05, - "loss": 0.0479, - "step": 42600 - }, - { - "epoch": 3.1642655577008765, - "grad_norm": 0.8187096118927002, - "learning_rate": 1.1014406653794742e-05, - "loss": 0.0677, - "step": 42610 - }, - { - "epoch": 3.165008168721224, - "grad_norm": 3.0219662189483643, - "learning_rate": 1.1009950987672657e-05, - "loss": 0.0544, - "step": 42620 - }, - { - "epoch": 3.1657507797415714, - "grad_norm": 0.4135105609893799, - "learning_rate": 1.1005495321550572e-05, - "loss": 0.0379, - "step": 42630 - }, - { - "epoch": 3.166493390761919, - "grad_norm": 1.9586702585220337, - "learning_rate": 1.1001039655428487e-05, - "loss": 0.0644, - "step": 42640 - }, - { - "epoch": 3.1672360017822663, - "grad_norm": 0.3655850887298584, - "learning_rate": 1.09965839893064e-05, - "loss": 0.0825, - "step": 42650 - }, - { - "epoch": 3.167978612802614, - "grad_norm": 0.6566202640533447, - "learning_rate": 1.0992128323184316e-05, - "loss": 0.0597, - "step": 42660 - }, - { - "epoch": 3.1687212238229616, - "grad_norm": 1.0529264211654663, - "learning_rate": 1.098767265706223e-05, - "loss": 0.0569, - "step": 42670 - }, - { - "epoch": 3.169463834843309, - "grad_norm": 1.0065066814422607, - "learning_rate": 1.0983216990940146e-05, - "loss": 0.0674, - "step": 42680 - }, - { - "epoch": 3.1702064458636565, - "grad_norm": 3.295680046081543, - "learning_rate": 1.097876132481806e-05, - "loss": 0.0754, - "step": 42690 - }, - { - "epoch": 3.170949056884004, - "grad_norm": 1.2959693670272827, - "learning_rate": 1.0974305658695976e-05, - "loss": 0.0668, - "step": 42700 - }, - { - "epoch": 3.171691667904352, - "grad_norm": 1.669705867767334, - "learning_rate": 1.096984999257389e-05, - "loss": 0.0788, - "step": 42710 - }, - { - "epoch": 3.1724342789246993, - "grad_norm": 1.6456496715545654, - "learning_rate": 1.0965394326451804e-05, - "loss": 0.0564, - "step": 42720 - }, - { - "epoch": 3.1731768899450468, - "grad_norm": 1.2554987668991089, - "learning_rate": 1.0960938660329719e-05, - "loss": 0.0777, - "step": 42730 - }, - { - "epoch": 3.173919500965394, - "grad_norm": 2.1584696769714355, - "learning_rate": 1.0956482994207636e-05, - "loss": 0.0614, - "step": 42740 - }, - { - "epoch": 3.1746621119857417, - "grad_norm": 1.2832754850387573, - "learning_rate": 1.0952027328085549e-05, - "loss": 0.0692, - "step": 42750 - }, - { - "epoch": 3.1754047230060896, - "grad_norm": 0.7738613486289978, - "learning_rate": 1.0947571661963464e-05, - "loss": 0.0628, - "step": 42760 - }, - { - "epoch": 3.176147334026437, - "grad_norm": 1.1968348026275635, - "learning_rate": 1.0943115995841377e-05, - "loss": 0.0578, - "step": 42770 - }, - { - "epoch": 3.1768899450467845, - "grad_norm": 0.8434922099113464, - "learning_rate": 1.0938660329719294e-05, - "loss": 0.0698, - "step": 42780 - }, - { - "epoch": 3.177632556067132, - "grad_norm": 0.869853675365448, - "learning_rate": 1.0934204663597209e-05, - "loss": 0.0595, - "step": 42790 - }, - { - "epoch": 3.17837516708748, - "grad_norm": 1.2317126989364624, - "learning_rate": 1.0929748997475122e-05, - "loss": 0.0731, - "step": 42800 - }, - { - "epoch": 3.1791177781078273, - "grad_norm": 2.5130200386047363, - "learning_rate": 1.0925293331353037e-05, - "loss": 0.0744, - "step": 42810 - }, - { - "epoch": 3.1798603891281747, - "grad_norm": 1.538057565689087, - "learning_rate": 1.0920837665230952e-05, - "loss": 0.0544, - "step": 42820 - }, - { - "epoch": 3.180603000148522, - "grad_norm": 2.4751596450805664, - "learning_rate": 1.0916381999108867e-05, - "loss": 0.0568, - "step": 42830 - }, - { - "epoch": 3.1813456111688696, - "grad_norm": 0.298840194940567, - "learning_rate": 1.091192633298678e-05, - "loss": 0.0391, - "step": 42840 - }, - { - "epoch": 3.1820882221892175, - "grad_norm": 1.3424323797225952, - "learning_rate": 1.0907470666864697e-05, - "loss": 0.0854, - "step": 42850 - }, - { - "epoch": 3.182830833209565, - "grad_norm": 1.4192628860473633, - "learning_rate": 1.0903015000742612e-05, - "loss": 0.0538, - "step": 42860 - }, - { - "epoch": 3.1835734442299124, - "grad_norm": 1.6128898859024048, - "learning_rate": 1.0898559334620526e-05, - "loss": 0.0526, - "step": 42870 - }, - { - "epoch": 3.18431605525026, - "grad_norm": 1.4316538572311401, - "learning_rate": 1.089410366849844e-05, - "loss": 0.0555, - "step": 42880 - }, - { - "epoch": 3.1850586662706073, - "grad_norm": 0.824476420879364, - "learning_rate": 1.0889648002376356e-05, - "loss": 0.0546, - "step": 42890 - }, - { - "epoch": 3.185801277290955, - "grad_norm": 1.2334526777267456, - "learning_rate": 1.088519233625427e-05, - "loss": 0.0352, - "step": 42900 - }, - { - "epoch": 3.1865438883113026, - "grad_norm": 1.7368484735488892, - "learning_rate": 1.0880736670132186e-05, - "loss": 0.0406, - "step": 42910 - }, - { - "epoch": 3.18728649933165, - "grad_norm": 1.9491029977798462, - "learning_rate": 1.0876281004010099e-05, - "loss": 0.0542, - "step": 42920 - }, - { - "epoch": 3.1880291103519975, - "grad_norm": 2.460498809814453, - "learning_rate": 1.0871825337888016e-05, - "loss": 0.0532, - "step": 42930 - }, - { - "epoch": 3.188771721372345, - "grad_norm": 1.1754149198532104, - "learning_rate": 1.0867369671765929e-05, - "loss": 0.1044, - "step": 42940 - }, - { - "epoch": 3.189514332392693, - "grad_norm": 1.4648820161819458, - "learning_rate": 1.0862914005643844e-05, - "loss": 0.0671, - "step": 42950 - }, - { - "epoch": 3.1902569434130403, - "grad_norm": 1.5888352394104004, - "learning_rate": 1.0858458339521759e-05, - "loss": 0.054, - "step": 42960 - }, - { - "epoch": 3.190999554433388, - "grad_norm": 0.9326488971710205, - "learning_rate": 1.0854002673399674e-05, - "loss": 0.067, - "step": 42970 - }, - { - "epoch": 3.1917421654537352, - "grad_norm": 0.8771650195121765, - "learning_rate": 1.0849547007277589e-05, - "loss": 0.0449, - "step": 42980 - }, - { - "epoch": 3.1924847764740827, - "grad_norm": 1.608896017074585, - "learning_rate": 1.0845091341155502e-05, - "loss": 0.0408, - "step": 42990 - }, - { - "epoch": 3.1932273874944306, - "grad_norm": 1.1456100940704346, - "learning_rate": 1.0840635675033419e-05, - "loss": 0.0858, - "step": 43000 - }, - { - "epoch": 3.193969998514778, - "grad_norm": 0.9099137187004089, - "learning_rate": 1.0836180008911332e-05, - "loss": 0.075, - "step": 43010 - }, - { - "epoch": 3.1947126095351255, - "grad_norm": 3.812081813812256, - "learning_rate": 1.0831724342789247e-05, - "loss": 0.0499, - "step": 43020 - }, - { - "epoch": 3.195455220555473, - "grad_norm": 3.0689592361450195, - "learning_rate": 1.0827268676667162e-05, - "loss": 0.0763, - "step": 43030 - }, - { - "epoch": 3.1961978315758204, - "grad_norm": 1.6192634105682373, - "learning_rate": 1.0822813010545077e-05, - "loss": 0.0643, - "step": 43040 - }, - { - "epoch": 3.1969404425961683, - "grad_norm": 2.0305936336517334, - "learning_rate": 1.0818357344422992e-05, - "loss": 0.0602, - "step": 43050 - }, - { - "epoch": 3.1976830536165157, - "grad_norm": 3.326087713241577, - "learning_rate": 1.0813901678300905e-05, - "loss": 0.0737, - "step": 43060 - }, - { - "epoch": 3.198425664636863, - "grad_norm": 1.6276055574417114, - "learning_rate": 1.080944601217882e-05, - "loss": 0.0827, - "step": 43070 - }, - { - "epoch": 3.1991682756572106, - "grad_norm": 0.9700911641120911, - "learning_rate": 1.0804990346056735e-05, - "loss": 0.0455, - "step": 43080 - }, - { - "epoch": 3.199910886677558, - "grad_norm": 0.9960930347442627, - "learning_rate": 1.080053467993465e-05, - "loss": 0.061, - "step": 43090 - }, - { - "epoch": 3.200653497697906, - "grad_norm": 1.0367248058319092, - "learning_rate": 1.0796079013812565e-05, - "loss": 0.0802, - "step": 43100 - }, - { - "epoch": 3.2013961087182534, - "grad_norm": 2.8516721725463867, - "learning_rate": 1.079162334769048e-05, - "loss": 0.0529, - "step": 43110 - }, - { - "epoch": 3.202138719738601, - "grad_norm": 0.391720712184906, - "learning_rate": 1.0787167681568395e-05, - "loss": 0.0704, - "step": 43120 - }, - { - "epoch": 3.2028813307589483, - "grad_norm": 2.677454710006714, - "learning_rate": 1.0782712015446309e-05, - "loss": 0.0784, - "step": 43130 - }, - { - "epoch": 3.2036239417792958, - "grad_norm": 2.6573688983917236, - "learning_rate": 1.0778256349324224e-05, - "loss": 0.0293, - "step": 43140 - }, - { - "epoch": 3.2043665527996437, - "grad_norm": 2.1728572845458984, - "learning_rate": 1.077380068320214e-05, - "loss": 0.0788, - "step": 43150 - }, - { - "epoch": 3.205109163819991, - "grad_norm": 1.871918797492981, - "learning_rate": 1.0769345017080054e-05, - "loss": 0.0722, - "step": 43160 - }, - { - "epoch": 3.2058517748403386, - "grad_norm": 0.8562690615653992, - "learning_rate": 1.0764889350957969e-05, - "loss": 0.0808, - "step": 43170 - }, - { - "epoch": 3.206594385860686, - "grad_norm": 0.7276735305786133, - "learning_rate": 1.0760433684835882e-05, - "loss": 0.0452, - "step": 43180 - }, - { - "epoch": 3.207336996881034, - "grad_norm": 1.5723731517791748, - "learning_rate": 1.0755978018713799e-05, - "loss": 0.0914, - "step": 43190 - }, - { - "epoch": 3.2080796079013814, - "grad_norm": 2.1610429286956787, - "learning_rate": 1.0751522352591714e-05, - "loss": 0.069, - "step": 43200 - }, - { - "epoch": 3.208822218921729, - "grad_norm": 2.077439069747925, - "learning_rate": 1.0747066686469627e-05, - "loss": 0.0626, - "step": 43210 - }, - { - "epoch": 3.2095648299420763, - "grad_norm": 1.1080368757247925, - "learning_rate": 1.0742611020347544e-05, - "loss": 0.0603, - "step": 43220 - }, - { - "epoch": 3.2103074409624237, - "grad_norm": 1.089012622833252, - "learning_rate": 1.0738155354225457e-05, - "loss": 0.046, - "step": 43230 - }, - { - "epoch": 3.2110500519827716, - "grad_norm": 1.0629624128341675, - "learning_rate": 1.0733699688103372e-05, - "loss": 0.065, - "step": 43240 - }, - { - "epoch": 3.211792663003119, - "grad_norm": 2.318930149078369, - "learning_rate": 1.0729244021981285e-05, - "loss": 0.0856, - "step": 43250 - }, - { - "epoch": 3.2125352740234665, - "grad_norm": 0.6202425360679626, - "learning_rate": 1.0724788355859202e-05, - "loss": 0.0659, - "step": 43260 - }, - { - "epoch": 3.213277885043814, - "grad_norm": 1.3956732749938965, - "learning_rate": 1.0720332689737117e-05, - "loss": 0.0679, - "step": 43270 - }, - { - "epoch": 3.2140204960641614, - "grad_norm": 1.315119743347168, - "learning_rate": 1.071587702361503e-05, - "loss": 0.0776, - "step": 43280 - }, - { - "epoch": 3.2147631070845093, - "grad_norm": 1.4344345331192017, - "learning_rate": 1.0711421357492945e-05, - "loss": 0.0826, - "step": 43290 - }, - { - "epoch": 3.2155057181048567, - "grad_norm": 1.0249013900756836, - "learning_rate": 1.070696569137086e-05, - "loss": 0.052, - "step": 43300 - }, - { - "epoch": 3.216248329125204, - "grad_norm": 0.4725293815135956, - "learning_rate": 1.0702510025248775e-05, - "loss": 0.0762, - "step": 43310 - }, - { - "epoch": 3.2169909401455516, - "grad_norm": 2.4270472526550293, - "learning_rate": 1.069805435912669e-05, - "loss": 0.0802, - "step": 43320 - }, - { - "epoch": 3.217733551165899, - "grad_norm": 1.2253289222717285, - "learning_rate": 1.0693598693004603e-05, - "loss": 0.0569, - "step": 43330 - }, - { - "epoch": 3.218476162186247, - "grad_norm": 0.7013288140296936, - "learning_rate": 1.068914302688252e-05, - "loss": 0.0446, - "step": 43340 - }, - { - "epoch": 3.2192187732065944, - "grad_norm": 1.2379825115203857, - "learning_rate": 1.0684687360760433e-05, - "loss": 0.0667, - "step": 43350 - }, - { - "epoch": 3.219961384226942, - "grad_norm": 1.6265310049057007, - "learning_rate": 1.0680231694638348e-05, - "loss": 0.0971, - "step": 43360 - }, - { - "epoch": 3.2207039952472893, - "grad_norm": 1.87282133102417, - "learning_rate": 1.0675776028516263e-05, - "loss": 0.0718, - "step": 43370 - }, - { - "epoch": 3.2214466062676372, - "grad_norm": 1.285352349281311, - "learning_rate": 1.0671320362394178e-05, - "loss": 0.0982, - "step": 43380 - }, - { - "epoch": 3.2221892172879847, - "grad_norm": 1.9336485862731934, - "learning_rate": 1.0666864696272093e-05, - "loss": 0.0469, - "step": 43390 - }, - { - "epoch": 3.222931828308332, - "grad_norm": 2.0391123294830322, - "learning_rate": 1.0662409030150007e-05, - "loss": 0.0731, - "step": 43400 - }, - { - "epoch": 3.2236744393286796, - "grad_norm": 0.8943817019462585, - "learning_rate": 1.0657953364027923e-05, - "loss": 0.0598, - "step": 43410 - }, - { - "epoch": 3.224417050349027, - "grad_norm": 0.8489885330200195, - "learning_rate": 1.0653497697905837e-05, - "loss": 0.0584, - "step": 43420 - }, - { - "epoch": 3.225159661369375, - "grad_norm": 0.9959657192230225, - "learning_rate": 1.0649042031783752e-05, - "loss": 0.064, - "step": 43430 - }, - { - "epoch": 3.2259022723897224, - "grad_norm": 1.330881953239441, - "learning_rate": 1.0644586365661667e-05, - "loss": 0.069, - "step": 43440 - }, - { - "epoch": 3.22664488341007, - "grad_norm": 2.326178550720215, - "learning_rate": 1.0640130699539582e-05, - "loss": 0.0519, - "step": 43450 - }, - { - "epoch": 3.2273874944304173, - "grad_norm": 0.8001874089241028, - "learning_rate": 1.0635675033417497e-05, - "loss": 0.059, - "step": 43460 - }, - { - "epoch": 3.2281301054507647, - "grad_norm": 1.8835375308990479, - "learning_rate": 1.063121936729541e-05, - "loss": 0.1137, - "step": 43470 - }, - { - "epoch": 3.2288727164711126, - "grad_norm": 1.0874513387680054, - "learning_rate": 1.0626763701173325e-05, - "loss": 0.0764, - "step": 43480 - }, - { - "epoch": 3.22961532749146, - "grad_norm": 0.6599858403205872, - "learning_rate": 1.0622308035051242e-05, - "loss": 0.0516, - "step": 43490 - }, - { - "epoch": 3.2303579385118075, - "grad_norm": 0.5097527503967285, - "learning_rate": 1.0617852368929155e-05, - "loss": 0.0429, - "step": 43500 - }, - { - "epoch": 3.231100549532155, - "grad_norm": 0.7686970829963684, - "learning_rate": 1.061339670280707e-05, - "loss": 0.0492, - "step": 43510 - }, - { - "epoch": 3.2318431605525024, - "grad_norm": 1.8468888998031616, - "learning_rate": 1.0608941036684985e-05, - "loss": 0.0558, - "step": 43520 - }, - { - "epoch": 3.2325857715728503, - "grad_norm": 0.26282399892807007, - "learning_rate": 1.06044853705629e-05, - "loss": 0.069, - "step": 43530 - }, - { - "epoch": 3.2333283825931978, - "grad_norm": 2.4941744804382324, - "learning_rate": 1.0600029704440813e-05, - "loss": 0.053, - "step": 43540 - }, - { - "epoch": 3.234070993613545, - "grad_norm": 2.5920748710632324, - "learning_rate": 1.0595574038318728e-05, - "loss": 0.0638, - "step": 43550 - }, - { - "epoch": 3.2348136046338927, - "grad_norm": 0.590412437915802, - "learning_rate": 1.0591118372196645e-05, - "loss": 0.0606, - "step": 43560 - }, - { - "epoch": 3.23555621565424, - "grad_norm": 1.5013396739959717, - "learning_rate": 1.0586662706074558e-05, - "loss": 0.0688, - "step": 43570 - }, - { - "epoch": 3.236298826674588, - "grad_norm": 1.6271650791168213, - "learning_rate": 1.0582207039952473e-05, - "loss": 0.0675, - "step": 43580 - }, - { - "epoch": 3.2370414376949355, - "grad_norm": 0.9938003420829773, - "learning_rate": 1.0577751373830386e-05, - "loss": 0.081, - "step": 43590 - }, - { - "epoch": 3.237784048715283, - "grad_norm": 1.7118418216705322, - "learning_rate": 1.0573295707708303e-05, - "loss": 0.0595, - "step": 43600 - }, - { - "epoch": 3.2385266597356304, - "grad_norm": 0.5836747884750366, - "learning_rate": 1.0568840041586218e-05, - "loss": 0.0638, - "step": 43610 - }, - { - "epoch": 3.239269270755978, - "grad_norm": 0.8831083178520203, - "learning_rate": 1.0564384375464131e-05, - "loss": 0.0438, - "step": 43620 - }, - { - "epoch": 3.2400118817763257, - "grad_norm": 1.3930721282958984, - "learning_rate": 1.0559928709342048e-05, - "loss": 0.0442, - "step": 43630 - }, - { - "epoch": 3.240754492796673, - "grad_norm": 5.538564682006836, - "learning_rate": 1.0555473043219961e-05, - "loss": 0.0557, - "step": 43640 - }, - { - "epoch": 3.2414971038170206, - "grad_norm": 1.8036948442459106, - "learning_rate": 1.0551017377097876e-05, - "loss": 0.0664, - "step": 43650 - }, - { - "epoch": 3.242239714837368, - "grad_norm": 0.252446711063385, - "learning_rate": 1.054656171097579e-05, - "loss": 0.0578, - "step": 43660 - }, - { - "epoch": 3.2429823258577155, - "grad_norm": 1.6370078325271606, - "learning_rate": 1.0542106044853706e-05, - "loss": 0.0668, - "step": 43670 - }, - { - "epoch": 3.2437249368780634, - "grad_norm": 0.6032128930091858, - "learning_rate": 1.0537650378731621e-05, - "loss": 0.0697, - "step": 43680 - }, - { - "epoch": 3.244467547898411, - "grad_norm": 1.6264759302139282, - "learning_rate": 1.0533194712609535e-05, - "loss": 0.0407, - "step": 43690 - }, - { - "epoch": 3.2452101589187583, - "grad_norm": 1.4826754331588745, - "learning_rate": 1.052873904648745e-05, - "loss": 0.0342, - "step": 43700 - }, - { - "epoch": 3.2459527699391058, - "grad_norm": 0.2272782027721405, - "learning_rate": 1.0524283380365365e-05, - "loss": 0.0744, - "step": 43710 - }, - { - "epoch": 3.246695380959453, - "grad_norm": 2.538353204727173, - "learning_rate": 1.051982771424328e-05, - "loss": 0.063, - "step": 43720 - }, - { - "epoch": 3.247437991979801, - "grad_norm": 1.0268744230270386, - "learning_rate": 1.0515372048121195e-05, - "loss": 0.0528, - "step": 43730 - }, - { - "epoch": 3.2481806030001485, - "grad_norm": 0.812559962272644, - "learning_rate": 1.0510916381999108e-05, - "loss": 0.0557, - "step": 43740 - }, - { - "epoch": 3.248923214020496, - "grad_norm": 0.4809117317199707, - "learning_rate": 1.0506460715877025e-05, - "loss": 0.0452, - "step": 43750 - }, - { - "epoch": 3.2496658250408434, - "grad_norm": 2.713081121444702, - "learning_rate": 1.0502005049754938e-05, - "loss": 0.0444, - "step": 43760 - }, - { - "epoch": 3.2504084360611913, - "grad_norm": 1.6045153141021729, - "learning_rate": 1.0497549383632853e-05, - "loss": 0.0835, - "step": 43770 - }, - { - "epoch": 3.251151047081539, - "grad_norm": 1.182823657989502, - "learning_rate": 1.049309371751077e-05, - "loss": 0.0358, - "step": 43780 - }, - { - "epoch": 3.2518936581018862, - "grad_norm": 1.261793613433838, - "learning_rate": 1.0488638051388683e-05, - "loss": 0.059, - "step": 43790 - }, - { - "epoch": 3.2526362691222337, - "grad_norm": 1.5234590768814087, - "learning_rate": 1.0484182385266598e-05, - "loss": 0.0675, - "step": 43800 - }, - { - "epoch": 3.253378880142581, - "grad_norm": 3.1365156173706055, - "learning_rate": 1.0479726719144511e-05, - "loss": 0.0543, - "step": 43810 - }, - { - "epoch": 3.254121491162929, - "grad_norm": 2.5033278465270996, - "learning_rate": 1.0475271053022428e-05, - "loss": 0.0883, - "step": 43820 - }, - { - "epoch": 3.2548641021832765, - "grad_norm": 1.3353326320648193, - "learning_rate": 1.0470815386900341e-05, - "loss": 0.0692, - "step": 43830 - }, - { - "epoch": 3.255606713203624, - "grad_norm": 1.0387260913848877, - "learning_rate": 1.0466359720778256e-05, - "loss": 0.0863, - "step": 43840 - }, - { - "epoch": 3.2563493242239714, - "grad_norm": 1.1819405555725098, - "learning_rate": 1.0461904054656171e-05, - "loss": 0.0635, - "step": 43850 - }, - { - "epoch": 3.257091935244319, - "grad_norm": 0.8391949534416199, - "learning_rate": 1.0457448388534086e-05, - "loss": 0.0419, - "step": 43860 - }, - { - "epoch": 3.2578345462646667, - "grad_norm": 3.538817882537842, - "learning_rate": 1.0452992722412001e-05, - "loss": 0.0757, - "step": 43870 - }, - { - "epoch": 3.258577157285014, - "grad_norm": 1.986291766166687, - "learning_rate": 1.0448537056289915e-05, - "loss": 0.0663, - "step": 43880 - }, - { - "epoch": 3.2593197683053616, - "grad_norm": 2.091001510620117, - "learning_rate": 1.0444081390167831e-05, - "loss": 0.0641, - "step": 43890 - }, - { - "epoch": 3.260062379325709, - "grad_norm": 1.032757043838501, - "learning_rate": 1.0439625724045746e-05, - "loss": 0.0629, - "step": 43900 - }, - { - "epoch": 3.260804990346057, - "grad_norm": 0.7859309911727905, - "learning_rate": 1.043517005792366e-05, - "loss": 0.0354, - "step": 43910 - }, - { - "epoch": 3.2615476013664044, - "grad_norm": 0.43140801787376404, - "learning_rate": 1.0430714391801575e-05, - "loss": 0.0709, - "step": 43920 - }, - { - "epoch": 3.262290212386752, - "grad_norm": 1.7433068752288818, - "learning_rate": 1.042625872567949e-05, - "loss": 0.088, - "step": 43930 - }, - { - "epoch": 3.2630328234070993, - "grad_norm": 2.8857994079589844, - "learning_rate": 1.0421803059557405e-05, - "loss": 0.0876, - "step": 43940 - }, - { - "epoch": 3.2637754344274468, - "grad_norm": 0.6673837304115295, - "learning_rate": 1.0417347393435318e-05, - "loss": 0.0525, - "step": 43950 - }, - { - "epoch": 3.2645180454477947, - "grad_norm": 0.9741489887237549, - "learning_rate": 1.0412891727313233e-05, - "loss": 0.0626, - "step": 43960 - }, - { - "epoch": 3.265260656468142, - "grad_norm": 0.6886597275733948, - "learning_rate": 1.040843606119115e-05, - "loss": 0.0607, - "step": 43970 - }, - { - "epoch": 3.2660032674884896, - "grad_norm": 0.6309210062026978, - "learning_rate": 1.0403980395069063e-05, - "loss": 0.0695, - "step": 43980 - }, - { - "epoch": 3.266745878508837, - "grad_norm": 3.206247329711914, - "learning_rate": 1.0399524728946978e-05, - "loss": 0.0573, - "step": 43990 - }, - { - "epoch": 3.2674884895291845, - "grad_norm": 0.590715765953064, - "learning_rate": 1.0395069062824891e-05, - "loss": 0.0443, - "step": 44000 - }, - { - "epoch": 3.2682311005495324, - "grad_norm": 3.0547935962677, - "learning_rate": 1.0390613396702808e-05, - "loss": 0.062, - "step": 44010 - }, - { - "epoch": 3.26897371156988, - "grad_norm": 5.862617015838623, - "learning_rate": 1.0386157730580723e-05, - "loss": 0.0543, - "step": 44020 - }, - { - "epoch": 3.2697163225902273, - "grad_norm": 0.2635265588760376, - "learning_rate": 1.0381702064458636e-05, - "loss": 0.0307, - "step": 44030 - }, - { - "epoch": 3.2704589336105747, - "grad_norm": 1.2568199634552002, - "learning_rate": 1.0377246398336553e-05, - "loss": 0.0388, - "step": 44040 - }, - { - "epoch": 3.271201544630922, - "grad_norm": 0.4090416133403778, - "learning_rate": 1.0372790732214466e-05, - "loss": 0.0327, - "step": 44050 - }, - { - "epoch": 3.27194415565127, - "grad_norm": 1.8070887327194214, - "learning_rate": 1.0368335066092381e-05, - "loss": 0.0626, - "step": 44060 - }, - { - "epoch": 3.2726867666716175, - "grad_norm": 1.7493088245391846, - "learning_rate": 1.0363879399970296e-05, - "loss": 0.0763, - "step": 44070 - }, - { - "epoch": 3.273429377691965, - "grad_norm": 1.4830260276794434, - "learning_rate": 1.0359423733848211e-05, - "loss": 0.0501, - "step": 44080 - }, - { - "epoch": 3.2741719887123124, - "grad_norm": 0.5901992917060852, - "learning_rate": 1.0354968067726126e-05, - "loss": 0.0614, - "step": 44090 - }, - { - "epoch": 3.27491459973266, - "grad_norm": 2.098797082901001, - "learning_rate": 1.035051240160404e-05, - "loss": 0.058, - "step": 44100 - }, - { - "epoch": 3.2756572107530078, - "grad_norm": 1.220317006111145, - "learning_rate": 1.0346056735481954e-05, - "loss": 0.0958, - "step": 44110 - }, - { - "epoch": 3.276399821773355, - "grad_norm": 0.6847271919250488, - "learning_rate": 1.034160106935987e-05, - "loss": 0.0539, - "step": 44120 - }, - { - "epoch": 3.2771424327937027, - "grad_norm": 1.1545261144638062, - "learning_rate": 1.0337145403237784e-05, - "loss": 0.0644, - "step": 44130 - }, - { - "epoch": 3.27788504381405, - "grad_norm": 2.0466902256011963, - "learning_rate": 1.03326897371157e-05, - "loss": 0.0595, - "step": 44140 - }, - { - "epoch": 3.2786276548343976, - "grad_norm": 2.453418493270874, - "learning_rate": 1.0328234070993614e-05, - "loss": 0.0978, - "step": 44150 - }, - { - "epoch": 3.2793702658547454, - "grad_norm": 3.9171948432922363, - "learning_rate": 1.032377840487153e-05, - "loss": 0.0583, - "step": 44160 - }, - { - "epoch": 3.280112876875093, - "grad_norm": 2.847308874130249, - "learning_rate": 1.0319322738749443e-05, - "loss": 0.0792, - "step": 44170 - }, - { - "epoch": 3.2808554878954403, - "grad_norm": 1.2531743049621582, - "learning_rate": 1.0314867072627358e-05, - "loss": 0.0833, - "step": 44180 - }, - { - "epoch": 3.281598098915788, - "grad_norm": 3.184342384338379, - "learning_rate": 1.0310411406505274e-05, - "loss": 0.0575, - "step": 44190 - }, - { - "epoch": 3.2823407099361352, - "grad_norm": 1.0278104543685913, - "learning_rate": 1.0305955740383188e-05, - "loss": 0.0633, - "step": 44200 - }, - { - "epoch": 3.283083320956483, - "grad_norm": 1.105556607246399, - "learning_rate": 1.0301500074261103e-05, - "loss": 0.0774, - "step": 44210 - }, - { - "epoch": 3.2838259319768306, - "grad_norm": 2.209592342376709, - "learning_rate": 1.0297044408139016e-05, - "loss": 0.0427, - "step": 44220 - }, - { - "epoch": 3.284568542997178, - "grad_norm": 1.6247801780700684, - "learning_rate": 1.0292588742016933e-05, - "loss": 0.0552, - "step": 44230 - }, - { - "epoch": 3.2853111540175255, - "grad_norm": 1.5469924211502075, - "learning_rate": 1.0288133075894846e-05, - "loss": 0.0543, - "step": 44240 - }, - { - "epoch": 3.286053765037873, - "grad_norm": 1.353393316268921, - "learning_rate": 1.0283677409772761e-05, - "loss": 0.0592, - "step": 44250 - }, - { - "epoch": 3.286796376058221, - "grad_norm": 1.046172022819519, - "learning_rate": 1.0279221743650676e-05, - "loss": 0.0504, - "step": 44260 - }, - { - "epoch": 3.2875389870785683, - "grad_norm": 0.5917429327964783, - "learning_rate": 1.0274766077528591e-05, - "loss": 0.0429, - "step": 44270 - }, - { - "epoch": 3.2882815980989157, - "grad_norm": 1.0659432411193848, - "learning_rate": 1.0270310411406506e-05, - "loss": 0.0625, - "step": 44280 - }, - { - "epoch": 3.289024209119263, - "grad_norm": 1.2359225749969482, - "learning_rate": 1.0265854745284419e-05, - "loss": 0.0623, - "step": 44290 - }, - { - "epoch": 3.2897668201396106, - "grad_norm": 1.3712466955184937, - "learning_rate": 1.0261399079162336e-05, - "loss": 0.0367, - "step": 44300 - }, - { - "epoch": 3.2905094311599585, - "grad_norm": 0.6402594447135925, - "learning_rate": 1.025694341304025e-05, - "loss": 0.06, - "step": 44310 - }, - { - "epoch": 3.291252042180306, - "grad_norm": 3.1848597526550293, - "learning_rate": 1.0252487746918164e-05, - "loss": 0.0566, - "step": 44320 - }, - { - "epoch": 3.2919946532006534, - "grad_norm": 0.769212543964386, - "learning_rate": 1.0248032080796079e-05, - "loss": 0.0579, - "step": 44330 - }, - { - "epoch": 3.292737264221001, - "grad_norm": 2.3581175804138184, - "learning_rate": 1.0243576414673994e-05, - "loss": 0.0666, - "step": 44340 - }, - { - "epoch": 3.2934798752413488, - "grad_norm": 0.7175013422966003, - "learning_rate": 1.0239120748551909e-05, - "loss": 0.0463, - "step": 44350 - }, - { - "epoch": 3.2942224862616962, - "grad_norm": 1.026248574256897, - "learning_rate": 1.0234665082429822e-05, - "loss": 0.06, - "step": 44360 - }, - { - "epoch": 3.2949650972820437, - "grad_norm": 1.5957810878753662, - "learning_rate": 1.0230209416307737e-05, - "loss": 0.0462, - "step": 44370 - }, - { - "epoch": 3.295707708302391, - "grad_norm": 0.7169394493103027, - "learning_rate": 1.0225753750185654e-05, - "loss": 0.0872, - "step": 44380 - }, - { - "epoch": 3.2964503193227386, - "grad_norm": 1.4847623109817505, - "learning_rate": 1.0221298084063567e-05, - "loss": 0.0513, - "step": 44390 - }, - { - "epoch": 3.2971929303430865, - "grad_norm": 1.8581526279449463, - "learning_rate": 1.0216842417941482e-05, - "loss": 0.0663, - "step": 44400 - }, - { - "epoch": 3.297935541363434, - "grad_norm": 0.3895215690135956, - "learning_rate": 1.0212386751819397e-05, - "loss": 0.0684, - "step": 44410 - }, - { - "epoch": 3.2986781523837814, - "grad_norm": 0.4079320728778839, - "learning_rate": 1.0207931085697312e-05, - "loss": 0.0593, - "step": 44420 - }, - { - "epoch": 3.299420763404129, - "grad_norm": 1.5425752401351929, - "learning_rate": 1.0203475419575227e-05, - "loss": 0.0927, - "step": 44430 - }, - { - "epoch": 3.3001633744244763, - "grad_norm": 1.9693676233291626, - "learning_rate": 1.019901975345314e-05, - "loss": 0.091, - "step": 44440 - }, - { - "epoch": 3.300905985444824, - "grad_norm": 1.5233045816421509, - "learning_rate": 1.0194564087331057e-05, - "loss": 0.081, - "step": 44450 - }, - { - "epoch": 3.3016485964651716, - "grad_norm": 1.4039238691329956, - "learning_rate": 1.019010842120897e-05, - "loss": 0.0609, - "step": 44460 - }, - { - "epoch": 3.302391207485519, - "grad_norm": 1.4866162538528442, - "learning_rate": 1.0185652755086886e-05, - "loss": 0.0607, - "step": 44470 - }, - { - "epoch": 3.3031338185058665, - "grad_norm": 2.2782821655273438, - "learning_rate": 1.01811970889648e-05, - "loss": 0.0506, - "step": 44480 - }, - { - "epoch": 3.3038764295262144, - "grad_norm": 1.1079455614089966, - "learning_rate": 1.0176741422842716e-05, - "loss": 0.0785, - "step": 44490 - }, - { - "epoch": 3.304619040546562, - "grad_norm": 2.9163553714752197, - "learning_rate": 1.017228575672063e-05, - "loss": 0.0635, - "step": 44500 - }, - { - "epoch": 3.3053616515669093, - "grad_norm": 2.6138124465942383, - "learning_rate": 1.0167830090598544e-05, - "loss": 0.0539, - "step": 44510 - }, - { - "epoch": 3.3061042625872568, - "grad_norm": 1.3911471366882324, - "learning_rate": 1.0163374424476459e-05, - "loss": 0.0717, - "step": 44520 - }, - { - "epoch": 3.306846873607604, - "grad_norm": 1.2493705749511719, - "learning_rate": 1.0158918758354374e-05, - "loss": 0.0833, - "step": 44530 - }, - { - "epoch": 3.307589484627952, - "grad_norm": 0.6470643877983093, - "learning_rate": 1.0154463092232289e-05, - "loss": 0.0834, - "step": 44540 - }, - { - "epoch": 3.3083320956482996, - "grad_norm": 1.346933126449585, - "learning_rate": 1.0150007426110204e-05, - "loss": 0.0417, - "step": 44550 - }, - { - "epoch": 3.309074706668647, - "grad_norm": 1.7907460927963257, - "learning_rate": 1.0145551759988119e-05, - "loss": 0.0613, - "step": 44560 - }, - { - "epoch": 3.3098173176889945, - "grad_norm": 3.1035265922546387, - "learning_rate": 1.0141096093866034e-05, - "loss": 0.0773, - "step": 44570 - }, - { - "epoch": 3.310559928709342, - "grad_norm": 1.8176069259643555, - "learning_rate": 1.0136640427743947e-05, - "loss": 0.0679, - "step": 44580 - }, - { - "epoch": 3.31130253972969, - "grad_norm": 1.3476163148880005, - "learning_rate": 1.0132184761621862e-05, - "loss": 0.066, - "step": 44590 - }, - { - "epoch": 3.3120451507500372, - "grad_norm": 1.3628586530685425, - "learning_rate": 1.0127729095499779e-05, - "loss": 0.0671, - "step": 44600 - }, - { - "epoch": 3.3127877617703847, - "grad_norm": 0.9032784104347229, - "learning_rate": 1.0123273429377692e-05, - "loss": 0.0828, - "step": 44610 - }, - { - "epoch": 3.313530372790732, - "grad_norm": 1.4668526649475098, - "learning_rate": 1.0118817763255607e-05, - "loss": 0.0669, - "step": 44620 - }, - { - "epoch": 3.3142729838110796, - "grad_norm": 0.5808708071708679, - "learning_rate": 1.011436209713352e-05, - "loss": 0.0926, - "step": 44630 - }, - { - "epoch": 3.3150155948314275, - "grad_norm": 0.9334034323692322, - "learning_rate": 1.0109906431011437e-05, - "loss": 0.0695, - "step": 44640 - }, - { - "epoch": 3.315758205851775, - "grad_norm": 2.833603858947754, - "learning_rate": 1.010545076488935e-05, - "loss": 0.0559, - "step": 44650 - }, - { - "epoch": 3.3165008168721224, - "grad_norm": 0.9554916620254517, - "learning_rate": 1.0100995098767265e-05, - "loss": 0.0612, - "step": 44660 - }, - { - "epoch": 3.31724342789247, - "grad_norm": 2.4863193035125732, - "learning_rate": 1.0096539432645182e-05, - "loss": 0.0585, - "step": 44670 - }, - { - "epoch": 3.3179860389128173, - "grad_norm": 1.2843139171600342, - "learning_rate": 1.0092083766523095e-05, - "loss": 0.057, - "step": 44680 - }, - { - "epoch": 3.318728649933165, - "grad_norm": 3.115098237991333, - "learning_rate": 1.008762810040101e-05, - "loss": 0.062, - "step": 44690 - }, - { - "epoch": 3.3194712609535126, - "grad_norm": 1.55138099193573, - "learning_rate": 1.0083172434278924e-05, - "loss": 0.0757, - "step": 44700 - }, - { - "epoch": 3.32021387197386, - "grad_norm": 2.5275778770446777, - "learning_rate": 1.007871676815684e-05, - "loss": 0.0794, - "step": 44710 - }, - { - "epoch": 3.3209564829942075, - "grad_norm": 2.697035551071167, - "learning_rate": 1.0074261102034755e-05, - "loss": 0.0467, - "step": 44720 - }, - { - "epoch": 3.321699094014555, - "grad_norm": 0.9199452996253967, - "learning_rate": 1.0069805435912669e-05, - "loss": 0.0822, - "step": 44730 - }, - { - "epoch": 3.322441705034903, - "grad_norm": 2.4956889152526855, - "learning_rate": 1.0065349769790584e-05, - "loss": 0.0608, - "step": 44740 - }, - { - "epoch": 3.3231843160552503, - "grad_norm": 2.050920248031616, - "learning_rate": 1.0060894103668499e-05, - "loss": 0.0895, - "step": 44750 - }, - { - "epoch": 3.323926927075598, - "grad_norm": 1.369531273841858, - "learning_rate": 1.0056438437546414e-05, - "loss": 0.0515, - "step": 44760 - }, - { - "epoch": 3.3246695380959452, - "grad_norm": 3.23797345161438, - "learning_rate": 1.0051982771424329e-05, - "loss": 0.0678, - "step": 44770 - }, - { - "epoch": 3.3254121491162927, - "grad_norm": 0.7047389149665833, - "learning_rate": 1.0047527105302242e-05, - "loss": 0.0508, - "step": 44780 - }, - { - "epoch": 3.3261547601366406, - "grad_norm": 0.7370045781135559, - "learning_rate": 1.0043071439180159e-05, - "loss": 0.0548, - "step": 44790 - }, - { - "epoch": 3.326897371156988, - "grad_norm": 1.6927924156188965, - "learning_rate": 1.0038615773058072e-05, - "loss": 0.0859, - "step": 44800 - }, - { - "epoch": 3.3276399821773355, - "grad_norm": 0.9936562180519104, - "learning_rate": 1.0034160106935987e-05, - "loss": 0.0641, - "step": 44810 - }, - { - "epoch": 3.328382593197683, - "grad_norm": 1.1975995302200317, - "learning_rate": 1.0029704440813902e-05, - "loss": 0.0758, - "step": 44820 - }, - { - "epoch": 3.3291252042180304, - "grad_norm": 0.6930918097496033, - "learning_rate": 1.0025248774691817e-05, - "loss": 0.0674, - "step": 44830 - }, - { - "epoch": 3.3298678152383783, - "grad_norm": 1.631554365158081, - "learning_rate": 1.0020793108569732e-05, - "loss": 0.0528, - "step": 44840 - }, - { - "epoch": 3.3306104262587257, - "grad_norm": 0.4644923508167267, - "learning_rate": 1.0016337442447645e-05, - "loss": 0.0543, - "step": 44850 - }, - { - "epoch": 3.331353037279073, - "grad_norm": 0.8077749013900757, - "learning_rate": 1.0011881776325562e-05, - "loss": 0.0411, - "step": 44860 - }, - { - "epoch": 3.3320956482994206, - "grad_norm": 0.5882359743118286, - "learning_rate": 1.0007426110203475e-05, - "loss": 0.038, - "step": 44870 - }, - { - "epoch": 3.332838259319768, - "grad_norm": 2.1609537601470947, - "learning_rate": 1.000297044408139e-05, - "loss": 0.0709, - "step": 44880 - }, - { - "epoch": 3.333580870340116, - "grad_norm": 2.9716951847076416, - "learning_rate": 9.998514777959305e-06, - "loss": 0.0759, - "step": 44890 - }, - { - "epoch": 3.3343234813604634, - "grad_norm": 1.4051735401153564, - "learning_rate": 9.99405911183722e-06, - "loss": 0.0842, - "step": 44900 - }, - { - "epoch": 3.335066092380811, - "grad_norm": 1.0370116233825684, - "learning_rate": 9.989603445715135e-06, - "loss": 0.0574, - "step": 44910 - }, - { - "epoch": 3.3358087034011583, - "grad_norm": 2.2001404762268066, - "learning_rate": 9.985147779593049e-06, - "loss": 0.0711, - "step": 44920 - }, - { - "epoch": 3.336551314421506, - "grad_norm": 1.836188554763794, - "learning_rate": 9.980692113470964e-06, - "loss": 0.0672, - "step": 44930 - }, - { - "epoch": 3.3372939254418537, - "grad_norm": 3.0078184604644775, - "learning_rate": 9.976236447348879e-06, - "loss": 0.073, - "step": 44940 - }, - { - "epoch": 3.338036536462201, - "grad_norm": 0.9165183305740356, - "learning_rate": 9.971780781226794e-06, - "loss": 0.0768, - "step": 44950 - }, - { - "epoch": 3.3387791474825486, - "grad_norm": 1.1523520946502686, - "learning_rate": 9.967325115104709e-06, - "loss": 0.0325, - "step": 44960 - }, - { - "epoch": 3.339521758502896, - "grad_norm": 0.701426088809967, - "learning_rate": 9.962869448982624e-06, - "loss": 0.0557, - "step": 44970 - }, - { - "epoch": 3.340264369523244, - "grad_norm": 0.9896045327186584, - "learning_rate": 9.958413782860539e-06, - "loss": 0.0438, - "step": 44980 - }, - { - "epoch": 3.3410069805435914, - "grad_norm": 1.784203290939331, - "learning_rate": 9.953958116738452e-06, - "loss": 0.0509, - "step": 44990 - }, - { - "epoch": 3.341749591563939, - "grad_norm": 1.9945133924484253, - "learning_rate": 9.949502450616367e-06, - "loss": 0.0682, - "step": 45000 - }, - { - "epoch": 3.3424922025842863, - "grad_norm": 2.4926345348358154, - "learning_rate": 9.945046784494283e-06, - "loss": 0.0784, - "step": 45010 - }, - { - "epoch": 3.3432348136046337, - "grad_norm": 1.1978400945663452, - "learning_rate": 9.940591118372197e-06, - "loss": 0.0644, - "step": 45020 - }, - { - "epoch": 3.3439774246249816, - "grad_norm": 0.44274619221687317, - "learning_rate": 9.936135452250112e-06, - "loss": 0.0558, - "step": 45030 - }, - { - "epoch": 3.344720035645329, - "grad_norm": 0.9496433138847351, - "learning_rate": 9.931679786128025e-06, - "loss": 0.0406, - "step": 45040 - }, - { - "epoch": 3.3454626466656765, - "grad_norm": 0.4227916896343231, - "learning_rate": 9.927224120005942e-06, - "loss": 0.0446, - "step": 45050 - }, - { - "epoch": 3.346205257686024, - "grad_norm": 1.6861997842788696, - "learning_rate": 9.922768453883855e-06, - "loss": 0.0576, - "step": 45060 - }, - { - "epoch": 3.346947868706372, - "grad_norm": 2.8230645656585693, - "learning_rate": 9.91831278776177e-06, - "loss": 0.055, - "step": 45070 - }, - { - "epoch": 3.3476904797267193, - "grad_norm": 1.0160224437713623, - "learning_rate": 9.913857121639687e-06, - "loss": 0.09, - "step": 45080 - }, - { - "epoch": 3.3484330907470667, - "grad_norm": 2.2444396018981934, - "learning_rate": 9.9094014555176e-06, - "loss": 0.0685, - "step": 45090 - }, - { - "epoch": 3.349175701767414, - "grad_norm": 1.4811400175094604, - "learning_rate": 9.904945789395515e-06, - "loss": 0.0576, - "step": 45100 - }, - { - "epoch": 3.3499183127877616, - "grad_norm": 2.1645710468292236, - "learning_rate": 9.900490123273428e-06, - "loss": 0.0926, - "step": 45110 - }, - { - "epoch": 3.3506609238081095, - "grad_norm": 2.3082311153411865, - "learning_rate": 9.896034457151345e-06, - "loss": 0.0536, - "step": 45120 - }, - { - "epoch": 3.351403534828457, - "grad_norm": 1.518615961074829, - "learning_rate": 9.89157879102926e-06, - "loss": 0.0877, - "step": 45130 - }, - { - "epoch": 3.3521461458488044, - "grad_norm": 0.9290609955787659, - "learning_rate": 9.887123124907173e-06, - "loss": 0.0549, - "step": 45140 - }, - { - "epoch": 3.352888756869152, - "grad_norm": 2.0847578048706055, - "learning_rate": 9.882667458785088e-06, - "loss": 0.0656, - "step": 45150 - }, - { - "epoch": 3.3536313678894993, - "grad_norm": 2.6729955673217773, - "learning_rate": 9.878211792663003e-06, - "loss": 0.0772, - "step": 45160 - }, - { - "epoch": 3.3543739789098472, - "grad_norm": 2.263134241104126, - "learning_rate": 9.873756126540918e-06, - "loss": 0.0689, - "step": 45170 - }, - { - "epoch": 3.3551165899301947, - "grad_norm": 0.8726534247398376, - "learning_rate": 9.869300460418833e-06, - "loss": 0.0652, - "step": 45180 - }, - { - "epoch": 3.355859200950542, - "grad_norm": 1.9389985799789429, - "learning_rate": 9.864844794296747e-06, - "loss": 0.0787, - "step": 45190 - }, - { - "epoch": 3.3566018119708896, - "grad_norm": 2.6896302700042725, - "learning_rate": 9.860389128174663e-06, - "loss": 0.0622, - "step": 45200 - }, - { - "epoch": 3.357344422991237, - "grad_norm": 1.6283527612686157, - "learning_rate": 9.855933462052577e-06, - "loss": 0.052, - "step": 45210 - }, - { - "epoch": 3.358087034011585, - "grad_norm": 1.3546130657196045, - "learning_rate": 9.851477795930492e-06, - "loss": 0.0797, - "step": 45220 - }, - { - "epoch": 3.3588296450319324, - "grad_norm": 1.457862377166748, - "learning_rate": 9.847022129808407e-06, - "loss": 0.0727, - "step": 45230 - }, - { - "epoch": 3.35957225605228, - "grad_norm": 0.8017680644989014, - "learning_rate": 9.842566463686322e-06, - "loss": 0.0362, - "step": 45240 - }, - { - "epoch": 3.3603148670726273, - "grad_norm": 1.1099777221679688, - "learning_rate": 9.838110797564237e-06, - "loss": 0.0549, - "step": 45250 - }, - { - "epoch": 3.3610574780929747, - "grad_norm": 2.5298869609832764, - "learning_rate": 9.83365513144215e-06, - "loss": 0.0445, - "step": 45260 - }, - { - "epoch": 3.3618000891133226, - "grad_norm": 0.5401008725166321, - "learning_rate": 9.829199465320067e-06, - "loss": 0.0526, - "step": 45270 - }, - { - "epoch": 3.36254270013367, - "grad_norm": 1.3315315246582031, - "learning_rate": 9.82474379919798e-06, - "loss": 0.0675, - "step": 45280 - }, - { - "epoch": 3.3632853111540175, - "grad_norm": 0.9110653400421143, - "learning_rate": 9.820288133075895e-06, - "loss": 0.0365, - "step": 45290 - }, - { - "epoch": 3.364027922174365, - "grad_norm": 1.0673272609710693, - "learning_rate": 9.81583246695381e-06, - "loss": 0.0603, - "step": 45300 - }, - { - "epoch": 3.3647705331947124, - "grad_norm": 1.7336030006408691, - "learning_rate": 9.811376800831725e-06, - "loss": 0.0634, - "step": 45310 - }, - { - "epoch": 3.3655131442150603, - "grad_norm": 0.638027548789978, - "learning_rate": 9.80692113470964e-06, - "loss": 0.041, - "step": 45320 - }, - { - "epoch": 3.3662557552354078, - "grad_norm": 0.6306934356689453, - "learning_rate": 9.802465468587553e-06, - "loss": 0.092, - "step": 45330 - }, - { - "epoch": 3.366998366255755, - "grad_norm": 1.2354300022125244, - "learning_rate": 9.79800980246547e-06, - "loss": 0.0729, - "step": 45340 - }, - { - "epoch": 3.3677409772761027, - "grad_norm": 0.46772605180740356, - "learning_rate": 9.793554136343383e-06, - "loss": 0.039, - "step": 45350 - }, - { - "epoch": 3.36848358829645, - "grad_norm": 0.28647175431251526, - "learning_rate": 9.789098470221298e-06, - "loss": 0.0544, - "step": 45360 - }, - { - "epoch": 3.369226199316798, - "grad_norm": 0.0804813876748085, - "learning_rate": 9.784642804099213e-06, - "loss": 0.0483, - "step": 45370 - }, - { - "epoch": 3.3699688103371455, - "grad_norm": 2.941643476486206, - "learning_rate": 9.780187137977128e-06, - "loss": 0.0728, - "step": 45380 - }, - { - "epoch": 3.370711421357493, - "grad_norm": 1.7482622861862183, - "learning_rate": 9.775731471855043e-06, - "loss": 0.0857, - "step": 45390 - }, - { - "epoch": 3.3714540323778404, - "grad_norm": 3.30426025390625, - "learning_rate": 9.771275805732956e-06, - "loss": 0.0671, - "step": 45400 - }, - { - "epoch": 3.372196643398188, - "grad_norm": 1.9652279615402222, - "learning_rate": 9.766820139610871e-06, - "loss": 0.0693, - "step": 45410 - }, - { - "epoch": 3.3729392544185357, - "grad_norm": 0.7707439064979553, - "learning_rate": 9.762364473488788e-06, - "loss": 0.0572, - "step": 45420 - }, - { - "epoch": 3.373681865438883, - "grad_norm": 0.6071941256523132, - "learning_rate": 9.757908807366701e-06, - "loss": 0.0451, - "step": 45430 - }, - { - "epoch": 3.3744244764592306, - "grad_norm": 1.8582054376602173, - "learning_rate": 9.753453141244616e-06, - "loss": 0.0391, - "step": 45440 - }, - { - "epoch": 3.375167087479578, - "grad_norm": 1.5152584314346313, - "learning_rate": 9.74899747512253e-06, - "loss": 0.0941, - "step": 45450 - }, - { - "epoch": 3.3759096984999255, - "grad_norm": 0.9906954169273376, - "learning_rate": 9.744541809000446e-06, - "loss": 0.0514, - "step": 45460 - }, - { - "epoch": 3.3766523095202734, - "grad_norm": 1.4278010129928589, - "learning_rate": 9.740086142878361e-06, - "loss": 0.0492, - "step": 45470 - }, - { - "epoch": 3.377394920540621, - "grad_norm": 2.468226671218872, - "learning_rate": 9.735630476756275e-06, - "loss": 0.0692, - "step": 45480 - }, - { - "epoch": 3.3781375315609683, - "grad_norm": 0.5829160809516907, - "learning_rate": 9.731174810634191e-06, - "loss": 0.0661, - "step": 45490 - }, - { - "epoch": 3.3788801425813157, - "grad_norm": 1.5764065980911255, - "learning_rate": 9.726719144512105e-06, - "loss": 0.0695, - "step": 45500 - }, - { - "epoch": 3.3796227536016636, - "grad_norm": 0.9860460162162781, - "learning_rate": 9.72226347839002e-06, - "loss": 0.0474, - "step": 45510 - }, - { - "epoch": 3.380365364622011, - "grad_norm": 1.8442999124526978, - "learning_rate": 9.717807812267933e-06, - "loss": 0.1209, - "step": 45520 - }, - { - "epoch": 3.3811079756423585, - "grad_norm": 3.0390632152557373, - "learning_rate": 9.71335214614585e-06, - "loss": 0.0604, - "step": 45530 - }, - { - "epoch": 3.381850586662706, - "grad_norm": 1.5547683238983154, - "learning_rate": 9.708896480023765e-06, - "loss": 0.093, - "step": 45540 - }, - { - "epoch": 3.3825931976830534, - "grad_norm": 2.504794120788574, - "learning_rate": 9.704440813901678e-06, - "loss": 0.0385, - "step": 45550 - }, - { - "epoch": 3.3833358087034013, - "grad_norm": 2.069444417953491, - "learning_rate": 9.699985147779593e-06, - "loss": 0.0766, - "step": 45560 - }, - { - "epoch": 3.384078419723749, - "grad_norm": 1.7017885446548462, - "learning_rate": 9.695529481657508e-06, - "loss": 0.066, - "step": 45570 - }, - { - "epoch": 3.3848210307440962, - "grad_norm": 1.3779213428497314, - "learning_rate": 9.691073815535423e-06, - "loss": 0.0581, - "step": 45580 - }, - { - "epoch": 3.3855636417644437, - "grad_norm": 2.676387071609497, - "learning_rate": 9.686618149413338e-06, - "loss": 0.0898, - "step": 45590 - }, - { - "epoch": 3.386306252784791, - "grad_norm": 1.7272534370422363, - "learning_rate": 9.682162483291253e-06, - "loss": 0.0493, - "step": 45600 - }, - { - "epoch": 3.387048863805139, - "grad_norm": 0.43985655903816223, - "learning_rate": 9.677706817169168e-06, - "loss": 0.0536, - "step": 45610 - }, - { - "epoch": 3.3877914748254865, - "grad_norm": 2.7680649757385254, - "learning_rate": 9.673251151047081e-06, - "loss": 0.0442, - "step": 45620 - }, - { - "epoch": 3.388534085845834, - "grad_norm": 2.375917911529541, - "learning_rate": 9.668795484924996e-06, - "loss": 0.0681, - "step": 45630 - }, - { - "epoch": 3.3892766968661814, - "grad_norm": 1.5520646572113037, - "learning_rate": 9.664339818802911e-06, - "loss": 0.0533, - "step": 45640 - }, - { - "epoch": 3.3900193078865293, - "grad_norm": 0.7106296420097351, - "learning_rate": 9.659884152680826e-06, - "loss": 0.0527, - "step": 45650 - }, - { - "epoch": 3.3907619189068767, - "grad_norm": 1.5558034181594849, - "learning_rate": 9.655428486558741e-06, - "loss": 0.0664, - "step": 45660 - }, - { - "epoch": 3.391504529927224, - "grad_norm": 2.0336802005767822, - "learning_rate": 9.650972820436654e-06, - "loss": 0.0744, - "step": 45670 - }, - { - "epoch": 3.3922471409475716, - "grad_norm": 2.6379876136779785, - "learning_rate": 9.646517154314571e-06, - "loss": 0.0752, - "step": 45680 - }, - { - "epoch": 3.392989751967919, - "grad_norm": 1.7118732929229736, - "learning_rate": 9.642061488192484e-06, - "loss": 0.0775, - "step": 45690 - }, - { - "epoch": 3.393732362988267, - "grad_norm": 0.5552663803100586, - "learning_rate": 9.6376058220704e-06, - "loss": 0.0601, - "step": 45700 - }, - { - "epoch": 3.3944749740086144, - "grad_norm": 1.9017750024795532, - "learning_rate": 9.633150155948314e-06, - "loss": 0.0687, - "step": 45710 - }, - { - "epoch": 3.395217585028962, - "grad_norm": 0.6380416750907898, - "learning_rate": 9.62869448982623e-06, - "loss": 0.0657, - "step": 45720 - }, - { - "epoch": 3.3959601960493093, - "grad_norm": 0.7644681930541992, - "learning_rate": 9.624238823704144e-06, - "loss": 0.0563, - "step": 45730 - }, - { - "epoch": 3.3967028070696568, - "grad_norm": 3.4050753116607666, - "learning_rate": 9.619783157582058e-06, - "loss": 0.0557, - "step": 45740 - }, - { - "epoch": 3.3974454180900047, - "grad_norm": 2.2620880603790283, - "learning_rate": 9.615327491459974e-06, - "loss": 0.0667, - "step": 45750 - }, - { - "epoch": 3.398188029110352, - "grad_norm": 1.442107915878296, - "learning_rate": 9.610871825337888e-06, - "loss": 0.0644, - "step": 45760 - }, - { - "epoch": 3.3989306401306996, - "grad_norm": 0.9981054067611694, - "learning_rate": 9.606416159215803e-06, - "loss": 0.0412, - "step": 45770 - }, - { - "epoch": 3.399673251151047, - "grad_norm": 0.9032704830169678, - "learning_rate": 9.601960493093718e-06, - "loss": 0.0779, - "step": 45780 - }, - { - "epoch": 3.4004158621713945, - "grad_norm": 1.9194782972335815, - "learning_rate": 9.597504826971633e-06, - "loss": 0.0679, - "step": 45790 - }, - { - "epoch": 3.4011584731917424, - "grad_norm": 2.2495553493499756, - "learning_rate": 9.593049160849548e-06, - "loss": 0.0495, - "step": 45800 - }, - { - "epoch": 3.40190108421209, - "grad_norm": 0.7401419878005981, - "learning_rate": 9.588593494727461e-06, - "loss": 0.0372, - "step": 45810 - }, - { - "epoch": 3.4026436952324373, - "grad_norm": 2.4702985286712646, - "learning_rate": 9.584137828605376e-06, - "loss": 0.0629, - "step": 45820 - }, - { - "epoch": 3.4033863062527847, - "grad_norm": 1.2022415399551392, - "learning_rate": 9.579682162483293e-06, - "loss": 0.0573, - "step": 45830 - }, - { - "epoch": 3.404128917273132, - "grad_norm": 1.8239872455596924, - "learning_rate": 9.575226496361206e-06, - "loss": 0.0513, - "step": 45840 - }, - { - "epoch": 3.40487152829348, - "grad_norm": 2.117016553878784, - "learning_rate": 9.570770830239121e-06, - "loss": 0.0671, - "step": 45850 - }, - { - "epoch": 3.4056141393138275, - "grad_norm": 1.195753574371338, - "learning_rate": 9.566315164117036e-06, - "loss": 0.0544, - "step": 45860 - }, - { - "epoch": 3.406356750334175, - "grad_norm": 0.719028890132904, - "learning_rate": 9.561859497994951e-06, - "loss": 0.058, - "step": 45870 - }, - { - "epoch": 3.4070993613545224, - "grad_norm": 0.7662678360939026, - "learning_rate": 9.557403831872866e-06, - "loss": 0.06, - "step": 45880 - }, - { - "epoch": 3.40784197237487, - "grad_norm": 1.3117008209228516, - "learning_rate": 9.55294816575078e-06, - "loss": 0.0367, - "step": 45890 - }, - { - "epoch": 3.4085845833952177, - "grad_norm": 0.3628579080104828, - "learning_rate": 9.548492499628696e-06, - "loss": 0.0582, - "step": 45900 - }, - { - "epoch": 3.409327194415565, - "grad_norm": 3.9150137901306152, - "learning_rate": 9.54403683350661e-06, - "loss": 0.0684, - "step": 45910 - }, - { - "epoch": 3.4100698054359126, - "grad_norm": 1.5000407695770264, - "learning_rate": 9.539581167384524e-06, - "loss": 0.0871, - "step": 45920 - }, - { - "epoch": 3.41081241645626, - "grad_norm": 1.804474115371704, - "learning_rate": 9.535125501262438e-06, - "loss": 0.0469, - "step": 45930 - }, - { - "epoch": 3.4115550274766075, - "grad_norm": 1.7255808115005493, - "learning_rate": 9.530669835140354e-06, - "loss": 0.0358, - "step": 45940 - }, - { - "epoch": 3.4122976384969554, - "grad_norm": 1.890074372291565, - "learning_rate": 9.52621416901827e-06, - "loss": 0.0541, - "step": 45950 - }, - { - "epoch": 3.413040249517303, - "grad_norm": 1.5225468873977661, - "learning_rate": 9.521758502896183e-06, - "loss": 0.074, - "step": 45960 - }, - { - "epoch": 3.4137828605376503, - "grad_norm": 1.2024898529052734, - "learning_rate": 9.517302836774098e-06, - "loss": 0.0931, - "step": 45970 - }, - { - "epoch": 3.414525471557998, - "grad_norm": 5.313319683074951, - "learning_rate": 9.512847170652013e-06, - "loss": 0.0663, - "step": 45980 - }, - { - "epoch": 3.4152680825783452, - "grad_norm": 1.3511769771575928, - "learning_rate": 9.508391504529928e-06, - "loss": 0.0887, - "step": 45990 - }, - { - "epoch": 3.416010693598693, - "grad_norm": 2.674060344696045, - "learning_rate": 9.503935838407843e-06, - "loss": 0.0518, - "step": 46000 - }, - { - "epoch": 3.4167533046190406, - "grad_norm": 0.3742149770259857, - "learning_rate": 9.499480172285757e-06, - "loss": 0.0483, - "step": 46010 - }, - { - "epoch": 3.417495915639388, - "grad_norm": 0.4566430151462555, - "learning_rate": 9.495024506163672e-06, - "loss": 0.0641, - "step": 46020 - }, - { - "epoch": 3.4182385266597355, - "grad_norm": 1.1455416679382324, - "learning_rate": 9.490568840041586e-06, - "loss": 0.0723, - "step": 46030 - }, - { - "epoch": 3.418981137680083, - "grad_norm": 0.7190825343132019, - "learning_rate": 9.4861131739195e-06, - "loss": 0.0622, - "step": 46040 - }, - { - "epoch": 3.419723748700431, - "grad_norm": 0.6445209383964539, - "learning_rate": 9.481657507797416e-06, - "loss": 0.0357, - "step": 46050 - }, - { - "epoch": 3.4204663597207783, - "grad_norm": 2.212388038635254, - "learning_rate": 9.47720184167533e-06, - "loss": 0.0446, - "step": 46060 - }, - { - "epoch": 3.4212089707411257, - "grad_norm": 1.0653600692749023, - "learning_rate": 9.472746175553246e-06, - "loss": 0.099, - "step": 46070 - }, - { - "epoch": 3.421951581761473, - "grad_norm": 2.42584228515625, - "learning_rate": 9.468290509431159e-06, - "loss": 0.0587, - "step": 46080 - }, - { - "epoch": 3.422694192781821, - "grad_norm": 2.0575003623962402, - "learning_rate": 9.463834843309076e-06, - "loss": 0.0777, - "step": 46090 - }, - { - "epoch": 3.4234368038021685, - "grad_norm": 2.0597803592681885, - "learning_rate": 9.459379177186989e-06, - "loss": 0.0569, - "step": 46100 - }, - { - "epoch": 3.424179414822516, - "grad_norm": 1.2137857675552368, - "learning_rate": 9.454923511064904e-06, - "loss": 0.0696, - "step": 46110 - }, - { - "epoch": 3.4249220258428634, - "grad_norm": 1.2219140529632568, - "learning_rate": 9.45046784494282e-06, - "loss": 0.0644, - "step": 46120 - }, - { - "epoch": 3.425664636863211, - "grad_norm": 0.3102966845035553, - "learning_rate": 9.446012178820734e-06, - "loss": 0.0485, - "step": 46130 - }, - { - "epoch": 3.4264072478835588, - "grad_norm": 3.548948287963867, - "learning_rate": 9.441556512698649e-06, - "loss": 0.0802, - "step": 46140 - }, - { - "epoch": 3.427149858903906, - "grad_norm": 1.5830320119857788, - "learning_rate": 9.437100846576562e-06, - "loss": 0.0685, - "step": 46150 - }, - { - "epoch": 3.4278924699242537, - "grad_norm": 0.3730054795742035, - "learning_rate": 9.432645180454479e-06, - "loss": 0.0555, - "step": 46160 - }, - { - "epoch": 3.428635080944601, - "grad_norm": 2.1286816596984863, - "learning_rate": 9.428189514332394e-06, - "loss": 0.0546, - "step": 46170 - }, - { - "epoch": 3.4293776919649486, - "grad_norm": 0.7147573828697205, - "learning_rate": 9.423733848210307e-06, - "loss": 0.0516, - "step": 46180 - }, - { - "epoch": 3.4301203029852965, - "grad_norm": 2.009298324584961, - "learning_rate": 9.419278182088222e-06, - "loss": 0.0667, - "step": 46190 - }, - { - "epoch": 3.430862914005644, - "grad_norm": 1.3036754131317139, - "learning_rate": 9.414822515966137e-06, - "loss": 0.0738, - "step": 46200 - }, - { - "epoch": 3.4316055250259914, - "grad_norm": 1.2708832025527954, - "learning_rate": 9.410366849844052e-06, - "loss": 0.0378, - "step": 46210 - }, - { - "epoch": 3.432348136046339, - "grad_norm": 1.8291300535202026, - "learning_rate": 9.405911183721966e-06, - "loss": 0.0484, - "step": 46220 - }, - { - "epoch": 3.4330907470666867, - "grad_norm": 3.277575731277466, - "learning_rate": 9.40145551759988e-06, - "loss": 0.0766, - "step": 46230 - }, - { - "epoch": 3.433833358087034, - "grad_norm": 2.973456859588623, - "learning_rate": 9.396999851477797e-06, - "loss": 0.0464, - "step": 46240 - }, - { - "epoch": 3.4345759691073816, - "grad_norm": 1.2118042707443237, - "learning_rate": 9.39254418535571e-06, - "loss": 0.0573, - "step": 46250 - }, - { - "epoch": 3.435318580127729, - "grad_norm": 1.3770191669464111, - "learning_rate": 9.388088519233626e-06, - "loss": 0.0598, - "step": 46260 - }, - { - "epoch": 3.4360611911480765, - "grad_norm": 2.709092855453491, - "learning_rate": 9.38363285311154e-06, - "loss": 0.0743, - "step": 46270 - }, - { - "epoch": 3.4368038021684244, - "grad_norm": 0.41751283407211304, - "learning_rate": 9.379177186989456e-06, - "loss": 0.0687, - "step": 46280 - }, - { - "epoch": 3.437546413188772, - "grad_norm": 0.8437953591346741, - "learning_rate": 9.37472152086737e-06, - "loss": 0.047, - "step": 46290 - }, - { - "epoch": 3.4382890242091193, - "grad_norm": 2.391899824142456, - "learning_rate": 9.370265854745284e-06, - "loss": 0.0499, - "step": 46300 - }, - { - "epoch": 3.4390316352294668, - "grad_norm": 1.9807353019714355, - "learning_rate": 9.3658101886232e-06, - "loss": 0.0596, - "step": 46310 - }, - { - "epoch": 3.439774246249814, - "grad_norm": 0.2767632007598877, - "learning_rate": 9.361354522501114e-06, - "loss": 0.0419, - "step": 46320 - }, - { - "epoch": 3.440516857270162, - "grad_norm": 2.447248697280884, - "learning_rate": 9.356898856379029e-06, - "loss": 0.0679, - "step": 46330 - }, - { - "epoch": 3.4412594682905095, - "grad_norm": 1.484784722328186, - "learning_rate": 9.352443190256942e-06, - "loss": 0.039, - "step": 46340 - }, - { - "epoch": 3.442002079310857, - "grad_norm": 1.6514251232147217, - "learning_rate": 9.347987524134859e-06, - "loss": 0.0346, - "step": 46350 - }, - { - "epoch": 3.4427446903312044, - "grad_norm": 1.2701706886291504, - "learning_rate": 9.343531858012774e-06, - "loss": 0.0298, - "step": 46360 - }, - { - "epoch": 3.443487301351552, - "grad_norm": 6.292190074920654, - "learning_rate": 9.339076191890687e-06, - "loss": 0.0471, - "step": 46370 - }, - { - "epoch": 3.4442299123719, - "grad_norm": 1.1822702884674072, - "learning_rate": 9.334620525768604e-06, - "loss": 0.088, - "step": 46380 - }, - { - "epoch": 3.4449725233922472, - "grad_norm": 2.342872142791748, - "learning_rate": 9.330164859646517e-06, - "loss": 0.0844, - "step": 46390 - }, - { - "epoch": 3.4457151344125947, - "grad_norm": 0.5191331505775452, - "learning_rate": 9.325709193524432e-06, - "loss": 0.0737, - "step": 46400 - }, - { - "epoch": 3.446457745432942, - "grad_norm": 2.7693073749542236, - "learning_rate": 9.321253527402347e-06, - "loss": 0.0634, - "step": 46410 - }, - { - "epoch": 3.4472003564532896, - "grad_norm": 1.8164703845977783, - "learning_rate": 9.316797861280262e-06, - "loss": 0.0518, - "step": 46420 - }, - { - "epoch": 3.4479429674736375, - "grad_norm": 0.6304923295974731, - "learning_rate": 9.312342195158177e-06, - "loss": 0.0483, - "step": 46430 - }, - { - "epoch": 3.448685578493985, - "grad_norm": 0.35260432958602905, - "learning_rate": 9.30788652903609e-06, - "loss": 0.0711, - "step": 46440 - }, - { - "epoch": 3.4494281895143324, - "grad_norm": 1.1337438821792603, - "learning_rate": 9.303430862914005e-06, - "loss": 0.0628, - "step": 46450 - }, - { - "epoch": 3.45017080053468, - "grad_norm": 1.7715853452682495, - "learning_rate": 9.29897519679192e-06, - "loss": 0.0582, - "step": 46460 - }, - { - "epoch": 3.4509134115550273, - "grad_norm": 2.768024206161499, - "learning_rate": 9.294519530669835e-06, - "loss": 0.0729, - "step": 46470 - }, - { - "epoch": 3.451656022575375, - "grad_norm": 0.7756059169769287, - "learning_rate": 9.29006386454775e-06, - "loss": 0.0466, - "step": 46480 - }, - { - "epoch": 3.4523986335957226, - "grad_norm": 1.0126781463623047, - "learning_rate": 9.285608198425664e-06, - "loss": 0.0615, - "step": 46490 - }, - { - "epoch": 3.45314124461607, - "grad_norm": 1.432900071144104, - "learning_rate": 9.28115253230358e-06, - "loss": 0.0481, - "step": 46500 - }, - { - "epoch": 3.4538838556364175, - "grad_norm": 1.1251558065414429, - "learning_rate": 9.276696866181494e-06, - "loss": 0.0573, - "step": 46510 - }, - { - "epoch": 3.454626466656765, - "grad_norm": 1.9688692092895508, - "learning_rate": 9.272241200059409e-06, - "loss": 0.0425, - "step": 46520 - }, - { - "epoch": 3.455369077677113, - "grad_norm": 0.8410460352897644, - "learning_rate": 9.267785533937325e-06, - "loss": 0.0555, - "step": 46530 - }, - { - "epoch": 3.4561116886974603, - "grad_norm": 1.3958379030227661, - "learning_rate": 9.263329867815239e-06, - "loss": 0.0733, - "step": 46540 - }, - { - "epoch": 3.4568542997178078, - "grad_norm": 0.4909604489803314, - "learning_rate": 9.258874201693154e-06, - "loss": 0.0681, - "step": 46550 - }, - { - "epoch": 3.4575969107381552, - "grad_norm": 0.4906344413757324, - "learning_rate": 9.254418535571067e-06, - "loss": 0.0965, - "step": 46560 - }, - { - "epoch": 3.4583395217585027, - "grad_norm": 0.9761192202568054, - "learning_rate": 9.249962869448984e-06, - "loss": 0.049, - "step": 46570 - }, - { - "epoch": 3.4590821327788506, - "grad_norm": 0.2913404107093811, - "learning_rate": 9.245507203326899e-06, - "loss": 0.0506, - "step": 46580 - }, - { - "epoch": 3.459824743799198, - "grad_norm": 1.4044737815856934, - "learning_rate": 9.241051537204812e-06, - "loss": 0.0496, - "step": 46590 - }, - { - "epoch": 3.4605673548195455, - "grad_norm": 0.5915066599845886, - "learning_rate": 9.236595871082727e-06, - "loss": 0.0476, - "step": 46600 - }, - { - "epoch": 3.461309965839893, - "grad_norm": 0.6072288751602173, - "learning_rate": 9.232140204960642e-06, - "loss": 0.0433, - "step": 46610 - }, - { - "epoch": 3.4620525768602404, - "grad_norm": 1.144883632659912, - "learning_rate": 9.227684538838557e-06, - "loss": 0.0593, - "step": 46620 - }, - { - "epoch": 3.4627951878805883, - "grad_norm": 1.200415015220642, - "learning_rate": 9.22322887271647e-06, - "loss": 0.0611, - "step": 46630 - }, - { - "epoch": 3.4635377989009357, - "grad_norm": 1.8944742679595947, - "learning_rate": 9.218773206594385e-06, - "loss": 0.0679, - "step": 46640 - }, - { - "epoch": 3.464280409921283, - "grad_norm": 1.1931655406951904, - "learning_rate": 9.214317540472302e-06, - "loss": 0.0392, - "step": 46650 - }, - { - "epoch": 3.4650230209416306, - "grad_norm": 0.9544970989227295, - "learning_rate": 9.209861874350215e-06, - "loss": 0.0808, - "step": 46660 - }, - { - "epoch": 3.465765631961978, - "grad_norm": 0.6240988373756409, - "learning_rate": 9.20540620822813e-06, - "loss": 0.0622, - "step": 46670 - }, - { - "epoch": 3.466508242982326, - "grad_norm": 0.9956406950950623, - "learning_rate": 9.200950542106045e-06, - "loss": 0.0536, - "step": 46680 - }, - { - "epoch": 3.4672508540026734, - "grad_norm": 1.983014702796936, - "learning_rate": 9.19649487598396e-06, - "loss": 0.0661, - "step": 46690 - }, - { - "epoch": 3.467993465023021, - "grad_norm": 1.8986002206802368, - "learning_rate": 9.192039209861875e-06, - "loss": 0.0885, - "step": 46700 - }, - { - "epoch": 3.4687360760433683, - "grad_norm": 1.382034182548523, - "learning_rate": 9.187583543739788e-06, - "loss": 0.0768, - "step": 46710 - }, - { - "epoch": 3.469478687063716, - "grad_norm": 2.0056140422821045, - "learning_rate": 9.183127877617705e-06, - "loss": 0.0564, - "step": 46720 - }, - { - "epoch": 3.4702212980840637, - "grad_norm": 0.6202702522277832, - "learning_rate": 9.178672211495618e-06, - "loss": 0.0412, - "step": 46730 - }, - { - "epoch": 3.470963909104411, - "grad_norm": 2.408010959625244, - "learning_rate": 9.174216545373533e-06, - "loss": 0.0654, - "step": 46740 - }, - { - "epoch": 3.4717065201247586, - "grad_norm": 1.9785070419311523, - "learning_rate": 9.169760879251447e-06, - "loss": 0.0879, - "step": 46750 - }, - { - "epoch": 3.472449131145106, - "grad_norm": 1.066245675086975, - "learning_rate": 9.165305213129363e-06, - "loss": 0.0434, - "step": 46760 - }, - { - "epoch": 3.473191742165454, - "grad_norm": 0.1916639357805252, - "learning_rate": 9.160849547007278e-06, - "loss": 0.0524, - "step": 46770 - }, - { - "epoch": 3.4739343531858013, - "grad_norm": 2.890707492828369, - "learning_rate": 9.156393880885192e-06, - "loss": 0.081, - "step": 46780 - }, - { - "epoch": 3.474676964206149, - "grad_norm": 1.435386061668396, - "learning_rate": 9.151938214763108e-06, - "loss": 0.0458, - "step": 46790 - }, - { - "epoch": 3.4754195752264962, - "grad_norm": 1.8877719640731812, - "learning_rate": 9.147482548641022e-06, - "loss": 0.0704, - "step": 46800 - }, - { - "epoch": 3.476162186246844, - "grad_norm": 2.4223110675811768, - "learning_rate": 9.143026882518937e-06, - "loss": 0.0615, - "step": 46810 - }, - { - "epoch": 3.4769047972671916, - "grad_norm": 0.5216322541236877, - "learning_rate": 9.138571216396852e-06, - "loss": 0.0584, - "step": 46820 - }, - { - "epoch": 3.477647408287539, - "grad_norm": 2.4628725051879883, - "learning_rate": 9.134115550274767e-06, - "loss": 0.0849, - "step": 46830 - }, - { - "epoch": 3.4783900193078865, - "grad_norm": 1.662915825843811, - "learning_rate": 9.129659884152682e-06, - "loss": 0.0587, - "step": 46840 - }, - { - "epoch": 3.479132630328234, - "grad_norm": 2.0761513710021973, - "learning_rate": 9.125204218030595e-06, - "loss": 0.0419, - "step": 46850 - }, - { - "epoch": 3.479875241348582, - "grad_norm": 0.843273401260376, - "learning_rate": 9.12074855190851e-06, - "loss": 0.0633, - "step": 46860 - }, - { - "epoch": 3.4806178523689293, - "grad_norm": 2.208324432373047, - "learning_rate": 9.116292885786427e-06, - "loss": 0.0575, - "step": 46870 - }, - { - "epoch": 3.4813604633892767, - "grad_norm": 2.6127047538757324, - "learning_rate": 9.11183721966434e-06, - "loss": 0.0774, - "step": 46880 - }, - { - "epoch": 3.482103074409624, - "grad_norm": 0.6786608099937439, - "learning_rate": 9.107381553542255e-06, - "loss": 0.0691, - "step": 46890 - }, - { - "epoch": 3.4828456854299716, - "grad_norm": 3.497749090194702, - "learning_rate": 9.102925887420168e-06, - "loss": 0.0899, - "step": 46900 - }, - { - "epoch": 3.4835882964503195, - "grad_norm": 0.41955363750457764, - "learning_rate": 9.098470221298085e-06, - "loss": 0.0617, - "step": 46910 - }, - { - "epoch": 3.484330907470667, - "grad_norm": 0.7046768069267273, - "learning_rate": 9.094014555175998e-06, - "loss": 0.0789, - "step": 46920 - }, - { - "epoch": 3.4850735184910144, - "grad_norm": 1.7392123937606812, - "learning_rate": 9.089558889053913e-06, - "loss": 0.0574, - "step": 46930 - }, - { - "epoch": 3.485816129511362, - "grad_norm": 0.9724016189575195, - "learning_rate": 9.08510322293183e-06, - "loss": 0.0793, - "step": 46940 - }, - { - "epoch": 3.4865587405317093, - "grad_norm": 1.3222392797470093, - "learning_rate": 9.080647556809743e-06, - "loss": 0.0675, - "step": 46950 - }, - { - "epoch": 3.4873013515520572, - "grad_norm": 1.0925577878952026, - "learning_rate": 9.076191890687658e-06, - "loss": 0.0428, - "step": 46960 - }, - { - "epoch": 3.4880439625724047, - "grad_norm": 0.7402594685554504, - "learning_rate": 9.071736224565572e-06, - "loss": 0.0559, - "step": 46970 - }, - { - "epoch": 3.488786573592752, - "grad_norm": 2.0264675617218018, - "learning_rate": 9.067280558443488e-06, - "loss": 0.0425, - "step": 46980 - }, - { - "epoch": 3.4895291846130996, - "grad_norm": 1.4667295217514038, - "learning_rate": 9.062824892321403e-06, - "loss": 0.0659, - "step": 46990 - }, - { - "epoch": 3.490271795633447, - "grad_norm": 2.150097131729126, - "learning_rate": 9.058369226199317e-06, - "loss": 0.0656, - "step": 47000 - }, - { - "epoch": 3.491014406653795, - "grad_norm": 0.9886521100997925, - "learning_rate": 9.053913560077232e-06, - "loss": 0.0598, - "step": 47010 - }, - { - "epoch": 3.4917570176741424, - "grad_norm": 1.4608570337295532, - "learning_rate": 9.049457893955147e-06, - "loss": 0.1056, - "step": 47020 - }, - { - "epoch": 3.49249962869449, - "grad_norm": 0.7766015529632568, - "learning_rate": 9.045002227833061e-06, - "loss": 0.0371, - "step": 47030 - }, - { - "epoch": 3.4932422397148373, - "grad_norm": 0.7282046675682068, - "learning_rate": 9.040546561710975e-06, - "loss": 0.0582, - "step": 47040 - }, - { - "epoch": 3.4939848507351847, - "grad_norm": 2.043895721435547, - "learning_rate": 9.036090895588891e-06, - "loss": 0.0516, - "step": 47050 - }, - { - "epoch": 3.4947274617555326, - "grad_norm": 0.4283212721347809, - "learning_rate": 9.031635229466806e-06, - "loss": 0.0603, - "step": 47060 - }, - { - "epoch": 3.49547007277588, - "grad_norm": 1.2481293678283691, - "learning_rate": 9.02717956334472e-06, - "loss": 0.0381, - "step": 47070 - }, - { - "epoch": 3.4962126837962275, - "grad_norm": 1.1199532747268677, - "learning_rate": 9.022723897222635e-06, - "loss": 0.0837, - "step": 47080 - }, - { - "epoch": 3.496955294816575, - "grad_norm": 1.4884730577468872, - "learning_rate": 9.01826823110055e-06, - "loss": 0.0265, - "step": 47090 - }, - { - "epoch": 3.4976979058369224, - "grad_norm": 0.7303683757781982, - "learning_rate": 9.013812564978465e-06, - "loss": 0.0711, - "step": 47100 - }, - { - "epoch": 3.4984405168572703, - "grad_norm": 1.0589395761489868, - "learning_rate": 9.00935689885638e-06, - "loss": 0.0711, - "step": 47110 - }, - { - "epoch": 3.4991831278776178, - "grad_norm": 0.7774037718772888, - "learning_rate": 9.004901232734293e-06, - "loss": 0.0543, - "step": 47120 - }, - { - "epoch": 3.499925738897965, - "grad_norm": 1.6601343154907227, - "learning_rate": 9.00044556661221e-06, - "loss": 0.0608, - "step": 47130 - }, - { - "epoch": 3.5006683499183127, - "grad_norm": 0.5749710202217102, - "learning_rate": 8.995989900490123e-06, - "loss": 0.0565, - "step": 47140 - }, - { - "epoch": 3.50141096093866, - "grad_norm": 1.1628215312957764, - "learning_rate": 8.991534234368038e-06, - "loss": 0.0382, - "step": 47150 - }, - { - "epoch": 3.502153571959008, - "grad_norm": 1.716511607170105, - "learning_rate": 8.987078568245953e-06, - "loss": 0.0662, - "step": 47160 - }, - { - "epoch": 3.5028961829793555, - "grad_norm": 1.7500982284545898, - "learning_rate": 8.982622902123868e-06, - "loss": 0.0447, - "step": 47170 - }, - { - "epoch": 3.503638793999703, - "grad_norm": 1.2955238819122314, - "learning_rate": 8.978167236001783e-06, - "loss": 0.0674, - "step": 47180 - }, - { - "epoch": 3.5043814050200504, - "grad_norm": 0.32319340109825134, - "learning_rate": 8.973711569879696e-06, - "loss": 0.0294, - "step": 47190 - }, - { - "epoch": 3.505124016040398, - "grad_norm": 1.2012195587158203, - "learning_rate": 8.969255903757613e-06, - "loss": 0.085, - "step": 47200 - }, - { - "epoch": 3.5058666270607457, - "grad_norm": 1.1960065364837646, - "learning_rate": 8.964800237635526e-06, - "loss": 0.0697, - "step": 47210 - }, - { - "epoch": 3.506609238081093, - "grad_norm": 0.929478108882904, - "learning_rate": 8.960344571513441e-06, - "loss": 0.0367, - "step": 47220 - }, - { - "epoch": 3.5073518491014406, - "grad_norm": 0.9691451787948608, - "learning_rate": 8.955888905391356e-06, - "loss": 0.0622, - "step": 47230 - }, - { - "epoch": 3.508094460121788, - "grad_norm": 1.1364026069641113, - "learning_rate": 8.951433239269271e-06, - "loss": 0.0581, - "step": 47240 - }, - { - "epoch": 3.5088370711421355, - "grad_norm": 1.0724977254867554, - "learning_rate": 8.946977573147186e-06, - "loss": 0.06, - "step": 47250 - }, - { - "epoch": 3.5095796821624834, - "grad_norm": 2.1212868690490723, - "learning_rate": 8.9425219070251e-06, - "loss": 0.0663, - "step": 47260 - }, - { - "epoch": 3.510322293182831, - "grad_norm": 0.43843552470207214, - "learning_rate": 8.938066240903015e-06, - "loss": 0.0727, - "step": 47270 - }, - { - "epoch": 3.5110649042031783, - "grad_norm": 1.7042205333709717, - "learning_rate": 8.933610574780931e-06, - "loss": 0.0521, - "step": 47280 - }, - { - "epoch": 3.511807515223526, - "grad_norm": 1.1995595693588257, - "learning_rate": 8.929154908658845e-06, - "loss": 0.0867, - "step": 47290 - }, - { - "epoch": 3.512550126243873, - "grad_norm": 0.9248149394989014, - "learning_rate": 8.92469924253676e-06, - "loss": 0.0627, - "step": 47300 - }, - { - "epoch": 3.513292737264221, - "grad_norm": 0.9813995957374573, - "learning_rate": 8.920243576414675e-06, - "loss": 0.0924, - "step": 47310 - }, - { - "epoch": 3.5140353482845685, - "grad_norm": 1.9305483102798462, - "learning_rate": 8.91578791029259e-06, - "loss": 0.054, - "step": 47320 - }, - { - "epoch": 3.514777959304916, - "grad_norm": 0.959563672542572, - "learning_rate": 8.911332244170503e-06, - "loss": 0.0512, - "step": 47330 - }, - { - "epoch": 3.515520570325264, - "grad_norm": 0.4607963263988495, - "learning_rate": 8.906876578048418e-06, - "loss": 0.0822, - "step": 47340 - }, - { - "epoch": 3.5162631813456113, - "grad_norm": 0.6391094923019409, - "learning_rate": 8.902420911926335e-06, - "loss": 0.0722, - "step": 47350 - }, - { - "epoch": 3.517005792365959, - "grad_norm": 1.5800341367721558, - "learning_rate": 8.897965245804248e-06, - "loss": 0.0268, - "step": 47360 - }, - { - "epoch": 3.5177484033863062, - "grad_norm": 0.609835147857666, - "learning_rate": 8.893509579682163e-06, - "loss": 0.0625, - "step": 47370 - }, - { - "epoch": 3.5184910144066537, - "grad_norm": 2.046144723892212, - "learning_rate": 8.889053913560076e-06, - "loss": 0.0691, - "step": 47380 - }, - { - "epoch": 3.5192336254270016, - "grad_norm": 1.5540525913238525, - "learning_rate": 8.884598247437993e-06, - "loss": 0.0637, - "step": 47390 - }, - { - "epoch": 3.519976236447349, - "grad_norm": 0.6543061137199402, - "learning_rate": 8.880142581315908e-06, - "loss": 0.0655, - "step": 47400 - }, - { - "epoch": 3.5207188474676965, - "grad_norm": 0.7677350640296936, - "learning_rate": 8.875686915193821e-06, - "loss": 0.0481, - "step": 47410 - }, - { - "epoch": 3.521461458488044, - "grad_norm": 0.8277533054351807, - "learning_rate": 8.871231249071736e-06, - "loss": 0.0858, - "step": 47420 - }, - { - "epoch": 3.5222040695083914, - "grad_norm": 0.4589194059371948, - "learning_rate": 8.866775582949651e-06, - "loss": 0.0333, - "step": 47430 - }, - { - "epoch": 3.5229466805287393, - "grad_norm": 1.8496214151382446, - "learning_rate": 8.862319916827566e-06, - "loss": 0.0732, - "step": 47440 - }, - { - "epoch": 3.5236892915490867, - "grad_norm": 2.326258659362793, - "learning_rate": 8.85786425070548e-06, - "loss": 0.0875, - "step": 47450 - }, - { - "epoch": 3.524431902569434, - "grad_norm": 3.8180079460144043, - "learning_rate": 8.853408584583396e-06, - "loss": 0.0932, - "step": 47460 - }, - { - "epoch": 3.5251745135897816, - "grad_norm": 2.513268232345581, - "learning_rate": 8.848952918461311e-06, - "loss": 0.065, - "step": 47470 - }, - { - "epoch": 3.525917124610129, - "grad_norm": 0.91473788022995, - "learning_rate": 8.844497252339224e-06, - "loss": 0.068, - "step": 47480 - }, - { - "epoch": 3.526659735630477, - "grad_norm": 1.168892502784729, - "learning_rate": 8.84004158621714e-06, - "loss": 0.0351, - "step": 47490 - }, - { - "epoch": 3.5274023466508244, - "grad_norm": 0.857007622718811, - "learning_rate": 8.835585920095054e-06, - "loss": 0.08, - "step": 47500 - }, - { - "epoch": 3.528144957671172, - "grad_norm": 1.0127304792404175, - "learning_rate": 8.83113025397297e-06, - "loss": 0.0736, - "step": 47510 - }, - { - "epoch": 3.5288875686915193, - "grad_norm": 1.726643681526184, - "learning_rate": 8.826674587850884e-06, - "loss": 0.0578, - "step": 47520 - }, - { - "epoch": 3.5296301797118668, - "grad_norm": 1.7665687799453735, - "learning_rate": 8.822218921728798e-06, - "loss": 0.061, - "step": 47530 - }, - { - "epoch": 3.5303727907322147, - "grad_norm": 0.5315186977386475, - "learning_rate": 8.817763255606714e-06, - "loss": 0.0641, - "step": 47540 - }, - { - "epoch": 3.531115401752562, - "grad_norm": 1.4870011806488037, - "learning_rate": 8.813307589484628e-06, - "loss": 0.0726, - "step": 47550 - }, - { - "epoch": 3.5318580127729096, - "grad_norm": 1.0774098634719849, - "learning_rate": 8.808851923362543e-06, - "loss": 0.0726, - "step": 47560 - }, - { - "epoch": 3.532600623793257, - "grad_norm": 2.8988194465637207, - "learning_rate": 8.80439625724046e-06, - "loss": 0.0568, - "step": 47570 - }, - { - "epoch": 3.5333432348136045, - "grad_norm": 0.4730290472507477, - "learning_rate": 8.799940591118373e-06, - "loss": 0.053, - "step": 47580 - }, - { - "epoch": 3.5340858458339524, - "grad_norm": 1.4914735555648804, - "learning_rate": 8.795484924996288e-06, - "loss": 0.0601, - "step": 47590 - }, - { - "epoch": 3.5348284568543, - "grad_norm": 1.6329556703567505, - "learning_rate": 8.791029258874201e-06, - "loss": 0.0553, - "step": 47600 - }, - { - "epoch": 3.5355710678746473, - "grad_norm": 2.3614673614501953, - "learning_rate": 8.786573592752118e-06, - "loss": 0.0744, - "step": 47610 - }, - { - "epoch": 3.5363136788949947, - "grad_norm": 2.2033894062042236, - "learning_rate": 8.782117926630031e-06, - "loss": 0.0615, - "step": 47620 - }, - { - "epoch": 3.537056289915342, - "grad_norm": 0.42361772060394287, - "learning_rate": 8.777662260507946e-06, - "loss": 0.0494, - "step": 47630 - }, - { - "epoch": 3.53779890093569, - "grad_norm": 1.1795815229415894, - "learning_rate": 8.773206594385861e-06, - "loss": 0.0633, - "step": 47640 - }, - { - "epoch": 3.5385415119560375, - "grad_norm": 1.4586540460586548, - "learning_rate": 8.768750928263776e-06, - "loss": 0.0525, - "step": 47650 - }, - { - "epoch": 3.539284122976385, - "grad_norm": 0.6656326055526733, - "learning_rate": 8.764295262141691e-06, - "loss": 0.0831, - "step": 47660 - }, - { - "epoch": 3.5400267339967324, - "grad_norm": 0.5945910215377808, - "learning_rate": 8.759839596019604e-06, - "loss": 0.0444, - "step": 47670 - }, - { - "epoch": 3.54076934501708, - "grad_norm": 1.4914181232452393, - "learning_rate": 8.75538392989752e-06, - "loss": 0.0344, - "step": 47680 - }, - { - "epoch": 3.5415119560374277, - "grad_norm": 1.5932813882827759, - "learning_rate": 8.750928263775436e-06, - "loss": 0.0624, - "step": 47690 - }, - { - "epoch": 3.542254567057775, - "grad_norm": 1.4807531833648682, - "learning_rate": 8.74647259765335e-06, - "loss": 0.0671, - "step": 47700 - }, - { - "epoch": 3.5429971780781226, - "grad_norm": 3.4547231197357178, - "learning_rate": 8.742016931531264e-06, - "loss": 0.0489, - "step": 47710 - }, - { - "epoch": 3.54373978909847, - "grad_norm": 1.1375788450241089, - "learning_rate": 8.73756126540918e-06, - "loss": 0.0641, - "step": 47720 - }, - { - "epoch": 3.5444824001188175, - "grad_norm": 1.481046199798584, - "learning_rate": 8.733105599287094e-06, - "loss": 0.0676, - "step": 47730 - }, - { - "epoch": 3.5452250111391654, - "grad_norm": 0.4229665994644165, - "learning_rate": 8.728649933165007e-06, - "loss": 0.0647, - "step": 47740 - }, - { - "epoch": 3.545967622159513, - "grad_norm": 1.467894196510315, - "learning_rate": 8.724194267042922e-06, - "loss": 0.0597, - "step": 47750 - }, - { - "epoch": 3.5467102331798603, - "grad_norm": 0.5830600261688232, - "learning_rate": 8.719738600920839e-06, - "loss": 0.0649, - "step": 47760 - }, - { - "epoch": 3.547452844200208, - "grad_norm": 1.7029845714569092, - "learning_rate": 8.715282934798752e-06, - "loss": 0.104, - "step": 47770 - }, - { - "epoch": 3.5481954552205552, - "grad_norm": 0.429775208234787, - "learning_rate": 8.710827268676667e-06, - "loss": 0.0556, - "step": 47780 - }, - { - "epoch": 3.548938066240903, - "grad_norm": 2.0149717330932617, - "learning_rate": 8.70637160255458e-06, - "loss": 0.0864, - "step": 47790 - }, - { - "epoch": 3.5496806772612506, - "grad_norm": 3.289201498031616, - "learning_rate": 8.701915936432497e-06, - "loss": 0.077, - "step": 47800 - }, - { - "epoch": 3.550423288281598, - "grad_norm": 0.7623452544212341, - "learning_rate": 8.697460270310412e-06, - "loss": 0.063, - "step": 47810 - }, - { - "epoch": 3.5511658993019455, - "grad_norm": 1.59382164478302, - "learning_rate": 8.693004604188326e-06, - "loss": 0.0783, - "step": 47820 - }, - { - "epoch": 3.551908510322293, - "grad_norm": 1.199479579925537, - "learning_rate": 8.688548938066242e-06, - "loss": 0.0654, - "step": 47830 - }, - { - "epoch": 3.552651121342641, - "grad_norm": 0.7450114488601685, - "learning_rate": 8.684093271944156e-06, - "loss": 0.0612, - "step": 47840 - }, - { - "epoch": 3.5533937323629883, - "grad_norm": 2.706019163131714, - "learning_rate": 8.67963760582207e-06, - "loss": 0.0717, - "step": 47850 - }, - { - "epoch": 3.5541363433833357, - "grad_norm": 1.1404179334640503, - "learning_rate": 8.675181939699986e-06, - "loss": 0.0612, - "step": 47860 - }, - { - "epoch": 3.5548789544036836, - "grad_norm": 4.893725872039795, - "learning_rate": 8.6707262735779e-06, - "loss": 0.0612, - "step": 47870 - }, - { - "epoch": 3.5556215654240306, - "grad_norm": 2.2265067100524902, - "learning_rate": 8.666270607455816e-06, - "loss": 0.0517, - "step": 47880 - }, - { - "epoch": 3.5563641764443785, - "grad_norm": 0.6842568516731262, - "learning_rate": 8.661814941333729e-06, - "loss": 0.0562, - "step": 47890 - }, - { - "epoch": 3.557106787464726, - "grad_norm": 0.7433666586875916, - "learning_rate": 8.657359275211644e-06, - "loss": 0.0586, - "step": 47900 - }, - { - "epoch": 3.5578493984850734, - "grad_norm": 1.5881272554397583, - "learning_rate": 8.652903609089559e-06, - "loss": 0.0667, - "step": 47910 - }, - { - "epoch": 3.5585920095054213, - "grad_norm": 1.4796943664550781, - "learning_rate": 8.648447942967474e-06, - "loss": 0.0797, - "step": 47920 - }, - { - "epoch": 3.5593346205257688, - "grad_norm": 1.468156337738037, - "learning_rate": 8.643992276845389e-06, - "loss": 0.0655, - "step": 47930 - }, - { - "epoch": 3.560077231546116, - "grad_norm": 1.8605856895446777, - "learning_rate": 8.639536610723302e-06, - "loss": 0.0929, - "step": 47940 - }, - { - "epoch": 3.5608198425664637, - "grad_norm": 0.9194366335868835, - "learning_rate": 8.635080944601219e-06, - "loss": 0.0498, - "step": 47950 - }, - { - "epoch": 3.561562453586811, - "grad_norm": 1.5396569967269897, - "learning_rate": 8.630625278479132e-06, - "loss": 0.0706, - "step": 47960 - }, - { - "epoch": 3.562305064607159, - "grad_norm": 2.437840700149536, - "learning_rate": 8.626169612357047e-06, - "loss": 0.0877, - "step": 47970 - }, - { - "epoch": 3.5630476756275065, - "grad_norm": 0.9188141822814941, - "learning_rate": 8.621713946234964e-06, - "loss": 0.0793, - "step": 47980 - }, - { - "epoch": 3.563790286647854, - "grad_norm": 1.7149858474731445, - "learning_rate": 8.617258280112877e-06, - "loss": 0.0615, - "step": 47990 - }, - { - "epoch": 3.5645328976682014, - "grad_norm": 0.9212315082550049, - "learning_rate": 8.612802613990792e-06, - "loss": 0.0857, - "step": 48000 - }, - { - "epoch": 3.565275508688549, - "grad_norm": 1.6391431093215942, - "learning_rate": 8.608346947868706e-06, - "loss": 0.0434, - "step": 48010 - }, - { - "epoch": 3.5660181197088967, - "grad_norm": 0.9591582417488098, - "learning_rate": 8.603891281746622e-06, - "loss": 0.0665, - "step": 48020 - }, - { - "epoch": 3.566760730729244, - "grad_norm": 0.856239914894104, - "learning_rate": 8.599435615624536e-06, - "loss": 0.0416, - "step": 48030 - }, - { - "epoch": 3.5675033417495916, - "grad_norm": 0.6518556475639343, - "learning_rate": 8.59497994950245e-06, - "loss": 0.0264, - "step": 48040 - }, - { - "epoch": 3.568245952769939, - "grad_norm": 1.1841713190078735, - "learning_rate": 8.590524283380365e-06, - "loss": 0.0806, - "step": 48050 - }, - { - "epoch": 3.5689885637902865, - "grad_norm": 0.7646443247795105, - "learning_rate": 8.58606861725828e-06, - "loss": 0.0474, - "step": 48060 - }, - { - "epoch": 3.5697311748106344, - "grad_norm": 2.25919246673584, - "learning_rate": 8.581612951136195e-06, - "loss": 0.0511, - "step": 48070 - }, - { - "epoch": 3.570473785830982, - "grad_norm": 2.300975799560547, - "learning_rate": 8.577157285014109e-06, - "loss": 0.0626, - "step": 48080 - }, - { - "epoch": 3.5712163968513293, - "grad_norm": 0.9677648544311523, - "learning_rate": 8.572701618892024e-06, - "loss": 0.0506, - "step": 48090 - }, - { - "epoch": 3.5719590078716768, - "grad_norm": 2.2813456058502197, - "learning_rate": 8.56824595276994e-06, - "loss": 0.0981, - "step": 48100 - }, - { - "epoch": 3.572701618892024, - "grad_norm": 1.1389786005020142, - "learning_rate": 8.563790286647854e-06, - "loss": 0.045, - "step": 48110 - }, - { - "epoch": 3.573444229912372, - "grad_norm": 0.6446773409843445, - "learning_rate": 8.559334620525769e-06, - "loss": 0.0623, - "step": 48120 - }, - { - "epoch": 3.5741868409327195, - "grad_norm": 0.8095260858535767, - "learning_rate": 8.554878954403684e-06, - "loss": 0.039, - "step": 48130 - }, - { - "epoch": 3.574929451953067, - "grad_norm": 1.6221411228179932, - "learning_rate": 8.550423288281599e-06, - "loss": 0.0335, - "step": 48140 - }, - { - "epoch": 3.5756720629734144, - "grad_norm": 2.590031385421753, - "learning_rate": 8.545967622159512e-06, - "loss": 0.0626, - "step": 48150 - }, - { - "epoch": 3.576414673993762, - "grad_norm": 1.4469174146652222, - "learning_rate": 8.541511956037427e-06, - "loss": 0.0582, - "step": 48160 - }, - { - "epoch": 3.57715728501411, - "grad_norm": 1.5746777057647705, - "learning_rate": 8.537056289915344e-06, - "loss": 0.0842, - "step": 48170 - }, - { - "epoch": 3.5778998960344572, - "grad_norm": 1.6506327390670776, - "learning_rate": 8.532600623793257e-06, - "loss": 0.0394, - "step": 48180 - }, - { - "epoch": 3.5786425070548047, - "grad_norm": 1.0401891469955444, - "learning_rate": 8.528144957671172e-06, - "loss": 0.0653, - "step": 48190 - }, - { - "epoch": 3.579385118075152, - "grad_norm": 1.626905918121338, - "learning_rate": 8.523689291549085e-06, - "loss": 0.0546, - "step": 48200 - }, - { - "epoch": 3.5801277290954996, - "grad_norm": 1.3999053239822388, - "learning_rate": 8.519233625427002e-06, - "loss": 0.0506, - "step": 48210 - }, - { - "epoch": 3.5808703401158475, - "grad_norm": 1.21640944480896, - "learning_rate": 8.514777959304917e-06, - "loss": 0.0772, - "step": 48220 - }, - { - "epoch": 3.581612951136195, - "grad_norm": 0.853153645992279, - "learning_rate": 8.51032229318283e-06, - "loss": 0.0399, - "step": 48230 - }, - { - "epoch": 3.5823555621565424, - "grad_norm": 1.5424240827560425, - "learning_rate": 8.505866627060747e-06, - "loss": 0.0593, - "step": 48240 - }, - { - "epoch": 3.58309817317689, - "grad_norm": 0.3190561830997467, - "learning_rate": 8.50141096093866e-06, - "loss": 0.0463, - "step": 48250 - }, - { - "epoch": 3.5838407841972373, - "grad_norm": 1.145538568496704, - "learning_rate": 8.496955294816575e-06, - "loss": 0.044, - "step": 48260 - }, - { - "epoch": 3.584583395217585, - "grad_norm": 1.7267332077026367, - "learning_rate": 8.49249962869449e-06, - "loss": 0.065, - "step": 48270 - }, - { - "epoch": 3.5853260062379326, - "grad_norm": 0.45990875363349915, - "learning_rate": 8.488043962572405e-06, - "loss": 0.0485, - "step": 48280 - }, - { - "epoch": 3.58606861725828, - "grad_norm": 0.5736406445503235, - "learning_rate": 8.48358829645032e-06, - "loss": 0.0597, - "step": 48290 - }, - { - "epoch": 3.5868112282786275, - "grad_norm": 0.34725332260131836, - "learning_rate": 8.479132630328234e-06, - "loss": 0.0634, - "step": 48300 - }, - { - "epoch": 3.587553839298975, - "grad_norm": 2.8280141353607178, - "learning_rate": 8.474676964206149e-06, - "loss": 0.0553, - "step": 48310 - }, - { - "epoch": 3.588296450319323, - "grad_norm": 0.43756672739982605, - "learning_rate": 8.470221298084064e-06, - "loss": 0.0565, - "step": 48320 - }, - { - "epoch": 3.5890390613396703, - "grad_norm": 4.297688961029053, - "learning_rate": 8.465765631961979e-06, - "loss": 0.0723, - "step": 48330 - }, - { - "epoch": 3.5897816723600178, - "grad_norm": 1.2074358463287354, - "learning_rate": 8.461309965839894e-06, - "loss": 0.0601, - "step": 48340 - }, - { - "epoch": 3.5905242833803652, - "grad_norm": 1.4937044382095337, - "learning_rate": 8.456854299717807e-06, - "loss": 0.0968, - "step": 48350 - }, - { - "epoch": 3.5912668944007127, - "grad_norm": 1.5273339748382568, - "learning_rate": 8.452398633595724e-06, - "loss": 0.0736, - "step": 48360 - }, - { - "epoch": 3.5920095054210606, - "grad_norm": 2.0641915798187256, - "learning_rate": 8.447942967473637e-06, - "loss": 0.0649, - "step": 48370 - }, - { - "epoch": 3.592752116441408, - "grad_norm": 1.619729995727539, - "learning_rate": 8.443487301351552e-06, - "loss": 0.0689, - "step": 48380 - }, - { - "epoch": 3.5934947274617555, - "grad_norm": 2.8663530349731445, - "learning_rate": 8.439031635229469e-06, - "loss": 0.0823, - "step": 48390 - }, - { - "epoch": 3.594237338482103, - "grad_norm": 0.41677528619766235, - "learning_rate": 8.434575969107382e-06, - "loss": 0.0444, - "step": 48400 - }, - { - "epoch": 3.5949799495024504, - "grad_norm": 1.2534587383270264, - "learning_rate": 8.430120302985297e-06, - "loss": 0.046, - "step": 48410 - }, - { - "epoch": 3.5957225605227983, - "grad_norm": 1.1408332586288452, - "learning_rate": 8.42566463686321e-06, - "loss": 0.0431, - "step": 48420 - }, - { - "epoch": 3.5964651715431457, - "grad_norm": 1.304916262626648, - "learning_rate": 8.421208970741127e-06, - "loss": 0.0525, - "step": 48430 - }, - { - "epoch": 3.597207782563493, - "grad_norm": 1.2031378746032715, - "learning_rate": 8.41675330461904e-06, - "loss": 0.0647, - "step": 48440 - }, - { - "epoch": 3.597950393583841, - "grad_norm": 1.5209531784057617, - "learning_rate": 8.412297638496955e-06, - "loss": 0.0614, - "step": 48450 - }, - { - "epoch": 3.598693004604188, - "grad_norm": 1.3445301055908203, - "learning_rate": 8.40784197237487e-06, - "loss": 0.0793, - "step": 48460 - }, - { - "epoch": 3.599435615624536, - "grad_norm": 0.48478415608406067, - "learning_rate": 8.403386306252785e-06, - "loss": 0.0781, - "step": 48470 - }, - { - "epoch": 3.6001782266448834, - "grad_norm": 0.9039621353149414, - "learning_rate": 8.3989306401307e-06, - "loss": 0.0734, - "step": 48480 - }, - { - "epoch": 3.600920837665231, - "grad_norm": 1.992552638053894, - "learning_rate": 8.394474974008613e-06, - "loss": 0.0582, - "step": 48490 - }, - { - "epoch": 3.6016634486855788, - "grad_norm": 1.8207603693008423, - "learning_rate": 8.39001930788653e-06, - "loss": 0.0443, - "step": 48500 - }, - { - "epoch": 3.602406059705926, - "grad_norm": 1.5029350519180298, - "learning_rate": 8.385563641764445e-06, - "loss": 0.0553, - "step": 48510 - }, - { - "epoch": 3.6031486707262737, - "grad_norm": 1.8282309770584106, - "learning_rate": 8.381107975642358e-06, - "loss": 0.0408, - "step": 48520 - }, - { - "epoch": 3.603891281746621, - "grad_norm": 1.2360031604766846, - "learning_rate": 8.376652309520273e-06, - "loss": 0.0606, - "step": 48530 - }, - { - "epoch": 3.6046338927669686, - "grad_norm": 2.7400107383728027, - "learning_rate": 8.372196643398188e-06, - "loss": 0.0532, - "step": 48540 - }, - { - "epoch": 3.6053765037873164, - "grad_norm": 1.5960508584976196, - "learning_rate": 8.367740977276103e-06, - "loss": 0.0641, - "step": 48550 - }, - { - "epoch": 3.606119114807664, - "grad_norm": 1.4296334981918335, - "learning_rate": 8.363285311154018e-06, - "loss": 0.0749, - "step": 48560 - }, - { - "epoch": 3.6068617258280113, - "grad_norm": 2.3336386680603027, - "learning_rate": 8.358829645031932e-06, - "loss": 0.0739, - "step": 48570 - }, - { - "epoch": 3.607604336848359, - "grad_norm": 1.2750415802001953, - "learning_rate": 8.354373978909848e-06, - "loss": 0.0661, - "step": 48580 - }, - { - "epoch": 3.6083469478687062, - "grad_norm": 1.1788580417633057, - "learning_rate": 8.349918312787762e-06, - "loss": 0.0629, - "step": 48590 - }, - { - "epoch": 3.609089558889054, - "grad_norm": 1.36868155002594, - "learning_rate": 8.345462646665677e-06, - "loss": 0.0625, - "step": 48600 - }, - { - "epoch": 3.6098321699094016, - "grad_norm": 1.373689889907837, - "learning_rate": 8.34100698054359e-06, - "loss": 0.0562, - "step": 48610 - }, - { - "epoch": 3.610574780929749, - "grad_norm": 2.2986576557159424, - "learning_rate": 8.336551314421507e-06, - "loss": 0.0644, - "step": 48620 - }, - { - "epoch": 3.6113173919500965, - "grad_norm": 0.9667069911956787, - "learning_rate": 8.332095648299422e-06, - "loss": 0.0964, - "step": 48630 - }, - { - "epoch": 3.612060002970444, - "grad_norm": 1.5480561256408691, - "learning_rate": 8.327639982177335e-06, - "loss": 0.0456, - "step": 48640 - }, - { - "epoch": 3.612802613990792, - "grad_norm": 2.361801862716675, - "learning_rate": 8.323184316055252e-06, - "loss": 0.0599, - "step": 48650 - }, - { - "epoch": 3.6135452250111393, - "grad_norm": 0.5613052845001221, - "learning_rate": 8.318728649933165e-06, - "loss": 0.0428, - "step": 48660 - }, - { - "epoch": 3.6142878360314867, - "grad_norm": 2.242196559906006, - "learning_rate": 8.31427298381108e-06, - "loss": 0.0627, - "step": 48670 - }, - { - "epoch": 3.615030447051834, - "grad_norm": 0.8085373640060425, - "learning_rate": 8.309817317688995e-06, - "loss": 0.0531, - "step": 48680 - }, - { - "epoch": 3.6157730580721816, - "grad_norm": 1.4560467004776, - "learning_rate": 8.30536165156691e-06, - "loss": 0.054, - "step": 48690 - }, - { - "epoch": 3.6165156690925295, - "grad_norm": 1.3971903324127197, - "learning_rate": 8.300905985444825e-06, - "loss": 0.039, - "step": 48700 - }, - { - "epoch": 3.617258280112877, - "grad_norm": 1.9902868270874023, - "learning_rate": 8.296450319322738e-06, - "loss": 0.0625, - "step": 48710 - }, - { - "epoch": 3.6180008911332244, - "grad_norm": 2.5948617458343506, - "learning_rate": 8.291994653200653e-06, - "loss": 0.0437, - "step": 48720 - }, - { - "epoch": 3.618743502153572, - "grad_norm": 1.1913443803787231, - "learning_rate": 8.287538987078568e-06, - "loss": 0.0699, - "step": 48730 - }, - { - "epoch": 3.6194861131739193, - "grad_norm": 1.5906063318252563, - "learning_rate": 8.283083320956483e-06, - "loss": 0.0839, - "step": 48740 - }, - { - "epoch": 3.6202287241942672, - "grad_norm": 1.980405569076538, - "learning_rate": 8.278627654834398e-06, - "loss": 0.0694, - "step": 48750 - }, - { - "epoch": 3.6209713352146147, - "grad_norm": 0.5400698781013489, - "learning_rate": 8.274171988712313e-06, - "loss": 0.065, - "step": 48760 - }, - { - "epoch": 3.621713946234962, - "grad_norm": 2.1848058700561523, - "learning_rate": 8.269716322590228e-06, - "loss": 0.0694, - "step": 48770 - }, - { - "epoch": 3.6224565572553096, - "grad_norm": 1.3808951377868652, - "learning_rate": 8.265260656468141e-06, - "loss": 0.0496, - "step": 48780 - }, - { - "epoch": 3.623199168275657, - "grad_norm": 1.0675551891326904, - "learning_rate": 8.260804990346056e-06, - "loss": 0.0807, - "step": 48790 - }, - { - "epoch": 3.623941779296005, - "grad_norm": 0.58694988489151, - "learning_rate": 8.256349324223973e-06, - "loss": 0.0794, - "step": 48800 - }, - { - "epoch": 3.6246843903163524, - "grad_norm": 1.7244460582733154, - "learning_rate": 8.251893658101886e-06, - "loss": 0.0631, - "step": 48810 - }, - { - "epoch": 3.6254270013367, - "grad_norm": 0.8975712060928345, - "learning_rate": 8.247437991979801e-06, - "loss": 0.0395, - "step": 48820 - }, - { - "epoch": 3.6261696123570473, - "grad_norm": 0.6970779299736023, - "learning_rate": 8.242982325857715e-06, - "loss": 0.0548, - "step": 48830 - }, - { - "epoch": 3.6269122233773947, - "grad_norm": 3.2898783683776855, - "learning_rate": 8.238526659735631e-06, - "loss": 0.0594, - "step": 48840 - }, - { - "epoch": 3.6276548343977426, - "grad_norm": 1.9671893119812012, - "learning_rate": 8.234070993613545e-06, - "loss": 0.0702, - "step": 48850 - }, - { - "epoch": 3.62839744541809, - "grad_norm": 0.6699275970458984, - "learning_rate": 8.22961532749146e-06, - "loss": 0.0674, - "step": 48860 - }, - { - "epoch": 3.6291400564384375, - "grad_norm": 1.7382103204727173, - "learning_rate": 8.225159661369375e-06, - "loss": 0.0578, - "step": 48870 - }, - { - "epoch": 3.629882667458785, - "grad_norm": 3.2885706424713135, - "learning_rate": 8.22070399524729e-06, - "loss": 0.0565, - "step": 48880 - }, - { - "epoch": 3.6306252784791324, - "grad_norm": 1.1703078746795654, - "learning_rate": 8.216248329125205e-06, - "loss": 0.0696, - "step": 48890 - }, - { - "epoch": 3.6313678894994803, - "grad_norm": 0.3696410059928894, - "learning_rate": 8.211792663003118e-06, - "loss": 0.0693, - "step": 48900 - }, - { - "epoch": 3.6321105005198278, - "grad_norm": 2.525710105895996, - "learning_rate": 8.207336996881035e-06, - "loss": 0.0648, - "step": 48910 - }, - { - "epoch": 3.632853111540175, - "grad_norm": 1.2038122415542603, - "learning_rate": 8.20288133075895e-06, - "loss": 0.0612, - "step": 48920 - }, - { - "epoch": 3.6335957225605227, - "grad_norm": 2.1405258178710938, - "learning_rate": 8.198425664636863e-06, - "loss": 0.0607, - "step": 48930 - }, - { - "epoch": 3.63433833358087, - "grad_norm": 1.3878464698791504, - "learning_rate": 8.193969998514778e-06, - "loss": 0.0556, - "step": 48940 - }, - { - "epoch": 3.635080944601218, - "grad_norm": 0.8806987404823303, - "learning_rate": 8.189514332392693e-06, - "loss": 0.0496, - "step": 48950 - }, - { - "epoch": 3.6358235556215655, - "grad_norm": 0.7001392841339111, - "learning_rate": 8.185058666270608e-06, - "loss": 0.0623, - "step": 48960 - }, - { - "epoch": 3.636566166641913, - "grad_norm": 1.2697521448135376, - "learning_rate": 8.180603000148523e-06, - "loss": 0.0804, - "step": 48970 - }, - { - "epoch": 3.6373087776622604, - "grad_norm": 0.7916688323020935, - "learning_rate": 8.176147334026436e-06, - "loss": 0.0562, - "step": 48980 - }, - { - "epoch": 3.638051388682608, - "grad_norm": 1.4177652597427368, - "learning_rate": 8.171691667904353e-06, - "loss": 0.0454, - "step": 48990 - }, - { - "epoch": 3.6387939997029557, - "grad_norm": 2.105250120162964, - "learning_rate": 8.167236001782266e-06, - "loss": 0.0614, - "step": 49000 - }, - { - "epoch": 3.639536610723303, - "grad_norm": 1.3366777896881104, - "learning_rate": 8.162780335660181e-06, - "loss": 0.0743, - "step": 49010 - }, - { - "epoch": 3.6402792217436506, - "grad_norm": 0.6253573298454285, - "learning_rate": 8.158324669538096e-06, - "loss": 0.0649, - "step": 49020 - }, - { - "epoch": 3.6410218327639985, - "grad_norm": 0.4928571879863739, - "learning_rate": 8.153869003416011e-06, - "loss": 0.069, - "step": 49030 - }, - { - "epoch": 3.6417644437843455, - "grad_norm": 0.9008927345275879, - "learning_rate": 8.149413337293926e-06, - "loss": 0.0508, - "step": 49040 - }, - { - "epoch": 3.6425070548046934, - "grad_norm": 0.6431163549423218, - "learning_rate": 8.14495767117184e-06, - "loss": 0.0758, - "step": 49050 - }, - { - "epoch": 3.643249665825041, - "grad_norm": 3.3478305339813232, - "learning_rate": 8.140502005049756e-06, - "loss": 0.0692, - "step": 49060 - }, - { - "epoch": 3.6439922768453883, - "grad_norm": 1.4182404279708862, - "learning_rate": 8.13604633892767e-06, - "loss": 0.0813, - "step": 49070 - }, - { - "epoch": 3.644734887865736, - "grad_norm": 0.8223309516906738, - "learning_rate": 8.131590672805584e-06, - "loss": 0.0589, - "step": 49080 - }, - { - "epoch": 3.6454774988860836, - "grad_norm": 3.398197889328003, - "learning_rate": 8.1271350066835e-06, - "loss": 0.049, - "step": 49090 - }, - { - "epoch": 3.646220109906431, - "grad_norm": 0.7745069861412048, - "learning_rate": 8.122679340561414e-06, - "loss": 0.0341, - "step": 49100 - }, - { - "epoch": 3.6469627209267785, - "grad_norm": 1.4048975706100464, - "learning_rate": 8.11822367443933e-06, - "loss": 0.0626, - "step": 49110 - }, - { - "epoch": 3.647705331947126, - "grad_norm": 1.0643982887268066, - "learning_rate": 8.113768008317243e-06, - "loss": 0.0543, - "step": 49120 - }, - { - "epoch": 3.648447942967474, - "grad_norm": 2.2764272689819336, - "learning_rate": 8.109312342195158e-06, - "loss": 0.0827, - "step": 49130 - }, - { - "epoch": 3.6491905539878213, - "grad_norm": 1.6373050212860107, - "learning_rate": 8.104856676073073e-06, - "loss": 0.0593, - "step": 49140 - }, - { - "epoch": 3.649933165008169, - "grad_norm": 0.9760125875473022, - "learning_rate": 8.100401009950988e-06, - "loss": 0.0482, - "step": 49150 - }, - { - "epoch": 3.6506757760285162, - "grad_norm": 0.3532949388027191, - "learning_rate": 8.095945343828903e-06, - "loss": 0.0667, - "step": 49160 - }, - { - "epoch": 3.6514183870488637, - "grad_norm": 1.1069635152816772, - "learning_rate": 8.091489677706818e-06, - "loss": 0.054, - "step": 49170 - }, - { - "epoch": 3.6521609980692116, - "grad_norm": 2.3868649005889893, - "learning_rate": 8.087034011584733e-06, - "loss": 0.0543, - "step": 49180 - }, - { - "epoch": 3.652903609089559, - "grad_norm": 2.724820375442505, - "learning_rate": 8.082578345462646e-06, - "loss": 0.0669, - "step": 49190 - }, - { - "epoch": 3.6536462201099065, - "grad_norm": 1.579540491104126, - "learning_rate": 8.078122679340561e-06, - "loss": 0.0574, - "step": 49200 - }, - { - "epoch": 3.654388831130254, - "grad_norm": 1.0479410886764526, - "learning_rate": 8.073667013218478e-06, - "loss": 0.0563, - "step": 49210 - }, - { - "epoch": 3.6551314421506014, - "grad_norm": 0.6656275391578674, - "learning_rate": 8.069211347096391e-06, - "loss": 0.0449, - "step": 49220 - }, - { - "epoch": 3.6558740531709493, - "grad_norm": 0.6518800258636475, - "learning_rate": 8.064755680974306e-06, - "loss": 0.0884, - "step": 49230 - }, - { - "epoch": 3.6566166641912967, - "grad_norm": 0.2461584061384201, - "learning_rate": 8.06030001485222e-06, - "loss": 0.0426, - "step": 49240 - }, - { - "epoch": 3.657359275211644, - "grad_norm": 1.2033755779266357, - "learning_rate": 8.055844348730136e-06, - "loss": 0.052, - "step": 49250 - }, - { - "epoch": 3.6581018862319916, - "grad_norm": 2.62395977973938, - "learning_rate": 8.051388682608051e-06, - "loss": 0.1079, - "step": 49260 - }, - { - "epoch": 3.658844497252339, - "grad_norm": 0.4178759455680847, - "learning_rate": 8.046933016485964e-06, - "loss": 0.0778, - "step": 49270 - }, - { - "epoch": 3.659587108272687, - "grad_norm": 1.5172581672668457, - "learning_rate": 8.042477350363881e-06, - "loss": 0.1025, - "step": 49280 - }, - { - "epoch": 3.6603297192930344, - "grad_norm": 2.6945109367370605, - "learning_rate": 8.038021684241794e-06, - "loss": 0.0959, - "step": 49290 - }, - { - "epoch": 3.661072330313382, - "grad_norm": 1.2761311531066895, - "learning_rate": 8.03356601811971e-06, - "loss": 0.0477, - "step": 49300 - }, - { - "epoch": 3.6618149413337293, - "grad_norm": 0.8883626461029053, - "learning_rate": 8.029110351997623e-06, - "loss": 0.0615, - "step": 49310 - }, - { - "epoch": 3.6625575523540768, - "grad_norm": 0.7210109829902649, - "learning_rate": 8.02465468587554e-06, - "loss": 0.0618, - "step": 49320 - }, - { - "epoch": 3.6633001633744247, - "grad_norm": 1.4874513149261475, - "learning_rate": 8.020199019753454e-06, - "loss": 0.0522, - "step": 49330 - }, - { - "epoch": 3.664042774394772, - "grad_norm": 0.9390859603881836, - "learning_rate": 8.015743353631368e-06, - "loss": 0.0567, - "step": 49340 - }, - { - "epoch": 3.6647853854151196, - "grad_norm": 0.896518349647522, - "learning_rate": 8.011287687509283e-06, - "loss": 0.0589, - "step": 49350 - }, - { - "epoch": 3.665527996435467, - "grad_norm": 2.0531513690948486, - "learning_rate": 8.006832021387198e-06, - "loss": 0.0871, - "step": 49360 - }, - { - "epoch": 3.6662706074558145, - "grad_norm": 1.1506352424621582, - "learning_rate": 8.002376355265113e-06, - "loss": 0.068, - "step": 49370 - }, - { - "epoch": 3.6670132184761624, - "grad_norm": 0.7232264280319214, - "learning_rate": 7.997920689143028e-06, - "loss": 0.0859, - "step": 49380 - }, - { - "epoch": 3.66775582949651, - "grad_norm": 2.220487594604492, - "learning_rate": 7.99346502302094e-06, - "loss": 0.0333, - "step": 49390 - }, - { - "epoch": 3.6684984405168573, - "grad_norm": 0.4155381917953491, - "learning_rate": 7.989009356898858e-06, - "loss": 0.0488, - "step": 49400 - }, - { - "epoch": 3.6692410515372047, - "grad_norm": 0.726426362991333, - "learning_rate": 7.98455369077677e-06, - "loss": 0.0712, - "step": 49410 - }, - { - "epoch": 3.669983662557552, - "grad_norm": 1.152137041091919, - "learning_rate": 7.980098024654686e-06, - "loss": 0.0479, - "step": 49420 - }, - { - "epoch": 3.6707262735779, - "grad_norm": 0.6122065782546997, - "learning_rate": 7.9756423585326e-06, - "loss": 0.0538, - "step": 49430 - }, - { - "epoch": 3.6714688845982475, - "grad_norm": 1.2292253971099854, - "learning_rate": 7.971186692410516e-06, - "loss": 0.046, - "step": 49440 - }, - { - "epoch": 3.672211495618595, - "grad_norm": 0.447689950466156, - "learning_rate": 7.96673102628843e-06, - "loss": 0.0483, - "step": 49450 - }, - { - "epoch": 3.6729541066389424, - "grad_norm": 0.4693281352519989, - "learning_rate": 7.962275360166344e-06, - "loss": 0.0252, - "step": 49460 - }, - { - "epoch": 3.67369671765929, - "grad_norm": 1.050255298614502, - "learning_rate": 7.95781969404426e-06, - "loss": 0.0934, - "step": 49470 - }, - { - "epoch": 3.6744393286796377, - "grad_norm": 1.7105246782302856, - "learning_rate": 7.953364027922174e-06, - "loss": 0.0491, - "step": 49480 - }, - { - "epoch": 3.675181939699985, - "grad_norm": 1.1339107751846313, - "learning_rate": 7.948908361800089e-06, - "loss": 0.0531, - "step": 49490 - }, - { - "epoch": 3.6759245507203326, - "grad_norm": 2.340886116027832, - "learning_rate": 7.944452695678004e-06, - "loss": 0.0547, - "step": 49500 - }, - { - "epoch": 3.67666716174068, - "grad_norm": 0.6771245002746582, - "learning_rate": 7.939997029555919e-06, - "loss": 0.0726, - "step": 49510 - }, - { - "epoch": 3.6774097727610275, - "grad_norm": 1.6597684621810913, - "learning_rate": 7.935541363433834e-06, - "loss": 0.0894, - "step": 49520 - }, - { - "epoch": 3.6781523837813754, - "grad_norm": 2.375394821166992, - "learning_rate": 7.931085697311747e-06, - "loss": 0.0645, - "step": 49530 - }, - { - "epoch": 3.678894994801723, - "grad_norm": 1.4455935955047607, - "learning_rate": 7.926630031189662e-06, - "loss": 0.06, - "step": 49540 - }, - { - "epoch": 3.6796376058220703, - "grad_norm": 0.4095980226993561, - "learning_rate": 7.922174365067577e-06, - "loss": 0.0667, - "step": 49550 - }, - { - "epoch": 3.680380216842418, - "grad_norm": 0.5983967781066895, - "learning_rate": 7.917718698945492e-06, - "loss": 0.0564, - "step": 49560 - }, - { - "epoch": 3.6811228278627652, - "grad_norm": 0.8008362054824829, - "learning_rate": 7.913263032823407e-06, - "loss": 0.0642, - "step": 49570 - }, - { - "epoch": 3.681865438883113, - "grad_norm": 4.587621688842773, - "learning_rate": 7.908807366701322e-06, - "loss": 0.0828, - "step": 49580 - }, - { - "epoch": 3.6826080499034606, - "grad_norm": 2.4655842781066895, - "learning_rate": 7.904351700579237e-06, - "loss": 0.0508, - "step": 49590 - }, - { - "epoch": 3.683350660923808, - "grad_norm": 3.4618539810180664, - "learning_rate": 7.89989603445715e-06, - "loss": 0.0668, - "step": 49600 - }, - { - "epoch": 3.684093271944156, - "grad_norm": 0.5393896698951721, - "learning_rate": 7.895440368335066e-06, - "loss": 0.0231, - "step": 49610 - }, - { - "epoch": 3.684835882964503, - "grad_norm": 2.3107566833496094, - "learning_rate": 7.890984702212982e-06, - "loss": 0.0643, - "step": 49620 - }, - { - "epoch": 3.685578493984851, - "grad_norm": 0.5587324500083923, - "learning_rate": 7.886529036090896e-06, - "loss": 0.0612, - "step": 49630 - }, - { - "epoch": 3.6863211050051983, - "grad_norm": 3.5032103061676025, - "learning_rate": 7.88207336996881e-06, - "loss": 0.0796, - "step": 49640 - }, - { - "epoch": 3.6870637160255457, - "grad_norm": 1.4119980335235596, - "learning_rate": 7.877617703846724e-06, - "loss": 0.0507, - "step": 49650 - }, - { - "epoch": 3.6878063270458936, - "grad_norm": 2.8842852115631104, - "learning_rate": 7.87316203772464e-06, - "loss": 0.0847, - "step": 49660 - }, - { - "epoch": 3.688548938066241, - "grad_norm": 2.1859562397003174, - "learning_rate": 7.868706371602556e-06, - "loss": 0.0462, - "step": 49670 - }, - { - "epoch": 3.6892915490865885, - "grad_norm": 1.2712724208831787, - "learning_rate": 7.864250705480469e-06, - "loss": 0.0603, - "step": 49680 - }, - { - "epoch": 3.690034160106936, - "grad_norm": 0.29489457607269287, - "learning_rate": 7.859795039358386e-06, - "loss": 0.0504, - "step": 49690 - }, - { - "epoch": 3.6907767711272834, - "grad_norm": 1.2111194133758545, - "learning_rate": 7.855339373236299e-06, - "loss": 0.0466, - "step": 49700 - }, - { - "epoch": 3.6915193821476313, - "grad_norm": 0.5714041590690613, - "learning_rate": 7.850883707114214e-06, - "loss": 0.0915, - "step": 49710 - }, - { - "epoch": 3.6922619931679788, - "grad_norm": 1.720264196395874, - "learning_rate": 7.846428040992127e-06, - "loss": 0.0974, - "step": 49720 - }, - { - "epoch": 3.693004604188326, - "grad_norm": 1.2143886089324951, - "learning_rate": 7.841972374870044e-06, - "loss": 0.047, - "step": 49730 - }, - { - "epoch": 3.6937472152086737, - "grad_norm": 1.8155580759048462, - "learning_rate": 7.837516708747959e-06, - "loss": 0.0645, - "step": 49740 - }, - { - "epoch": 3.694489826229021, - "grad_norm": 1.720137596130371, - "learning_rate": 7.833061042625872e-06, - "loss": 0.0603, - "step": 49750 - }, - { - "epoch": 3.695232437249369, - "grad_norm": 1.3623203039169312, - "learning_rate": 7.828605376503787e-06, - "loss": 0.0717, - "step": 49760 - }, - { - "epoch": 3.6959750482697165, - "grad_norm": 0.7780874967575073, - "learning_rate": 7.824149710381702e-06, - "loss": 0.0413, - "step": 49770 - }, - { - "epoch": 3.696717659290064, - "grad_norm": 1.2925565242767334, - "learning_rate": 7.819694044259617e-06, - "loss": 0.0699, - "step": 49780 - }, - { - "epoch": 3.6974602703104114, - "grad_norm": 1.491782546043396, - "learning_rate": 7.815238378137532e-06, - "loss": 0.0693, - "step": 49790 - }, - { - "epoch": 3.698202881330759, - "grad_norm": 1.258483648300171, - "learning_rate": 7.810782712015445e-06, - "loss": 0.0523, - "step": 49800 - }, - { - "epoch": 3.6989454923511067, - "grad_norm": 0.5992489457130432, - "learning_rate": 7.806327045893362e-06, - "loss": 0.0645, - "step": 49810 - }, - { - "epoch": 3.699688103371454, - "grad_norm": 2.4607632160186768, - "learning_rate": 7.801871379771275e-06, - "loss": 0.0682, - "step": 49820 - }, - { - "epoch": 3.7004307143918016, - "grad_norm": 1.1010299921035767, - "learning_rate": 7.79741571364919e-06, - "loss": 0.0794, - "step": 49830 - }, - { - "epoch": 3.701173325412149, - "grad_norm": 3.313181161880493, - "learning_rate": 7.792960047527105e-06, - "loss": 0.0543, - "step": 49840 - }, - { - "epoch": 3.7019159364324965, - "grad_norm": 2.4023070335388184, - "learning_rate": 7.78850438140502e-06, - "loss": 0.0673, - "step": 49850 - }, - { - "epoch": 3.7026585474528444, - "grad_norm": 1.3617714643478394, - "learning_rate": 7.784048715282935e-06, - "loss": 0.0477, - "step": 49860 - }, - { - "epoch": 3.703401158473192, - "grad_norm": 1.4105480909347534, - "learning_rate": 7.779593049160849e-06, - "loss": 0.0659, - "step": 49870 - }, - { - "epoch": 3.7041437694935393, - "grad_norm": 2.565361261367798, - "learning_rate": 7.775137383038765e-06, - "loss": 0.0741, - "step": 49880 - }, - { - "epoch": 3.7048863805138867, - "grad_norm": 1.3081820011138916, - "learning_rate": 7.770681716916679e-06, - "loss": 0.0527, - "step": 49890 - }, - { - "epoch": 3.705628991534234, - "grad_norm": 0.5387427806854248, - "learning_rate": 7.766226050794594e-06, - "loss": 0.0848, - "step": 49900 - }, - { - "epoch": 3.706371602554582, - "grad_norm": 1.5753438472747803, - "learning_rate": 7.761770384672509e-06, - "loss": 0.0789, - "step": 49910 - }, - { - "epoch": 3.7071142135749295, - "grad_norm": 1.9173258543014526, - "learning_rate": 7.757314718550424e-06, - "loss": 0.0863, - "step": 49920 - }, - { - "epoch": 3.707856824595277, - "grad_norm": 0.6267051696777344, - "learning_rate": 7.752859052428339e-06, - "loss": 0.0511, - "step": 49930 - }, - { - "epoch": 3.7085994356156244, - "grad_norm": 0.7196197509765625, - "learning_rate": 7.748403386306252e-06, - "loss": 0.07, - "step": 49940 - }, - { - "epoch": 3.709342046635972, - "grad_norm": 1.6515315771102905, - "learning_rate": 7.743947720184169e-06, - "loss": 0.0634, - "step": 49950 - }, - { - "epoch": 3.71008465765632, - "grad_norm": 1.1136648654937744, - "learning_rate": 7.739492054062084e-06, - "loss": 0.0372, - "step": 49960 - }, - { - "epoch": 3.7108272686766672, - "grad_norm": 1.6340572834014893, - "learning_rate": 7.735036387939997e-06, - "loss": 0.0599, - "step": 49970 - }, - { - "epoch": 3.7115698796970147, - "grad_norm": 1.4177989959716797, - "learning_rate": 7.730580721817912e-06, - "loss": 0.077, - "step": 49980 - }, - { - "epoch": 3.712312490717362, - "grad_norm": 1.4736772775650024, - "learning_rate": 7.726125055695827e-06, - "loss": 0.0872, - "step": 49990 - }, - { - "epoch": 3.7130551017377096, - "grad_norm": 1.0160552263259888, - "learning_rate": 7.721669389573742e-06, - "loss": 0.0432, - "step": 50000 - }, - { - "epoch": 3.7137977127580575, - "grad_norm": 2.048893451690674, - "learning_rate": 7.717213723451655e-06, - "loss": 0.0606, - "step": 50010 - }, - { - "epoch": 3.714540323778405, - "grad_norm": 0.3452848494052887, - "learning_rate": 7.71275805732957e-06, - "loss": 0.0257, - "step": 50020 - }, - { - "epoch": 3.7152829347987524, - "grad_norm": 2.9031543731689453, - "learning_rate": 7.708302391207487e-06, - "loss": 0.0525, - "step": 50030 - }, - { - "epoch": 3.7160255458191, - "grad_norm": 1.9653599262237549, - "learning_rate": 7.7038467250854e-06, - "loss": 0.0811, - "step": 50040 - }, - { - "epoch": 3.7167681568394473, - "grad_norm": 0.6306089758872986, - "learning_rate": 7.699391058963315e-06, - "loss": 0.0619, - "step": 50050 - }, - { - "epoch": 3.717510767859795, - "grad_norm": 2.5835585594177246, - "learning_rate": 7.694935392841229e-06, - "loss": 0.0772, - "step": 50060 - }, - { - "epoch": 3.7182533788801426, - "grad_norm": 1.6114825010299683, - "learning_rate": 7.690479726719145e-06, - "loss": 0.0758, - "step": 50070 - }, - { - "epoch": 3.71899598990049, - "grad_norm": 2.363929271697998, - "learning_rate": 7.68602406059706e-06, - "loss": 0.0746, - "step": 50080 - }, - { - "epoch": 3.7197386009208375, - "grad_norm": 0.5508888959884644, - "learning_rate": 7.681568394474973e-06, - "loss": 0.0874, - "step": 50090 - }, - { - "epoch": 3.720481211941185, - "grad_norm": 0.6778455972671509, - "learning_rate": 7.67711272835289e-06, - "loss": 0.0444, - "step": 50100 - }, - { - "epoch": 3.721223822961533, - "grad_norm": 0.6257083415985107, - "learning_rate": 7.672657062230803e-06, - "loss": 0.0501, - "step": 50110 - }, - { - "epoch": 3.7219664339818803, - "grad_norm": 0.7296163439750671, - "learning_rate": 7.668201396108718e-06, - "loss": 0.0719, - "step": 50120 - }, - { - "epoch": 3.7227090450022278, - "grad_norm": 1.6046028137207031, - "learning_rate": 7.663745729986632e-06, - "loss": 0.0848, - "step": 50130 - }, - { - "epoch": 3.723451656022575, - "grad_norm": 2.0736193656921387, - "learning_rate": 7.659290063864548e-06, - "loss": 0.0585, - "step": 50140 - }, - { - "epoch": 3.7241942670429227, - "grad_norm": 3.5360021591186523, - "learning_rate": 7.654834397742463e-06, - "loss": 0.0601, - "step": 50150 - }, - { - "epoch": 3.7249368780632706, - "grad_norm": 1.6731319427490234, - "learning_rate": 7.650378731620377e-06, - "loss": 0.0783, - "step": 50160 - }, - { - "epoch": 3.725679489083618, - "grad_norm": 1.434248685836792, - "learning_rate": 7.645923065498292e-06, - "loss": 0.0688, - "step": 50170 - }, - { - "epoch": 3.7264221001039655, - "grad_norm": 2.1045992374420166, - "learning_rate": 7.641467399376207e-06, - "loss": 0.0679, - "step": 50180 - }, - { - "epoch": 3.7271647111243134, - "grad_norm": 2.572453022003174, - "learning_rate": 7.637011733254122e-06, - "loss": 0.0378, - "step": 50190 - }, - { - "epoch": 3.7279073221446604, - "grad_norm": 2.0661511421203613, - "learning_rate": 7.632556067132037e-06, - "loss": 0.0657, - "step": 50200 - }, - { - "epoch": 3.7286499331650083, - "grad_norm": 4.1037278175354, - "learning_rate": 7.628100401009952e-06, - "loss": 0.0909, - "step": 50210 - }, - { - "epoch": 3.7293925441853557, - "grad_norm": 2.651695728302002, - "learning_rate": 7.623644734887866e-06, - "loss": 0.07, - "step": 50220 - }, - { - "epoch": 3.730135155205703, - "grad_norm": 1.7881807088851929, - "learning_rate": 7.619189068765781e-06, - "loss": 0.0622, - "step": 50230 - }, - { - "epoch": 3.730877766226051, - "grad_norm": 0.9953715801239014, - "learning_rate": 7.614733402643695e-06, - "loss": 0.0471, - "step": 50240 - }, - { - "epoch": 3.7316203772463985, - "grad_norm": 1.1658798456192017, - "learning_rate": 7.610277736521611e-06, - "loss": 0.0636, - "step": 50250 - }, - { - "epoch": 3.732362988266746, - "grad_norm": 1.1256842613220215, - "learning_rate": 7.605822070399525e-06, - "loss": 0.0665, - "step": 50260 - }, - { - "epoch": 3.7331055992870934, - "grad_norm": 0.6367254853248596, - "learning_rate": 7.601366404277439e-06, - "loss": 0.052, - "step": 50270 - }, - { - "epoch": 3.733848210307441, - "grad_norm": 2.0500311851501465, - "learning_rate": 7.596910738155354e-06, - "loss": 0.0545, - "step": 50280 - }, - { - "epoch": 3.7345908213277887, - "grad_norm": 0.8507987856864929, - "learning_rate": 7.59245507203327e-06, - "loss": 0.0459, - "step": 50290 - }, - { - "epoch": 3.735333432348136, - "grad_norm": 1.231722354888916, - "learning_rate": 7.587999405911184e-06, - "loss": 0.0689, - "step": 50300 - }, - { - "epoch": 3.7360760433684836, - "grad_norm": 1.071260929107666, - "learning_rate": 7.583543739789098e-06, - "loss": 0.0589, - "step": 50310 - }, - { - "epoch": 3.736818654388831, - "grad_norm": 0.9924083948135376, - "learning_rate": 7.5790880736670124e-06, - "loss": 0.0662, - "step": 50320 - }, - { - "epoch": 3.7375612654091785, - "grad_norm": 3.8950726985931396, - "learning_rate": 7.574632407544928e-06, - "loss": 0.0801, - "step": 50330 - }, - { - "epoch": 3.7383038764295264, - "grad_norm": 1.2569303512573242, - "learning_rate": 7.570176741422843e-06, - "loss": 0.0549, - "step": 50340 - }, - { - "epoch": 3.739046487449874, - "grad_norm": 2.5183095932006836, - "learning_rate": 7.565721075300757e-06, - "loss": 0.071, - "step": 50350 - }, - { - "epoch": 3.7397890984702213, - "grad_norm": 0.9033598303794861, - "learning_rate": 7.561265409178673e-06, - "loss": 0.0671, - "step": 50360 - }, - { - "epoch": 3.740531709490569, - "grad_norm": 3.6152873039245605, - "learning_rate": 7.556809743056587e-06, - "loss": 0.0498, - "step": 50370 - }, - { - "epoch": 3.7412743205109162, - "grad_norm": 0.6265145540237427, - "learning_rate": 7.5523540769345015e-06, - "loss": 0.0791, - "step": 50380 - }, - { - "epoch": 3.742016931531264, - "grad_norm": 0.47644293308258057, - "learning_rate": 7.547898410812416e-06, - "loss": 0.0719, - "step": 50390 - }, - { - "epoch": 3.7427595425516116, - "grad_norm": 1.28927743434906, - "learning_rate": 7.543442744690332e-06, - "loss": 0.0471, - "step": 50400 - }, - { - "epoch": 3.743502153571959, - "grad_norm": 0.5300150513648987, - "learning_rate": 7.5389870785682465e-06, - "loss": 0.0565, - "step": 50410 - }, - { - "epoch": 3.7442447645923065, - "grad_norm": 0.5767967104911804, - "learning_rate": 7.534531412446161e-06, - "loss": 0.0767, - "step": 50420 - }, - { - "epoch": 3.744987375612654, - "grad_norm": 1.1901088953018188, - "learning_rate": 7.530075746324075e-06, - "loss": 0.0639, - "step": 50430 - }, - { - "epoch": 3.745729986633002, - "grad_norm": 0.5107831954956055, - "learning_rate": 7.525620080201991e-06, - "loss": 0.0423, - "step": 50440 - }, - { - "epoch": 3.7464725976533493, - "grad_norm": 0.25454196333885193, - "learning_rate": 7.521164414079905e-06, - "loss": 0.0639, - "step": 50450 - }, - { - "epoch": 3.7472152086736967, - "grad_norm": 0.2913890480995178, - "learning_rate": 7.51670874795782e-06, - "loss": 0.0553, - "step": 50460 - }, - { - "epoch": 3.747957819694044, - "grad_norm": 1.8346703052520752, - "learning_rate": 7.512253081835736e-06, - "loss": 0.0751, - "step": 50470 - }, - { - "epoch": 3.7487004307143916, - "grad_norm": 1.856366753578186, - "learning_rate": 7.50779741571365e-06, - "loss": 0.0852, - "step": 50480 - }, - { - "epoch": 3.7494430417347395, - "grad_norm": 1.0152894258499146, - "learning_rate": 7.503341749591564e-06, - "loss": 0.0439, - "step": 50490 - }, - { - "epoch": 3.750185652755087, - "grad_norm": 1.121036171913147, - "learning_rate": 7.498886083469479e-06, - "loss": 0.0781, - "step": 50500 - }, - { - "epoch": 3.7509282637754344, - "grad_norm": 3.682908535003662, - "learning_rate": 7.494430417347393e-06, - "loss": 0.0624, - "step": 50510 - }, - { - "epoch": 3.751670874795782, - "grad_norm": 1.5728288888931274, - "learning_rate": 7.489974751225309e-06, - "loss": 0.0812, - "step": 50520 - }, - { - "epoch": 3.7524134858161293, - "grad_norm": 1.3011523485183716, - "learning_rate": 7.485519085103223e-06, - "loss": 0.1094, - "step": 50530 - }, - { - "epoch": 3.753156096836477, - "grad_norm": 1.683307409286499, - "learning_rate": 7.481063418981138e-06, - "loss": 0.0715, - "step": 50540 - }, - { - "epoch": 3.7538987078568247, - "grad_norm": 0.9677258729934692, - "learning_rate": 7.476607752859052e-06, - "loss": 0.0619, - "step": 50550 - }, - { - "epoch": 3.754641318877172, - "grad_norm": 1.786702036857605, - "learning_rate": 7.472152086736967e-06, - "loss": 0.0721, - "step": 50560 - }, - { - "epoch": 3.7553839298975196, - "grad_norm": 4.346724987030029, - "learning_rate": 7.467696420614882e-06, - "loss": 0.0477, - "step": 50570 - }, - { - "epoch": 3.756126540917867, - "grad_norm": 0.49064943194389343, - "learning_rate": 7.463240754492797e-06, - "loss": 0.073, - "step": 50580 - }, - { - "epoch": 3.756869151938215, - "grad_norm": 0.6625070571899414, - "learning_rate": 7.458785088370711e-06, - "loss": 0.0626, - "step": 50590 - }, - { - "epoch": 3.7576117629585624, - "grad_norm": 3.1530327796936035, - "learning_rate": 7.454329422248626e-06, - "loss": 0.0642, - "step": 50600 - }, - { - "epoch": 3.75835437397891, - "grad_norm": 0.38084813952445984, - "learning_rate": 7.449873756126541e-06, - "loss": 0.0687, - "step": 50610 - }, - { - "epoch": 3.7590969849992573, - "grad_norm": 0.8041538000106812, - "learning_rate": 7.4454180900044555e-06, - "loss": 0.0677, - "step": 50620 - }, - { - "epoch": 3.7598395960196047, - "grad_norm": 1.4357954263687134, - "learning_rate": 7.440962423882371e-06, - "loss": 0.0461, - "step": 50630 - }, - { - "epoch": 3.7605822070399526, - "grad_norm": 0.9471798539161682, - "learning_rate": 7.4365067577602855e-06, - "loss": 0.0871, - "step": 50640 - }, - { - "epoch": 3.7613248180603, - "grad_norm": 1.356444001197815, - "learning_rate": 7.4320510916382005e-06, - "loss": 0.0514, - "step": 50650 - }, - { - "epoch": 3.7620674290806475, - "grad_norm": 0.9252959489822388, - "learning_rate": 7.427595425516115e-06, - "loss": 0.0675, - "step": 50660 - }, - { - "epoch": 3.762810040100995, - "grad_norm": 1.9478005170822144, - "learning_rate": 7.42313975939403e-06, - "loss": 0.0473, - "step": 50670 - }, - { - "epoch": 3.7635526511213424, - "grad_norm": 0.6486461162567139, - "learning_rate": 7.418684093271944e-06, - "loss": 0.0632, - "step": 50680 - }, - { - "epoch": 3.7642952621416903, - "grad_norm": 1.0158225297927856, - "learning_rate": 7.41422842714986e-06, - "loss": 0.0945, - "step": 50690 - }, - { - "epoch": 3.7650378731620378, - "grad_norm": 1.4446204900741577, - "learning_rate": 7.409772761027774e-06, - "loss": 0.0441, - "step": 50700 - }, - { - "epoch": 3.765780484182385, - "grad_norm": 1.2535960674285889, - "learning_rate": 7.405317094905689e-06, - "loss": 0.0414, - "step": 50710 - }, - { - "epoch": 3.7665230952027327, - "grad_norm": 0.49216005206108093, - "learning_rate": 7.400861428783603e-06, - "loss": 0.0505, - "step": 50720 - }, - { - "epoch": 3.76726570622308, - "grad_norm": 1.1775668859481812, - "learning_rate": 7.396405762661518e-06, - "loss": 0.0713, - "step": 50730 - }, - { - "epoch": 3.768008317243428, - "grad_norm": 1.1207523345947266, - "learning_rate": 7.391950096539433e-06, - "loss": 0.0645, - "step": 50740 - }, - { - "epoch": 3.7687509282637754, - "grad_norm": 0.7334256172180176, - "learning_rate": 7.387494430417348e-06, - "loss": 0.0407, - "step": 50750 - }, - { - "epoch": 3.769493539284123, - "grad_norm": 0.34325599670410156, - "learning_rate": 7.383038764295263e-06, - "loss": 0.0618, - "step": 50760 - }, - { - "epoch": 3.770236150304471, - "grad_norm": 1.9858282804489136, - "learning_rate": 7.378583098173177e-06, - "loss": 0.0502, - "step": 50770 - }, - { - "epoch": 3.770978761324818, - "grad_norm": 0.8460551500320435, - "learning_rate": 7.374127432051092e-06, - "loss": 0.0662, - "step": 50780 - }, - { - "epoch": 3.7717213723451657, - "grad_norm": 1.374432921409607, - "learning_rate": 7.369671765929006e-06, - "loss": 0.0294, - "step": 50790 - }, - { - "epoch": 3.772463983365513, - "grad_norm": 0.547275722026825, - "learning_rate": 7.365216099806921e-06, - "loss": 0.0512, - "step": 50800 - }, - { - "epoch": 3.7732065943858606, - "grad_norm": 2.171943426132202, - "learning_rate": 7.360760433684836e-06, - "loss": 0.074, - "step": 50810 - }, - { - "epoch": 3.7739492054062085, - "grad_norm": 5.715026378631592, - "learning_rate": 7.356304767562751e-06, - "loss": 0.0664, - "step": 50820 - }, - { - "epoch": 3.774691816426556, - "grad_norm": 0.8222048282623291, - "learning_rate": 7.351849101440665e-06, - "loss": 0.0573, - "step": 50830 - }, - { - "epoch": 3.7754344274469034, - "grad_norm": 0.7894913554191589, - "learning_rate": 7.34739343531858e-06, - "loss": 0.0447, - "step": 50840 - }, - { - "epoch": 3.776177038467251, - "grad_norm": 0.9093634486198425, - "learning_rate": 7.342937769196494e-06, - "loss": 0.0685, - "step": 50850 - }, - { - "epoch": 3.7769196494875983, - "grad_norm": 0.49111273884773254, - "learning_rate": 7.338482103074409e-06, - "loss": 0.0421, - "step": 50860 - }, - { - "epoch": 3.777662260507946, - "grad_norm": 2.105043411254883, - "learning_rate": 7.334026436952325e-06, - "loss": 0.0791, - "step": 50870 - }, - { - "epoch": 3.7784048715282936, - "grad_norm": 0.9441326856613159, - "learning_rate": 7.329570770830239e-06, - "loss": 0.0506, - "step": 50880 - }, - { - "epoch": 3.779147482548641, - "grad_norm": 0.6536591053009033, - "learning_rate": 7.325115104708154e-06, - "loss": 0.0574, - "step": 50890 - }, - { - "epoch": 3.7798900935689885, - "grad_norm": 0.7419950366020203, - "learning_rate": 7.3206594385860685e-06, - "loss": 0.0576, - "step": 50900 - }, - { - "epoch": 3.780632704589336, - "grad_norm": 0.9035283923149109, - "learning_rate": 7.3162037724639835e-06, - "loss": 0.0522, - "step": 50910 - }, - { - "epoch": 3.781375315609684, - "grad_norm": 1.0308187007904053, - "learning_rate": 7.3117481063418985e-06, - "loss": 0.0575, - "step": 50920 - }, - { - "epoch": 3.7821179266300313, - "grad_norm": 1.2720097303390503, - "learning_rate": 7.3072924402198135e-06, - "loss": 0.0663, - "step": 50930 - }, - { - "epoch": 3.7828605376503788, - "grad_norm": 2.122695207595825, - "learning_rate": 7.302836774097728e-06, - "loss": 0.0483, - "step": 50940 - }, - { - "epoch": 3.7836031486707262, - "grad_norm": 1.060192346572876, - "learning_rate": 7.298381107975643e-06, - "loss": 0.0663, - "step": 50950 - }, - { - "epoch": 3.7843457596910737, - "grad_norm": 2.148590087890625, - "learning_rate": 7.293925441853557e-06, - "loss": 0.068, - "step": 50960 - }, - { - "epoch": 3.7850883707114216, - "grad_norm": 3.7318813800811768, - "learning_rate": 7.289469775731472e-06, - "loss": 0.0743, - "step": 50970 - }, - { - "epoch": 3.785830981731769, - "grad_norm": 0.6228942275047302, - "learning_rate": 7.285014109609387e-06, - "loss": 0.0545, - "step": 50980 - }, - { - "epoch": 3.7865735927521165, - "grad_norm": 0.46231064200401306, - "learning_rate": 7.280558443487302e-06, - "loss": 0.0374, - "step": 50990 - }, - { - "epoch": 3.787316203772464, - "grad_norm": 1.6403611898422241, - "learning_rate": 7.276102777365217e-06, - "loss": 0.0674, - "step": 51000 - }, - { - "epoch": 3.7880588147928114, - "grad_norm": 0.5807299613952637, - "learning_rate": 7.271647111243131e-06, - "loss": 0.0653, - "step": 51010 - }, - { - "epoch": 3.7888014258131593, - "grad_norm": 2.2317705154418945, - "learning_rate": 7.267191445121046e-06, - "loss": 0.0464, - "step": 51020 - }, - { - "epoch": 3.7895440368335067, - "grad_norm": 1.0599946975708008, - "learning_rate": 7.26273577899896e-06, - "loss": 0.0787, - "step": 51030 - }, - { - "epoch": 3.790286647853854, - "grad_norm": 0.7271379232406616, - "learning_rate": 7.258280112876876e-06, - "loss": 0.0682, - "step": 51040 - }, - { - "epoch": 3.7910292588742016, - "grad_norm": 1.6912349462509155, - "learning_rate": 7.25382444675479e-06, - "loss": 0.0537, - "step": 51050 - }, - { - "epoch": 3.791771869894549, - "grad_norm": 1.233393907546997, - "learning_rate": 7.249368780632705e-06, - "loss": 0.0698, - "step": 51060 - }, - { - "epoch": 3.792514480914897, - "grad_norm": 1.0007754564285278, - "learning_rate": 7.244913114510619e-06, - "loss": 0.0515, - "step": 51070 - }, - { - "epoch": 3.7932570919352444, - "grad_norm": 1.4426878690719604, - "learning_rate": 7.240457448388534e-06, - "loss": 0.0731, - "step": 51080 - }, - { - "epoch": 3.793999702955592, - "grad_norm": 0.7018789649009705, - "learning_rate": 7.236001782266448e-06, - "loss": 0.0422, - "step": 51090 - }, - { - "epoch": 3.7947423139759393, - "grad_norm": 0.8804644346237183, - "learning_rate": 7.231546116144364e-06, - "loss": 0.0557, - "step": 51100 - }, - { - "epoch": 3.7954849249962868, - "grad_norm": 0.588465690612793, - "learning_rate": 7.227090450022278e-06, - "loss": 0.0795, - "step": 51110 - }, - { - "epoch": 3.7962275360166347, - "grad_norm": 1.5997480154037476, - "learning_rate": 7.222634783900193e-06, - "loss": 0.0358, - "step": 51120 - }, - { - "epoch": 3.796970147036982, - "grad_norm": 0.4901600182056427, - "learning_rate": 7.218179117778108e-06, - "loss": 0.0743, - "step": 51130 - }, - { - "epoch": 3.7977127580573296, - "grad_norm": 1.299644947052002, - "learning_rate": 7.2137234516560225e-06, - "loss": 0.0445, - "step": 51140 - }, - { - "epoch": 3.798455369077677, - "grad_norm": 2.474388599395752, - "learning_rate": 7.2092677855339375e-06, - "loss": 0.0676, - "step": 51150 - }, - { - "epoch": 3.7991979800980245, - "grad_norm": 0.26905447244644165, - "learning_rate": 7.2048121194118525e-06, - "loss": 0.0655, - "step": 51160 - }, - { - "epoch": 3.7999405911183723, - "grad_norm": 1.1446030139923096, - "learning_rate": 7.2003564532897675e-06, - "loss": 0.056, - "step": 51170 - }, - { - "epoch": 3.80068320213872, - "grad_norm": 0.9644284844398499, - "learning_rate": 7.195900787167682e-06, - "loss": 0.0443, - "step": 51180 - }, - { - "epoch": 3.8014258131590672, - "grad_norm": 1.8860034942626953, - "learning_rate": 7.191445121045597e-06, - "loss": 0.0591, - "step": 51190 - }, - { - "epoch": 3.8021684241794147, - "grad_norm": 1.3141664266586304, - "learning_rate": 7.186989454923511e-06, - "loss": 0.0319, - "step": 51200 - }, - { - "epoch": 3.802911035199762, - "grad_norm": 1.2933363914489746, - "learning_rate": 7.182533788801426e-06, - "loss": 0.0596, - "step": 51210 - }, - { - "epoch": 3.80365364622011, - "grad_norm": 1.3324415683746338, - "learning_rate": 7.178078122679341e-06, - "loss": 0.0632, - "step": 51220 - }, - { - "epoch": 3.8043962572404575, - "grad_norm": 1.7264912128448486, - "learning_rate": 7.173622456557256e-06, - "loss": 0.0858, - "step": 51230 - }, - { - "epoch": 3.805138868260805, - "grad_norm": 1.394806146621704, - "learning_rate": 7.16916679043517e-06, - "loss": 0.0673, - "step": 51240 - }, - { - "epoch": 3.8058814792811524, - "grad_norm": 0.5998439788818359, - "learning_rate": 7.164711124313085e-06, - "loss": 0.0434, - "step": 51250 - }, - { - "epoch": 3.8066240903015, - "grad_norm": 1.282382607460022, - "learning_rate": 7.160255458191e-06, - "loss": 0.0696, - "step": 51260 - }, - { - "epoch": 3.8073667013218477, - "grad_norm": 1.0203378200531006, - "learning_rate": 7.155799792068915e-06, - "loss": 0.0709, - "step": 51270 - }, - { - "epoch": 3.808109312342195, - "grad_norm": 1.8482102155685425, - "learning_rate": 7.15134412594683e-06, - "loss": 0.0499, - "step": 51280 - }, - { - "epoch": 3.8088519233625426, - "grad_norm": 1.2708896398544312, - "learning_rate": 7.146888459824744e-06, - "loss": 0.0467, - "step": 51290 - }, - { - "epoch": 3.80959453438289, - "grad_norm": 2.240234375, - "learning_rate": 7.142432793702659e-06, - "loss": 0.0742, - "step": 51300 - }, - { - "epoch": 3.8103371454032375, - "grad_norm": 1.0520896911621094, - "learning_rate": 7.137977127580573e-06, - "loss": 0.0631, - "step": 51310 - }, - { - "epoch": 3.8110797564235854, - "grad_norm": 0.6658138036727905, - "learning_rate": 7.133521461458488e-06, - "loss": 0.0522, - "step": 51320 - }, - { - "epoch": 3.811822367443933, - "grad_norm": 1.1442534923553467, - "learning_rate": 7.129065795336403e-06, - "loss": 0.0711, - "step": 51330 - }, - { - "epoch": 3.8125649784642803, - "grad_norm": 2.0070974826812744, - "learning_rate": 7.124610129214318e-06, - "loss": 0.0479, - "step": 51340 - }, - { - "epoch": 3.8133075894846282, - "grad_norm": 1.5337635278701782, - "learning_rate": 7.120154463092232e-06, - "loss": 0.0552, - "step": 51350 - }, - { - "epoch": 3.8140502005049752, - "grad_norm": 2.758072853088379, - "learning_rate": 7.115698796970147e-06, - "loss": 0.0632, - "step": 51360 - }, - { - "epoch": 3.814792811525323, - "grad_norm": 1.1585407257080078, - "learning_rate": 7.111243130848061e-06, - "loss": 0.0559, - "step": 51370 - }, - { - "epoch": 3.8155354225456706, - "grad_norm": 0.7389517426490784, - "learning_rate": 7.106787464725976e-06, - "loss": 0.0523, - "step": 51380 - }, - { - "epoch": 3.816278033566018, - "grad_norm": 1.0852411985397339, - "learning_rate": 7.102331798603892e-06, - "loss": 0.0459, - "step": 51390 - }, - { - "epoch": 3.817020644586366, - "grad_norm": 1.4274524450302124, - "learning_rate": 7.097876132481806e-06, - "loss": 0.0707, - "step": 51400 - }, - { - "epoch": 3.8177632556067134, - "grad_norm": 2.2135133743286133, - "learning_rate": 7.093420466359721e-06, - "loss": 0.0488, - "step": 51410 - }, - { - "epoch": 3.818505866627061, - "grad_norm": 2.0411906242370605, - "learning_rate": 7.0889648002376355e-06, - "loss": 0.0572, - "step": 51420 - }, - { - "epoch": 3.8192484776474083, - "grad_norm": 1.3621501922607422, - "learning_rate": 7.0845091341155505e-06, - "loss": 0.0438, - "step": 51430 - }, - { - "epoch": 3.8199910886677557, - "grad_norm": 2.786686658859253, - "learning_rate": 7.080053467993465e-06, - "loss": 0.0702, - "step": 51440 - }, - { - "epoch": 3.8207336996881036, - "grad_norm": 0.6535211801528931, - "learning_rate": 7.0755978018713805e-06, - "loss": 0.0564, - "step": 51450 - }, - { - "epoch": 3.821476310708451, - "grad_norm": 0.9478582143783569, - "learning_rate": 7.071142135749295e-06, - "loss": 0.0522, - "step": 51460 - }, - { - "epoch": 3.8222189217287985, - "grad_norm": 1.7308098077774048, - "learning_rate": 7.06668646962721e-06, - "loss": 0.0388, - "step": 51470 - }, - { - "epoch": 3.822961532749146, - "grad_norm": 1.9273875951766968, - "learning_rate": 7.062230803505124e-06, - "loss": 0.0684, - "step": 51480 - }, - { - "epoch": 3.8237041437694934, - "grad_norm": 0.6402938961982727, - "learning_rate": 7.057775137383039e-06, - "loss": 0.0933, - "step": 51490 - }, - { - "epoch": 3.8244467547898413, - "grad_norm": 1.3616961240768433, - "learning_rate": 7.053319471260953e-06, - "loss": 0.0663, - "step": 51500 - }, - { - "epoch": 3.8251893658101888, - "grad_norm": 1.957161784172058, - "learning_rate": 7.048863805138869e-06, - "loss": 0.0756, - "step": 51510 - }, - { - "epoch": 3.825931976830536, - "grad_norm": 2.335097074508667, - "learning_rate": 7.044408139016784e-06, - "loss": 0.0619, - "step": 51520 - }, - { - "epoch": 3.8266745878508837, - "grad_norm": 0.9050447940826416, - "learning_rate": 7.039952472894698e-06, - "loss": 0.0537, - "step": 51530 - }, - { - "epoch": 3.827417198871231, - "grad_norm": 0.2950853705406189, - "learning_rate": 7.035496806772613e-06, - "loss": 0.0598, - "step": 51540 - }, - { - "epoch": 3.828159809891579, - "grad_norm": 2.684269428253174, - "learning_rate": 7.031041140650527e-06, - "loss": 0.0896, - "step": 51550 - }, - { - "epoch": 3.8289024209119265, - "grad_norm": 2.0332887172698975, - "learning_rate": 7.026585474528442e-06, - "loss": 0.0801, - "step": 51560 - }, - { - "epoch": 3.829645031932274, - "grad_norm": 2.3198342323303223, - "learning_rate": 7.022129808406357e-06, - "loss": 0.0645, - "step": 51570 - }, - { - "epoch": 3.8303876429526214, - "grad_norm": 1.091840386390686, - "learning_rate": 7.017674142284272e-06, - "loss": 0.0553, - "step": 51580 - }, - { - "epoch": 3.831130253972969, - "grad_norm": 1.6161314249038696, - "learning_rate": 7.013218476162186e-06, - "loss": 0.0612, - "step": 51590 - }, - { - "epoch": 3.8318728649933167, - "grad_norm": 0.7637621164321899, - "learning_rate": 7.008762810040101e-06, - "loss": 0.0349, - "step": 51600 - }, - { - "epoch": 3.832615476013664, - "grad_norm": 0.9852764010429382, - "learning_rate": 7.004307143918015e-06, - "loss": 0.0476, - "step": 51610 - }, - { - "epoch": 3.8333580870340116, - "grad_norm": 2.236307144165039, - "learning_rate": 6.999851477795931e-06, - "loss": 0.0687, - "step": 51620 - }, - { - "epoch": 3.834100698054359, - "grad_norm": 0.9051099419593811, - "learning_rate": 6.995395811673845e-06, - "loss": 0.0861, - "step": 51630 - }, - { - "epoch": 3.8348433090747065, - "grad_norm": 0.784583568572998, - "learning_rate": 6.99094014555176e-06, - "loss": 0.0594, - "step": 51640 - }, - { - "epoch": 3.8355859200950544, - "grad_norm": 1.5118728876113892, - "learning_rate": 6.9864844794296745e-06, - "loss": 0.0769, - "step": 51650 - }, - { - "epoch": 3.836328531115402, - "grad_norm": 1.1480764150619507, - "learning_rate": 6.9820288133075895e-06, - "loss": 0.0678, - "step": 51660 - }, - { - "epoch": 3.8370711421357493, - "grad_norm": 1.8852975368499756, - "learning_rate": 6.9775731471855045e-06, - "loss": 0.0714, - "step": 51670 - }, - { - "epoch": 3.8378137531560967, - "grad_norm": 0.9265196323394775, - "learning_rate": 6.9731174810634195e-06, - "loss": 0.0424, - "step": 51680 - }, - { - "epoch": 3.838556364176444, - "grad_norm": 0.8514029383659363, - "learning_rate": 6.9686618149413345e-06, - "loss": 0.0565, - "step": 51690 - }, - { - "epoch": 3.839298975196792, - "grad_norm": 3.038891077041626, - "learning_rate": 6.964206148819249e-06, - "loss": 0.0627, - "step": 51700 - }, - { - "epoch": 3.8400415862171395, - "grad_norm": 2.795001268386841, - "learning_rate": 6.959750482697164e-06, - "loss": 0.0808, - "step": 51710 - }, - { - "epoch": 3.840784197237487, - "grad_norm": 0.8033546805381775, - "learning_rate": 6.955294816575078e-06, - "loss": 0.0667, - "step": 51720 - }, - { - "epoch": 3.8415268082578344, - "grad_norm": 3.8095524311065674, - "learning_rate": 6.950839150452993e-06, - "loss": 0.0525, - "step": 51730 - }, - { - "epoch": 3.842269419278182, - "grad_norm": 1.2857472896575928, - "learning_rate": 6.946383484330908e-06, - "loss": 0.0707, - "step": 51740 - }, - { - "epoch": 3.84301203029853, - "grad_norm": 0.5481483340263367, - "learning_rate": 6.941927818208823e-06, - "loss": 0.0624, - "step": 51750 - }, - { - "epoch": 3.8437546413188772, - "grad_norm": 2.4594411849975586, - "learning_rate": 6.937472152086737e-06, - "loss": 0.0699, - "step": 51760 - }, - { - "epoch": 3.8444972523392247, - "grad_norm": 2.338852882385254, - "learning_rate": 6.933016485964652e-06, - "loss": 0.0485, - "step": 51770 - }, - { - "epoch": 3.845239863359572, - "grad_norm": 0.8134142160415649, - "learning_rate": 6.928560819842566e-06, - "loss": 0.0581, - "step": 51780 - }, - { - "epoch": 3.8459824743799196, - "grad_norm": 2.4749584197998047, - "learning_rate": 6.924105153720481e-06, - "loss": 0.077, - "step": 51790 - }, - { - "epoch": 3.8467250854002675, - "grad_norm": 0.5151814222335815, - "learning_rate": 6.919649487598397e-06, - "loss": 0.0554, - "step": 51800 - }, - { - "epoch": 3.847467696420615, - "grad_norm": 2.5601933002471924, - "learning_rate": 6.915193821476311e-06, - "loss": 0.0677, - "step": 51810 - }, - { - "epoch": 3.8482103074409624, - "grad_norm": 1.637519121170044, - "learning_rate": 6.910738155354226e-06, - "loss": 0.1058, - "step": 51820 - }, - { - "epoch": 3.84895291846131, - "grad_norm": 0.6686950325965881, - "learning_rate": 6.90628248923214e-06, - "loss": 0.0781, - "step": 51830 - }, - { - "epoch": 3.8496955294816573, - "grad_norm": 1.9050847291946411, - "learning_rate": 6.901826823110055e-06, - "loss": 0.0675, - "step": 51840 - }, - { - "epoch": 3.850438140502005, - "grad_norm": 1.7424448728561401, - "learning_rate": 6.897371156987969e-06, - "loss": 0.0585, - "step": 51850 - }, - { - "epoch": 3.8511807515223526, - "grad_norm": 0.6480499505996704, - "learning_rate": 6.892915490865885e-06, - "loss": 0.0579, - "step": 51860 - }, - { - "epoch": 3.8519233625427, - "grad_norm": 0.8286868929862976, - "learning_rate": 6.888459824743799e-06, - "loss": 0.0416, - "step": 51870 - }, - { - "epoch": 3.8526659735630475, - "grad_norm": 0.9048423171043396, - "learning_rate": 6.884004158621714e-06, - "loss": 0.0521, - "step": 51880 - }, - { - "epoch": 3.853408584583395, - "grad_norm": 0.8243032097816467, - "learning_rate": 6.879548492499628e-06, - "loss": 0.0405, - "step": 51890 - }, - { - "epoch": 3.854151195603743, - "grad_norm": 0.618739128112793, - "learning_rate": 6.875092826377543e-06, - "loss": 0.0427, - "step": 51900 - }, - { - "epoch": 3.8548938066240903, - "grad_norm": 0.4855763912200928, - "learning_rate": 6.8706371602554575e-06, - "loss": 0.054, - "step": 51910 - }, - { - "epoch": 3.8556364176444378, - "grad_norm": 0.3211023211479187, - "learning_rate": 6.866181494133373e-06, - "loss": 0.0877, - "step": 51920 - }, - { - "epoch": 3.8563790286647857, - "grad_norm": 0.4299314320087433, - "learning_rate": 6.861725828011288e-06, - "loss": 0.0633, - "step": 51930 - }, - { - "epoch": 3.8571216396851327, - "grad_norm": 1.2965507507324219, - "learning_rate": 6.8572701618892025e-06, - "loss": 0.0749, - "step": 51940 - }, - { - "epoch": 3.8578642507054806, - "grad_norm": 0.9099006056785583, - "learning_rate": 6.8528144957671175e-06, - "loss": 0.0495, - "step": 51950 - }, - { - "epoch": 3.858606861725828, - "grad_norm": 2.0867326259613037, - "learning_rate": 6.848358829645032e-06, - "loss": 0.0509, - "step": 51960 - }, - { - "epoch": 3.8593494727461755, - "grad_norm": 3.871561050415039, - "learning_rate": 6.8439031635229475e-06, - "loss": 0.0922, - "step": 51970 - }, - { - "epoch": 3.8600920837665234, - "grad_norm": 0.6323529481887817, - "learning_rate": 6.839447497400862e-06, - "loss": 0.0609, - "step": 51980 - }, - { - "epoch": 3.860834694786871, - "grad_norm": 2.84472393989563, - "learning_rate": 6.834991831278777e-06, - "loss": 0.0602, - "step": 51990 - }, - { - "epoch": 3.8615773058072183, - "grad_norm": 0.9858630895614624, - "learning_rate": 6.830536165156691e-06, - "loss": 0.035, - "step": 52000 - }, - { - "epoch": 3.8623199168275657, - "grad_norm": 1.1265889406204224, - "learning_rate": 6.826080499034606e-06, - "loss": 0.0581, - "step": 52010 - }, - { - "epoch": 3.863062527847913, - "grad_norm": 1.8120416402816772, - "learning_rate": 6.82162483291252e-06, - "loss": 0.0919, - "step": 52020 - }, - { - "epoch": 3.863805138868261, - "grad_norm": 0.340191513299942, - "learning_rate": 6.817169166790436e-06, - "loss": 0.0482, - "step": 52030 - }, - { - "epoch": 3.8645477498886085, - "grad_norm": 2.8395814895629883, - "learning_rate": 6.81271350066835e-06, - "loss": 0.0593, - "step": 52040 - }, - { - "epoch": 3.865290360908956, - "grad_norm": 1.4814605712890625, - "learning_rate": 6.808257834546265e-06, - "loss": 0.0911, - "step": 52050 - }, - { - "epoch": 3.8660329719293034, - "grad_norm": 1.4447940587997437, - "learning_rate": 6.80380216842418e-06, - "loss": 0.0592, - "step": 52060 - }, - { - "epoch": 3.866775582949651, - "grad_norm": 1.7888092994689941, - "learning_rate": 6.799346502302094e-06, - "loss": 0.0895, - "step": 52070 - }, - { - "epoch": 3.8675181939699987, - "grad_norm": 1.2364249229431152, - "learning_rate": 6.794890836180009e-06, - "loss": 0.0805, - "step": 52080 - }, - { - "epoch": 3.868260804990346, - "grad_norm": 1.0468952655792236, - "learning_rate": 6.790435170057924e-06, - "loss": 0.0674, - "step": 52090 - }, - { - "epoch": 3.8690034160106936, - "grad_norm": 1.610219120979309, - "learning_rate": 6.785979503935839e-06, - "loss": 0.0458, - "step": 52100 - }, - { - "epoch": 3.869746027031041, - "grad_norm": 0.4736784100532532, - "learning_rate": 6.781523837813753e-06, - "loss": 0.0512, - "step": 52110 - }, - { - "epoch": 3.8704886380513885, - "grad_norm": 1.5783486366271973, - "learning_rate": 6.777068171691668e-06, - "loss": 0.089, - "step": 52120 - }, - { - "epoch": 3.8712312490717364, - "grad_norm": 1.8247394561767578, - "learning_rate": 6.772612505569582e-06, - "loss": 0.0809, - "step": 52130 - }, - { - "epoch": 3.871973860092084, - "grad_norm": 1.473613977432251, - "learning_rate": 6.768156839447497e-06, - "loss": 0.0699, - "step": 52140 - }, - { - "epoch": 3.8727164711124313, - "grad_norm": 0.9467633962631226, - "learning_rate": 6.763701173325412e-06, - "loss": 0.0398, - "step": 52150 - }, - { - "epoch": 3.873459082132779, - "grad_norm": 0.7337872982025146, - "learning_rate": 6.759245507203327e-06, - "loss": 0.0428, - "step": 52160 - }, - { - "epoch": 3.8742016931531262, - "grad_norm": 1.1722044944763184, - "learning_rate": 6.7547898410812415e-06, - "loss": 0.0658, - "step": 52170 - }, - { - "epoch": 3.874944304173474, - "grad_norm": 1.2315232753753662, - "learning_rate": 6.7503341749591565e-06, - "loss": 0.0714, - "step": 52180 - }, - { - "epoch": 3.8756869151938216, - "grad_norm": 1.0262686014175415, - "learning_rate": 6.7458785088370715e-06, - "loss": 0.0493, - "step": 52190 - }, - { - "epoch": 3.876429526214169, - "grad_norm": 2.3308000564575195, - "learning_rate": 6.741422842714986e-06, - "loss": 0.0469, - "step": 52200 - }, - { - "epoch": 3.8771721372345165, - "grad_norm": 0.9160858392715454, - "learning_rate": 6.7369671765929014e-06, - "loss": 0.0738, - "step": 52210 - }, - { - "epoch": 3.877914748254864, - "grad_norm": 0.8617852926254272, - "learning_rate": 6.732511510470816e-06, - "loss": 0.0674, - "step": 52220 - }, - { - "epoch": 3.878657359275212, - "grad_norm": 0.7218712568283081, - "learning_rate": 6.728055844348731e-06, - "loss": 0.0494, - "step": 52230 - }, - { - "epoch": 3.8793999702955593, - "grad_norm": 2.578873872756958, - "learning_rate": 6.723600178226645e-06, - "loss": 0.0685, - "step": 52240 - }, - { - "epoch": 3.8801425813159067, - "grad_norm": 0.5948718786239624, - "learning_rate": 6.71914451210456e-06, - "loss": 0.0504, - "step": 52250 - }, - { - "epoch": 3.880885192336254, - "grad_norm": 3.3981211185455322, - "learning_rate": 6.714688845982474e-06, - "loss": 0.0623, - "step": 52260 - }, - { - "epoch": 3.8816278033566016, - "grad_norm": 1.991584300994873, - "learning_rate": 6.71023317986039e-06, - "loss": 0.0566, - "step": 52270 - }, - { - "epoch": 3.8823704143769495, - "grad_norm": 0.5625025629997253, - "learning_rate": 6.705777513738304e-06, - "loss": 0.043, - "step": 52280 - }, - { - "epoch": 3.883113025397297, - "grad_norm": 0.8824495673179626, - "learning_rate": 6.701321847616219e-06, - "loss": 0.0552, - "step": 52290 - }, - { - "epoch": 3.8838556364176444, - "grad_norm": 0.5333496928215027, - "learning_rate": 6.696866181494133e-06, - "loss": 0.061, - "step": 52300 - }, - { - "epoch": 3.884598247437992, - "grad_norm": 0.932508647441864, - "learning_rate": 6.692410515372048e-06, - "loss": 0.044, - "step": 52310 - }, - { - "epoch": 3.8853408584583393, - "grad_norm": 0.28375789523124695, - "learning_rate": 6.687954849249964e-06, - "loss": 0.0445, - "step": 52320 - }, - { - "epoch": 3.886083469478687, - "grad_norm": 0.8071836829185486, - "learning_rate": 6.683499183127878e-06, - "loss": 0.1011, - "step": 52330 - }, - { - "epoch": 3.8868260804990347, - "grad_norm": 1.178012490272522, - "learning_rate": 6.679043517005793e-06, - "loss": 0.0465, - "step": 52340 - }, - { - "epoch": 3.887568691519382, - "grad_norm": 0.8823719620704651, - "learning_rate": 6.674587850883707e-06, - "loss": 0.045, - "step": 52350 - }, - { - "epoch": 3.8883113025397296, - "grad_norm": 1.4806567430496216, - "learning_rate": 6.670132184761622e-06, - "loss": 0.0486, - "step": 52360 - }, - { - "epoch": 3.889053913560077, - "grad_norm": 1.835952877998352, - "learning_rate": 6.665676518639536e-06, - "loss": 0.0594, - "step": 52370 - }, - { - "epoch": 3.889796524580425, - "grad_norm": 1.964036226272583, - "learning_rate": 6.661220852517452e-06, - "loss": 0.0783, - "step": 52380 - }, - { - "epoch": 3.8905391356007724, - "grad_norm": 0.7686440944671631, - "learning_rate": 6.656765186395366e-06, - "loss": 0.0448, - "step": 52390 - }, - { - "epoch": 3.89128174662112, - "grad_norm": 0.5452464818954468, - "learning_rate": 6.652309520273281e-06, - "loss": 0.0475, - "step": 52400 - }, - { - "epoch": 3.8920243576414673, - "grad_norm": 1.948075532913208, - "learning_rate": 6.647853854151195e-06, - "loss": 0.0631, - "step": 52410 - }, - { - "epoch": 3.8927669686618147, - "grad_norm": 1.347221851348877, - "learning_rate": 6.64339818802911e-06, - "loss": 0.0502, - "step": 52420 - }, - { - "epoch": 3.8935095796821626, - "grad_norm": 0.37357455492019653, - "learning_rate": 6.6389425219070245e-06, - "loss": 0.0583, - "step": 52430 - }, - { - "epoch": 3.89425219070251, - "grad_norm": 1.4739619493484497, - "learning_rate": 6.63448685578494e-06, - "loss": 0.0433, - "step": 52440 - }, - { - "epoch": 3.8949948017228575, - "grad_norm": 0.9696072936058044, - "learning_rate": 6.630031189662855e-06, - "loss": 0.0642, - "step": 52450 - }, - { - "epoch": 3.895737412743205, - "grad_norm": 2.1969690322875977, - "learning_rate": 6.6255755235407695e-06, - "loss": 0.042, - "step": 52460 - }, - { - "epoch": 3.8964800237635524, - "grad_norm": 1.8756181001663208, - "learning_rate": 6.6211198574186845e-06, - "loss": 0.0537, - "step": 52470 - }, - { - "epoch": 3.8972226347839003, - "grad_norm": 0.8761207461357117, - "learning_rate": 6.616664191296599e-06, - "loss": 0.0534, - "step": 52480 - }, - { - "epoch": 3.8979652458042477, - "grad_norm": 1.1076685190200806, - "learning_rate": 6.612208525174514e-06, - "loss": 0.0536, - "step": 52490 - }, - { - "epoch": 3.898707856824595, - "grad_norm": 0.5765098333358765, - "learning_rate": 6.607752859052429e-06, - "loss": 0.0318, - "step": 52500 - }, - { - "epoch": 3.899450467844943, - "grad_norm": 1.015160322189331, - "learning_rate": 6.603297192930344e-06, - "loss": 0.0931, - "step": 52510 - }, - { - "epoch": 3.90019307886529, - "grad_norm": 1.242473840713501, - "learning_rate": 6.598841526808258e-06, - "loss": 0.0466, - "step": 52520 - }, - { - "epoch": 3.900935689885638, - "grad_norm": 0.7415289878845215, - "learning_rate": 6.594385860686173e-06, - "loss": 0.0643, - "step": 52530 - }, - { - "epoch": 3.9016783009059854, - "grad_norm": 1.0019993782043457, - "learning_rate": 6.589930194564087e-06, - "loss": 0.0633, - "step": 52540 - }, - { - "epoch": 3.902420911926333, - "grad_norm": 3.3935482501983643, - "learning_rate": 6.585474528442002e-06, - "loss": 0.071, - "step": 52550 - }, - { - "epoch": 3.903163522946681, - "grad_norm": 2.050471067428589, - "learning_rate": 6.581018862319917e-06, - "loss": 0.0605, - "step": 52560 - }, - { - "epoch": 3.9039061339670282, - "grad_norm": 1.7919012308120728, - "learning_rate": 6.576563196197832e-06, - "loss": 0.0388, - "step": 52570 - }, - { - "epoch": 3.9046487449873757, - "grad_norm": 2.5567381381988525, - "learning_rate": 6.572107530075747e-06, - "loss": 0.072, - "step": 52580 - }, - { - "epoch": 3.905391356007723, - "grad_norm": 1.0397082567214966, - "learning_rate": 6.567651863953661e-06, - "loss": 0.0877, - "step": 52590 - }, - { - "epoch": 3.9061339670280706, - "grad_norm": 0.6068091988563538, - "learning_rate": 6.563196197831576e-06, - "loss": 0.0714, - "step": 52600 - }, - { - "epoch": 3.9068765780484185, - "grad_norm": 1.3227723836898804, - "learning_rate": 6.55874053170949e-06, - "loss": 0.0669, - "step": 52610 - }, - { - "epoch": 3.907619189068766, - "grad_norm": 1.6749495267868042, - "learning_rate": 6.554284865587406e-06, - "loss": 0.0689, - "step": 52620 - }, - { - "epoch": 3.9083618000891134, - "grad_norm": 1.0209654569625854, - "learning_rate": 6.54982919946532e-06, - "loss": 0.0847, - "step": 52630 - }, - { - "epoch": 3.909104411109461, - "grad_norm": 1.9074591398239136, - "learning_rate": 6.545373533343235e-06, - "loss": 0.0379, - "step": 52640 - }, - { - "epoch": 3.9098470221298083, - "grad_norm": 0.3264058828353882, - "learning_rate": 6.540917867221149e-06, - "loss": 0.0683, - "step": 52650 - }, - { - "epoch": 3.910589633150156, - "grad_norm": 1.0117005109786987, - "learning_rate": 6.536462201099064e-06, - "loss": 0.0578, - "step": 52660 - }, - { - "epoch": 3.9113322441705036, - "grad_norm": 2.336249589920044, - "learning_rate": 6.532006534976979e-06, - "loss": 0.0476, - "step": 52670 - }, - { - "epoch": 3.912074855190851, - "grad_norm": 0.9599561095237732, - "learning_rate": 6.527550868854894e-06, - "loss": 0.0638, - "step": 52680 - }, - { - "epoch": 3.9128174662111985, - "grad_norm": 1.9813097715377808, - "learning_rate": 6.5230952027328085e-06, - "loss": 0.0477, - "step": 52690 - }, - { - "epoch": 3.913560077231546, - "grad_norm": 1.9416104555130005, - "learning_rate": 6.5186395366107235e-06, - "loss": 0.1075, - "step": 52700 - }, - { - "epoch": 3.914302688251894, - "grad_norm": 2.747821569442749, - "learning_rate": 6.5141838704886384e-06, - "loss": 0.0997, - "step": 52710 - }, - { - "epoch": 3.9150452992722413, - "grad_norm": 0.8139704465866089, - "learning_rate": 6.509728204366553e-06, - "loss": 0.0475, - "step": 52720 - }, - { - "epoch": 3.9157879102925888, - "grad_norm": 3.535797357559204, - "learning_rate": 6.5052725382444684e-06, - "loss": 0.074, - "step": 52730 - }, - { - "epoch": 3.916530521312936, - "grad_norm": 1.303336501121521, - "learning_rate": 6.500816872122383e-06, - "loss": 0.0383, - "step": 52740 - }, - { - "epoch": 3.9172731323332837, - "grad_norm": 1.1751950979232788, - "learning_rate": 6.496361206000298e-06, - "loss": 0.0735, - "step": 52750 - }, - { - "epoch": 3.9180157433536316, - "grad_norm": 0.4327644407749176, - "learning_rate": 6.491905539878212e-06, - "loss": 0.0379, - "step": 52760 - }, - { - "epoch": 3.918758354373979, - "grad_norm": 1.1452745199203491, - "learning_rate": 6.487449873756127e-06, - "loss": 0.0403, - "step": 52770 - }, - { - "epoch": 3.9195009653943265, - "grad_norm": 0.25140857696533203, - "learning_rate": 6.482994207634041e-06, - "loss": 0.0589, - "step": 52780 - }, - { - "epoch": 3.920243576414674, - "grad_norm": 2.8998043537139893, - "learning_rate": 6.478538541511957e-06, - "loss": 0.0906, - "step": 52790 - }, - { - "epoch": 3.9209861874350214, - "grad_norm": 0.8305198550224304, - "learning_rate": 6.474082875389871e-06, - "loss": 0.0389, - "step": 52800 - }, - { - "epoch": 3.9217287984553693, - "grad_norm": 1.9127947092056274, - "learning_rate": 6.469627209267786e-06, - "loss": 0.0678, - "step": 52810 - }, - { - "epoch": 3.9224714094757167, - "grad_norm": 3.7456071376800537, - "learning_rate": 6.4651715431457e-06, - "loss": 0.0809, - "step": 52820 - }, - { - "epoch": 3.923214020496064, - "grad_norm": 2.8850257396698, - "learning_rate": 6.460715877023615e-06, - "loss": 0.0541, - "step": 52830 - }, - { - "epoch": 3.9239566315164116, - "grad_norm": 0.6639874577522278, - "learning_rate": 6.45626021090153e-06, - "loss": 0.0512, - "step": 52840 - }, - { - "epoch": 3.924699242536759, - "grad_norm": 1.1795817613601685, - "learning_rate": 6.451804544779445e-06, - "loss": 0.0508, - "step": 52850 - }, - { - "epoch": 3.925441853557107, - "grad_norm": 0.710468590259552, - "learning_rate": 6.44734887865736e-06, - "loss": 0.0463, - "step": 52860 - }, - { - "epoch": 3.9261844645774544, - "grad_norm": 1.7391959428787231, - "learning_rate": 6.442893212535274e-06, - "loss": 0.0873, - "step": 52870 - }, - { - "epoch": 3.926927075597802, - "grad_norm": 0.5861713290214539, - "learning_rate": 6.438437546413189e-06, - "loss": 0.0452, - "step": 52880 - }, - { - "epoch": 3.9276696866181493, - "grad_norm": 0.8153517842292786, - "learning_rate": 6.433981880291103e-06, - "loss": 0.0403, - "step": 52890 - }, - { - "epoch": 3.9284122976384968, - "grad_norm": 3.0007896423339844, - "learning_rate": 6.429526214169018e-06, - "loss": 0.0681, - "step": 52900 - }, - { - "epoch": 3.9291549086588446, - "grad_norm": 2.487295627593994, - "learning_rate": 6.425070548046933e-06, - "loss": 0.0734, - "step": 52910 - }, - { - "epoch": 3.929897519679192, - "grad_norm": 1.7912089824676514, - "learning_rate": 6.420614881924848e-06, - "loss": 0.0914, - "step": 52920 - }, - { - "epoch": 3.9306401306995395, - "grad_norm": 3.3400092124938965, - "learning_rate": 6.416159215802762e-06, - "loss": 0.0571, - "step": 52930 - }, - { - "epoch": 3.931382741719887, - "grad_norm": 2.8643431663513184, - "learning_rate": 6.411703549680677e-06, - "loss": 0.0645, - "step": 52940 - }, - { - "epoch": 3.9321253527402344, - "grad_norm": 0.3890113830566406, - "learning_rate": 6.4072478835585915e-06, - "loss": 0.0606, - "step": 52950 - }, - { - "epoch": 3.9328679637605823, - "grad_norm": 0.5684772729873657, - "learning_rate": 6.4027922174365065e-06, - "loss": 0.0604, - "step": 52960 - }, - { - "epoch": 3.93361057478093, - "grad_norm": 2.360518217086792, - "learning_rate": 6.398336551314422e-06, - "loss": 0.063, - "step": 52970 - }, - { - "epoch": 3.9343531858012772, - "grad_norm": 1.0846983194351196, - "learning_rate": 6.3938808851923365e-06, - "loss": 0.0421, - "step": 52980 - }, - { - "epoch": 3.9350957968216247, - "grad_norm": 0.8970616459846497, - "learning_rate": 6.3894252190702515e-06, - "loss": 0.0558, - "step": 52990 - }, - { - "epoch": 3.935838407841972, - "grad_norm": 1.57839834690094, - "learning_rate": 6.384969552948166e-06, - "loss": 0.0796, - "step": 53000 - }, - { - "epoch": 3.93658101886232, - "grad_norm": 1.5346392393112183, - "learning_rate": 6.380513886826081e-06, - "loss": 0.0477, - "step": 53010 - }, - { - "epoch": 3.9373236298826675, - "grad_norm": 1.0439153909683228, - "learning_rate": 6.376058220703996e-06, - "loss": 0.0641, - "step": 53020 - }, - { - "epoch": 3.938066240903015, - "grad_norm": 0.8135676383972168, - "learning_rate": 6.371602554581911e-06, - "loss": 0.0671, - "step": 53030 - }, - { - "epoch": 3.9388088519233624, - "grad_norm": 2.3363993167877197, - "learning_rate": 6.367146888459825e-06, - "loss": 0.0387, - "step": 53040 - }, - { - "epoch": 3.93955146294371, - "grad_norm": 1.2489509582519531, - "learning_rate": 6.36269122233774e-06, - "loss": 0.0375, - "step": 53050 - }, - { - "epoch": 3.9402940739640577, - "grad_norm": 1.8481662273406982, - "learning_rate": 6.358235556215654e-06, - "loss": 0.0492, - "step": 53060 - }, - { - "epoch": 3.941036684984405, - "grad_norm": 2.6093268394470215, - "learning_rate": 6.353779890093569e-06, - "loss": 0.0814, - "step": 53070 - }, - { - "epoch": 3.9417792960047526, - "grad_norm": 1.122709035873413, - "learning_rate": 6.349324223971484e-06, - "loss": 0.0497, - "step": 53080 - }, - { - "epoch": 3.9425219070251005, - "grad_norm": 0.5797234177589417, - "learning_rate": 6.344868557849399e-06, - "loss": 0.0727, - "step": 53090 - }, - { - "epoch": 3.9432645180454475, - "grad_norm": 0.6840558648109436, - "learning_rate": 6.340412891727314e-06, - "loss": 0.0617, - "step": 53100 - }, - { - "epoch": 3.9440071290657954, - "grad_norm": 0.6512880325317383, - "learning_rate": 6.335957225605228e-06, - "loss": 0.0461, - "step": 53110 - }, - { - "epoch": 3.944749740086143, - "grad_norm": 0.3409847319126129, - "learning_rate": 6.331501559483143e-06, - "loss": 0.0355, - "step": 53120 - }, - { - "epoch": 3.9454923511064903, - "grad_norm": 1.1444979906082153, - "learning_rate": 6.327045893361057e-06, - "loss": 0.0465, - "step": 53130 - }, - { - "epoch": 3.946234962126838, - "grad_norm": 2.209327459335327, - "learning_rate": 6.322590227238973e-06, - "loss": 0.0566, - "step": 53140 - }, - { - "epoch": 3.9469775731471857, - "grad_norm": 0.7463454604148865, - "learning_rate": 6.318134561116887e-06, - "loss": 0.0632, - "step": 53150 - }, - { - "epoch": 3.947720184167533, - "grad_norm": 0.4572658836841583, - "learning_rate": 6.313678894994802e-06, - "loss": 0.0592, - "step": 53160 - }, - { - "epoch": 3.9484627951878806, - "grad_norm": 0.3454363942146301, - "learning_rate": 6.309223228872716e-06, - "loss": 0.0801, - "step": 53170 - }, - { - "epoch": 3.949205406208228, - "grad_norm": 1.0574936866760254, - "learning_rate": 6.304767562750631e-06, - "loss": 0.0735, - "step": 53180 - }, - { - "epoch": 3.949948017228576, - "grad_norm": 1.7936334609985352, - "learning_rate": 6.3003118966285455e-06, - "loss": 0.0525, - "step": 53190 - }, - { - "epoch": 3.9506906282489234, - "grad_norm": 1.0691609382629395, - "learning_rate": 6.295856230506461e-06, - "loss": 0.0766, - "step": 53200 - }, - { - "epoch": 3.951433239269271, - "grad_norm": 2.9121530055999756, - "learning_rate": 6.2914005643843755e-06, - "loss": 0.0436, - "step": 53210 - }, - { - "epoch": 3.9521758502896183, - "grad_norm": 1.2134883403778076, - "learning_rate": 6.2869448982622904e-06, - "loss": 0.0619, - "step": 53220 - }, - { - "epoch": 3.9529184613099657, - "grad_norm": 3.7583858966827393, - "learning_rate": 6.282489232140205e-06, - "loss": 0.0716, - "step": 53230 - }, - { - "epoch": 3.9536610723303136, - "grad_norm": 0.6128131151199341, - "learning_rate": 6.27803356601812e-06, - "loss": 0.0474, - "step": 53240 - }, - { - "epoch": 3.954403683350661, - "grad_norm": 2.04298734664917, - "learning_rate": 6.273577899896035e-06, - "loss": 0.0499, - "step": 53250 - }, - { - "epoch": 3.9551462943710085, - "grad_norm": 2.507197618484497, - "learning_rate": 6.26912223377395e-06, - "loss": 0.0556, - "step": 53260 - }, - { - "epoch": 3.955888905391356, - "grad_norm": 2.27921986579895, - "learning_rate": 6.2646665676518646e-06, - "loss": 0.0311, - "step": 53270 - }, - { - "epoch": 3.9566315164117034, - "grad_norm": 1.2935408353805542, - "learning_rate": 6.260210901529779e-06, - "loss": 0.0436, - "step": 53280 - }, - { - "epoch": 3.9573741274320513, - "grad_norm": 1.6734613180160522, - "learning_rate": 6.255755235407694e-06, - "loss": 0.0638, - "step": 53290 - }, - { - "epoch": 3.9581167384523988, - "grad_norm": 2.51885724067688, - "learning_rate": 6.251299569285608e-06, - "loss": 0.063, - "step": 53300 - }, - { - "epoch": 3.958859349472746, - "grad_norm": 1.2505862712860107, - "learning_rate": 6.246843903163523e-06, - "loss": 0.0401, - "step": 53310 - }, - { - "epoch": 3.9596019604930937, - "grad_norm": 0.8314433097839355, - "learning_rate": 6.242388237041438e-06, - "loss": 0.0514, - "step": 53320 - }, - { - "epoch": 3.960344571513441, - "grad_norm": 1.3497314453125, - "learning_rate": 6.237932570919353e-06, - "loss": 0.0438, - "step": 53330 - }, - { - "epoch": 3.961087182533789, - "grad_norm": 0.9097846746444702, - "learning_rate": 6.233476904797267e-06, - "loss": 0.0561, - "step": 53340 - }, - { - "epoch": 3.9618297935541364, - "grad_norm": 2.9179575443267822, - "learning_rate": 6.229021238675182e-06, - "loss": 0.0492, - "step": 53350 - }, - { - "epoch": 3.962572404574484, - "grad_norm": 1.4877798557281494, - "learning_rate": 6.224565572553096e-06, - "loss": 0.0712, - "step": 53360 - }, - { - "epoch": 3.9633150155948313, - "grad_norm": 0.9369436502456665, - "learning_rate": 6.220109906431012e-06, - "loss": 0.075, - "step": 53370 - }, - { - "epoch": 3.964057626615179, - "grad_norm": 1.126839280128479, - "learning_rate": 6.215654240308927e-06, - "loss": 0.0503, - "step": 53380 - }, - { - "epoch": 3.9648002376355267, - "grad_norm": 1.7786030769348145, - "learning_rate": 6.211198574186841e-06, - "loss": 0.0628, - "step": 53390 - }, - { - "epoch": 3.965542848655874, - "grad_norm": 1.3987665176391602, - "learning_rate": 6.206742908064756e-06, - "loss": 0.0942, - "step": 53400 - }, - { - "epoch": 3.9662854596762216, - "grad_norm": 0.4520890712738037, - "learning_rate": 6.20228724194267e-06, - "loss": 0.0451, - "step": 53410 - }, - { - "epoch": 3.967028070696569, - "grad_norm": 1.6644339561462402, - "learning_rate": 6.197831575820585e-06, - "loss": 0.0483, - "step": 53420 - }, - { - "epoch": 3.9677706817169165, - "grad_norm": 1.37549889087677, - "learning_rate": 6.1933759096985e-06, - "loss": 0.0521, - "step": 53430 - }, - { - "epoch": 3.9685132927372644, - "grad_norm": 2.7035961151123047, - "learning_rate": 6.188920243576415e-06, - "loss": 0.0649, - "step": 53440 - }, - { - "epoch": 3.969255903757612, - "grad_norm": 1.5665085315704346, - "learning_rate": 6.184464577454329e-06, - "loss": 0.0313, - "step": 53450 - }, - { - "epoch": 3.9699985147779593, - "grad_norm": 1.6840803623199463, - "learning_rate": 6.180008911332244e-06, - "loss": 0.0966, - "step": 53460 - }, - { - "epoch": 3.9707411257983067, - "grad_norm": 2.5111865997314453, - "learning_rate": 6.1755532452101585e-06, - "loss": 0.0306, - "step": 53470 - }, - { - "epoch": 3.971483736818654, - "grad_norm": 1.9524160623550415, - "learning_rate": 6.1710975790880735e-06, - "loss": 0.0649, - "step": 53480 - }, - { - "epoch": 3.972226347839002, - "grad_norm": 1.0792032480239868, - "learning_rate": 6.1666419129659885e-06, - "loss": 0.07, - "step": 53490 - }, - { - "epoch": 3.9729689588593495, - "grad_norm": 1.4523978233337402, - "learning_rate": 6.1621862468439035e-06, - "loss": 0.0678, - "step": 53500 - }, - { - "epoch": 3.973711569879697, - "grad_norm": 2.432481527328491, - "learning_rate": 6.1577305807218185e-06, - "loss": 0.0852, - "step": 53510 - }, - { - "epoch": 3.9744541809000444, - "grad_norm": 1.813653588294983, - "learning_rate": 6.153274914599733e-06, - "loss": 0.0837, - "step": 53520 - }, - { - "epoch": 3.975196791920392, - "grad_norm": 0.7046365141868591, - "learning_rate": 6.148819248477648e-06, - "loss": 0.056, - "step": 53530 - }, - { - "epoch": 3.9759394029407398, - "grad_norm": 2.6574409008026123, - "learning_rate": 6.144363582355562e-06, - "loss": 0.0592, - "step": 53540 - }, - { - "epoch": 3.9766820139610872, - "grad_norm": 2.38148832321167, - "learning_rate": 6.139907916233478e-06, - "loss": 0.0468, - "step": 53550 - }, - { - "epoch": 3.9774246249814347, - "grad_norm": 1.9013030529022217, - "learning_rate": 6.135452250111392e-06, - "loss": 0.0815, - "step": 53560 - }, - { - "epoch": 3.978167236001782, - "grad_norm": 0.9605785608291626, - "learning_rate": 6.130996583989307e-06, - "loss": 0.0733, - "step": 53570 - }, - { - "epoch": 3.9789098470221296, - "grad_norm": 0.8357670903205872, - "learning_rate": 6.126540917867221e-06, - "loss": 0.0657, - "step": 53580 - }, - { - "epoch": 3.9796524580424775, - "grad_norm": 2.5137290954589844, - "learning_rate": 6.122085251745136e-06, - "loss": 0.0539, - "step": 53590 - }, - { - "epoch": 3.980395069062825, - "grad_norm": 0.9419310688972473, - "learning_rate": 6.11762958562305e-06, - "loss": 0.0589, - "step": 53600 - }, - { - "epoch": 3.9811376800831724, - "grad_norm": 1.140599012374878, - "learning_rate": 6.113173919500966e-06, - "loss": 0.0427, - "step": 53610 - }, - { - "epoch": 3.98188029110352, - "grad_norm": 1.1350241899490356, - "learning_rate": 6.10871825337888e-06, - "loss": 0.0699, - "step": 53620 - }, - { - "epoch": 3.9826229021238673, - "grad_norm": 1.3582016229629517, - "learning_rate": 6.104262587256795e-06, - "loss": 0.0442, - "step": 53630 - }, - { - "epoch": 3.983365513144215, - "grad_norm": 1.5812486410140991, - "learning_rate": 6.09980692113471e-06, - "loss": 0.0749, - "step": 53640 - }, - { - "epoch": 3.9841081241645626, - "grad_norm": 0.9100233912467957, - "learning_rate": 6.095351255012624e-06, - "loss": 0.0737, - "step": 53650 - }, - { - "epoch": 3.98485073518491, - "grad_norm": 0.8678179383277893, - "learning_rate": 6.090895588890539e-06, - "loss": 0.0367, - "step": 53660 - }, - { - "epoch": 3.985593346205258, - "grad_norm": 2.191160202026367, - "learning_rate": 6.086439922768454e-06, - "loss": 0.0931, - "step": 53670 - }, - { - "epoch": 3.986335957225605, - "grad_norm": 1.9324291944503784, - "learning_rate": 6.081984256646369e-06, - "loss": 0.0731, - "step": 53680 - }, - { - "epoch": 3.987078568245953, - "grad_norm": 0.9478018879890442, - "learning_rate": 6.077528590524283e-06, - "loss": 0.0615, - "step": 53690 - }, - { - "epoch": 3.9878211792663003, - "grad_norm": 1.2666985988616943, - "learning_rate": 6.073072924402198e-06, - "loss": 0.0543, - "step": 53700 - }, - { - "epoch": 3.9885637902866478, - "grad_norm": 1.0880041122436523, - "learning_rate": 6.0686172582801125e-06, - "loss": 0.0707, - "step": 53710 - }, - { - "epoch": 3.9893064013069957, - "grad_norm": 0.5105621814727783, - "learning_rate": 6.064161592158028e-06, - "loss": 0.0686, - "step": 53720 - }, - { - "epoch": 3.990049012327343, - "grad_norm": 1.3909556865692139, - "learning_rate": 6.0597059260359424e-06, - "loss": 0.0491, - "step": 53730 - }, - { - "epoch": 3.9907916233476906, - "grad_norm": 3.0516223907470703, - "learning_rate": 6.0552502599138574e-06, - "loss": 0.0681, - "step": 53740 - }, - { - "epoch": 3.991534234368038, - "grad_norm": 0.9059837460517883, - "learning_rate": 6.050794593791772e-06, - "loss": 0.034, - "step": 53750 - }, - { - "epoch": 3.9922768453883855, - "grad_norm": 2.0982439517974854, - "learning_rate": 6.046338927669687e-06, - "loss": 0.0533, - "step": 53760 - }, - { - "epoch": 3.9930194564087333, - "grad_norm": 1.3619189262390137, - "learning_rate": 6.041883261547602e-06, - "loss": 0.053, - "step": 53770 - }, - { - "epoch": 3.993762067429081, - "grad_norm": 0.7172215580940247, - "learning_rate": 6.0374275954255166e-06, - "loss": 0.0728, - "step": 53780 - }, - { - "epoch": 3.9945046784494282, - "grad_norm": 1.9413235187530518, - "learning_rate": 6.0329719293034316e-06, - "loss": 0.0779, - "step": 53790 - }, - { - "epoch": 3.9952472894697757, - "grad_norm": 2.7654523849487305, - "learning_rate": 6.028516263181346e-06, - "loss": 0.0537, - "step": 53800 - }, - { - "epoch": 3.995989900490123, - "grad_norm": 1.1179438829421997, - "learning_rate": 6.024060597059261e-06, - "loss": 0.0586, - "step": 53810 - }, - { - "epoch": 3.996732511510471, - "grad_norm": 2.428178071975708, - "learning_rate": 6.019604930937175e-06, - "loss": 0.0714, - "step": 53820 - }, - { - "epoch": 3.9974751225308185, - "grad_norm": 1.2658203840255737, - "learning_rate": 6.01514926481509e-06, - "loss": 0.0462, - "step": 53830 - }, - { - "epoch": 3.998217733551166, - "grad_norm": 0.42739203572273254, - "learning_rate": 6.010693598693005e-06, - "loss": 0.0538, - "step": 53840 - }, - { - "epoch": 3.9989603445715134, - "grad_norm": 0.2762404978275299, - "learning_rate": 6.00623793257092e-06, - "loss": 0.0637, - "step": 53850 - }, - { - "epoch": 3.999702955591861, - "grad_norm": 0.41970357298851013, - "learning_rate": 6.001782266448834e-06, - "loss": 0.0443, - "step": 53860 - }, - { - "epoch": 4.0, - "eval_f1": 0.0, - "eval_loss": 0.05353143438696861, - "eval_runtime": 798.3212, - "eval_samples_per_second": 47.624, - "eval_steps_per_second": 2.977, - "step": 53864 - }, - { - "epoch": 4.000445566612209, - "grad_norm": 1.3657422065734863, - "learning_rate": 5.997326600326749e-06, - "loss": 0.0483, - "step": 53870 - }, - { - "epoch": 4.001188177632556, - "grad_norm": 1.7870548963546753, - "learning_rate": 5.992870934204663e-06, - "loss": 0.0755, - "step": 53880 - }, - { - "epoch": 4.001930788652904, - "grad_norm": 1.3454580307006836, - "learning_rate": 5.988415268082578e-06, - "loss": 0.0706, - "step": 53890 - }, - { - "epoch": 4.0026733996732515, - "grad_norm": 1.4166697263717651, - "learning_rate": 5.983959601960494e-06, - "loss": 0.0645, - "step": 53900 - }, - { - "epoch": 4.0034160106935985, - "grad_norm": 1.9602590799331665, - "learning_rate": 5.979503935838408e-06, - "loss": 0.0707, - "step": 53910 - }, - { - "epoch": 4.004158621713946, - "grad_norm": 3.5693228244781494, - "learning_rate": 5.975048269716323e-06, - "loss": 0.0585, - "step": 53920 - }, - { - "epoch": 4.004901232734293, - "grad_norm": 3.0163803100585938, - "learning_rate": 5.970592603594237e-06, - "loss": 0.0689, - "step": 53930 - }, - { - "epoch": 4.005643843754641, - "grad_norm": 0.6391358375549316, - "learning_rate": 5.966136937472152e-06, - "loss": 0.0442, - "step": 53940 - }, - { - "epoch": 4.006386454774989, - "grad_norm": 0.6875801086425781, - "learning_rate": 5.961681271350066e-06, - "loss": 0.0441, - "step": 53950 - }, - { - "epoch": 4.007129065795336, - "grad_norm": 2.397939443588257, - "learning_rate": 5.957225605227982e-06, - "loss": 0.067, - "step": 53960 - }, - { - "epoch": 4.007871676815684, - "grad_norm": 1.6307581663131714, - "learning_rate": 5.952769939105896e-06, - "loss": 0.0316, - "step": 53970 - }, - { - "epoch": 4.008614287836031, - "grad_norm": 1.195626974105835, - "learning_rate": 5.948314272983811e-06, - "loss": 0.0583, - "step": 53980 - }, - { - "epoch": 4.009356898856379, - "grad_norm": 2.5990028381347656, - "learning_rate": 5.9438586068617255e-06, - "loss": 0.079, - "step": 53990 - }, - { - "epoch": 4.010099509876727, - "grad_norm": 1.327807068824768, - "learning_rate": 5.9394029407396405e-06, - "loss": 0.0702, - "step": 54000 - }, - { - "epoch": 4.010842120897074, - "grad_norm": 2.4777891635894775, - "learning_rate": 5.9349472746175555e-06, - "loss": 0.049, - "step": 54010 - }, - { - "epoch": 4.011584731917422, - "grad_norm": 1.1965991258621216, - "learning_rate": 5.9304916084954705e-06, - "loss": 0.0718, - "step": 54020 - }, - { - "epoch": 4.012327342937769, - "grad_norm": 0.5554696321487427, - "learning_rate": 5.9260359423733855e-06, - "loss": 0.0433, - "step": 54030 - }, - { - "epoch": 4.013069953958117, - "grad_norm": 1.6719565391540527, - "learning_rate": 5.9215802762513e-06, - "loss": 0.074, - "step": 54040 - }, - { - "epoch": 4.013812564978465, - "grad_norm": 0.40224915742874146, - "learning_rate": 5.917124610129215e-06, - "loss": 0.0552, - "step": 54050 - }, - { - "epoch": 4.014555175998812, - "grad_norm": 0.9646987915039062, - "learning_rate": 5.912668944007129e-06, - "loss": 0.0513, - "step": 54060 - }, - { - "epoch": 4.0152977870191595, - "grad_norm": 2.081406593322754, - "learning_rate": 5.908213277885045e-06, - "loss": 0.0644, - "step": 54070 - }, - { - "epoch": 4.0160403980395065, - "grad_norm": 1.1674538850784302, - "learning_rate": 5.903757611762959e-06, - "loss": 0.048, - "step": 54080 - }, - { - "epoch": 4.016783009059854, - "grad_norm": 0.48358213901519775, - "learning_rate": 5.899301945640874e-06, - "loss": 0.0558, - "step": 54090 - }, - { - "epoch": 4.017525620080202, - "grad_norm": 0.6902279257774353, - "learning_rate": 5.894846279518788e-06, - "loss": 0.0671, - "step": 54100 - }, - { - "epoch": 4.018268231100549, - "grad_norm": 1.8209834098815918, - "learning_rate": 5.890390613396703e-06, - "loss": 0.0673, - "step": 54110 - }, - { - "epoch": 4.019010842120897, - "grad_norm": 0.6729385256767273, - "learning_rate": 5.885934947274617e-06, - "loss": 0.0378, - "step": 54120 - }, - { - "epoch": 4.019753453141244, - "grad_norm": 0.9727711081504822, - "learning_rate": 5.881479281152533e-06, - "loss": 0.0405, - "step": 54130 - }, - { - "epoch": 4.020496064161592, - "grad_norm": 0.18303215503692627, - "learning_rate": 5.877023615030447e-06, - "loss": 0.0852, - "step": 54140 - }, - { - "epoch": 4.02123867518194, - "grad_norm": 1.9007530212402344, - "learning_rate": 5.872567948908362e-06, - "loss": 0.0734, - "step": 54150 - }, - { - "epoch": 4.021981286202287, - "grad_norm": 1.277612566947937, - "learning_rate": 5.868112282786277e-06, - "loss": 0.055, - "step": 54160 - }, - { - "epoch": 4.022723897222635, - "grad_norm": 0.40179601311683655, - "learning_rate": 5.863656616664191e-06, - "loss": 0.0591, - "step": 54170 - }, - { - "epoch": 4.023466508242982, - "grad_norm": 0.6786103248596191, - "learning_rate": 5.859200950542106e-06, - "loss": 0.0633, - "step": 54180 - }, - { - "epoch": 4.02420911926333, - "grad_norm": 2.203526496887207, - "learning_rate": 5.854745284420021e-06, - "loss": 0.0759, - "step": 54190 - }, - { - "epoch": 4.024951730283678, - "grad_norm": 1.4047437906265259, - "learning_rate": 5.850289618297936e-06, - "loss": 0.0761, - "step": 54200 - }, - { - "epoch": 4.025694341304025, - "grad_norm": 1.809567928314209, - "learning_rate": 5.84583395217585e-06, - "loss": 0.0656, - "step": 54210 - }, - { - "epoch": 4.026436952324373, - "grad_norm": 1.946758508682251, - "learning_rate": 5.841378286053765e-06, - "loss": 0.0509, - "step": 54220 - }, - { - "epoch": 4.02717956334472, - "grad_norm": 1.9087995290756226, - "learning_rate": 5.8369226199316795e-06, - "loss": 0.0697, - "step": 54230 - }, - { - "epoch": 4.0279221743650675, - "grad_norm": 0.6901232600212097, - "learning_rate": 5.8324669538095944e-06, - "loss": 0.0574, - "step": 54240 - }, - { - "epoch": 4.028664785385415, - "grad_norm": 1.0720127820968628, - "learning_rate": 5.8280112876875094e-06, - "loss": 0.0663, - "step": 54250 - }, - { - "epoch": 4.029407396405762, - "grad_norm": 1.3058216571807861, - "learning_rate": 5.8235556215654244e-06, - "loss": 0.046, - "step": 54260 - }, - { - "epoch": 4.03015000742611, - "grad_norm": 0.79048752784729, - "learning_rate": 5.819099955443339e-06, - "loss": 0.0757, - "step": 54270 - }, - { - "epoch": 4.030892618446458, - "grad_norm": 2.9405195713043213, - "learning_rate": 5.814644289321254e-06, - "loss": 0.0786, - "step": 54280 - }, - { - "epoch": 4.031635229466805, - "grad_norm": 2.264370918273926, - "learning_rate": 5.8101886231991686e-06, - "loss": 0.0659, - "step": 54290 - }, - { - "epoch": 4.032377840487153, - "grad_norm": 0.7332669496536255, - "learning_rate": 5.805732957077083e-06, - "loss": 0.0595, - "step": 54300 - }, - { - "epoch": 4.0331204515075, - "grad_norm": 2.585928440093994, - "learning_rate": 5.8012772909549986e-06, - "loss": 0.0937, - "step": 54310 - }, - { - "epoch": 4.033863062527848, - "grad_norm": 0.5826427340507507, - "learning_rate": 5.796821624832913e-06, - "loss": 0.0509, - "step": 54320 - }, - { - "epoch": 4.034605673548196, - "grad_norm": 1.9038110971450806, - "learning_rate": 5.792365958710828e-06, - "loss": 0.0811, - "step": 54330 - }, - { - "epoch": 4.035348284568543, - "grad_norm": 0.9601470232009888, - "learning_rate": 5.787910292588742e-06, - "loss": 0.0506, - "step": 54340 - }, - { - "epoch": 4.036090895588891, - "grad_norm": 0.9888731241226196, - "learning_rate": 5.783454626466657e-06, - "loss": 0.0703, - "step": 54350 - }, - { - "epoch": 4.036833506609238, - "grad_norm": 0.9209118485450745, - "learning_rate": 5.778998960344572e-06, - "loss": 0.0736, - "step": 54360 - }, - { - "epoch": 4.037576117629586, - "grad_norm": 2.1321849822998047, - "learning_rate": 5.774543294222487e-06, - "loss": 0.058, - "step": 54370 - }, - { - "epoch": 4.038318728649934, - "grad_norm": 0.4780378043651581, - "learning_rate": 5.770087628100401e-06, - "loss": 0.0451, - "step": 54380 - }, - { - "epoch": 4.039061339670281, - "grad_norm": 0.6175082921981812, - "learning_rate": 5.765631961978316e-06, - "loss": 0.0457, - "step": 54390 - }, - { - "epoch": 4.0398039506906285, - "grad_norm": 0.8569179773330688, - "learning_rate": 5.76117629585623e-06, - "loss": 0.063, - "step": 54400 - }, - { - "epoch": 4.0405465617109755, - "grad_norm": 0.5725436210632324, - "learning_rate": 5.756720629734145e-06, - "loss": 0.0555, - "step": 54410 - }, - { - "epoch": 4.041289172731323, - "grad_norm": 1.1730387210845947, - "learning_rate": 5.752264963612061e-06, - "loss": 0.0586, - "step": 54420 - }, - { - "epoch": 4.042031783751671, - "grad_norm": 1.527571678161621, - "learning_rate": 5.747809297489975e-06, - "loss": 0.0381, - "step": 54430 - }, - { - "epoch": 4.042774394772018, - "grad_norm": 1.1510217189788818, - "learning_rate": 5.74335363136789e-06, - "loss": 0.0494, - "step": 54440 - }, - { - "epoch": 4.043517005792366, - "grad_norm": 2.92785382270813, - "learning_rate": 5.738897965245804e-06, - "loss": 0.0484, - "step": 54450 - }, - { - "epoch": 4.044259616812713, - "grad_norm": 1.290939211845398, - "learning_rate": 5.734442299123719e-06, - "loss": 0.0585, - "step": 54460 - }, - { - "epoch": 4.045002227833061, - "grad_norm": 1.4122437238693237, - "learning_rate": 5.729986633001633e-06, - "loss": 0.0517, - "step": 54470 - }, - { - "epoch": 4.045744838853409, - "grad_norm": 1.2548693418502808, - "learning_rate": 5.725530966879549e-06, - "loss": 0.0642, - "step": 54480 - }, - { - "epoch": 4.046487449873756, - "grad_norm": 2.637096405029297, - "learning_rate": 5.721075300757463e-06, - "loss": 0.0761, - "step": 54490 - }, - { - "epoch": 4.047230060894104, - "grad_norm": 1.5033966302871704, - "learning_rate": 5.716619634635378e-06, - "loss": 0.0705, - "step": 54500 - }, - { - "epoch": 4.047972671914451, - "grad_norm": 0.40369749069213867, - "learning_rate": 5.7121639685132925e-06, - "loss": 0.0678, - "step": 54510 - }, - { - "epoch": 4.048715282934799, - "grad_norm": 2.240159511566162, - "learning_rate": 5.7077083023912075e-06, - "loss": 0.0721, - "step": 54520 - }, - { - "epoch": 4.049457893955147, - "grad_norm": 2.353022336959839, - "learning_rate": 5.703252636269122e-06, - "loss": 0.0597, - "step": 54530 - }, - { - "epoch": 4.050200504975494, - "grad_norm": 0.9646291732788086, - "learning_rate": 5.6987969701470375e-06, - "loss": 0.049, - "step": 54540 - }, - { - "epoch": 4.050943115995842, - "grad_norm": 2.1946423053741455, - "learning_rate": 5.6943413040249525e-06, - "loss": 0.062, - "step": 54550 - }, - { - "epoch": 4.051685727016189, - "grad_norm": 2.869677782058716, - "learning_rate": 5.689885637902867e-06, - "loss": 0.0553, - "step": 54560 - }, - { - "epoch": 4.0524283380365365, - "grad_norm": 3.2481913566589355, - "learning_rate": 5.685429971780782e-06, - "loss": 0.0373, - "step": 54570 - }, - { - "epoch": 4.053170949056884, - "grad_norm": 1.6126598119735718, - "learning_rate": 5.680974305658696e-06, - "loss": 0.0596, - "step": 54580 - }, - { - "epoch": 4.053913560077231, - "grad_norm": 1.128563404083252, - "learning_rate": 5.676518639536611e-06, - "loss": 0.0334, - "step": 54590 - }, - { - "epoch": 4.054656171097579, - "grad_norm": 2.0686044692993164, - "learning_rate": 5.672062973414526e-06, - "loss": 0.0672, - "step": 54600 - }, - { - "epoch": 4.055398782117926, - "grad_norm": 0.4722823202610016, - "learning_rate": 5.667607307292441e-06, - "loss": 0.0368, - "step": 54610 - }, - { - "epoch": 4.056141393138274, - "grad_norm": 1.7301435470581055, - "learning_rate": 5.663151641170355e-06, - "loss": 0.0736, - "step": 54620 - }, - { - "epoch": 4.056884004158622, - "grad_norm": 0.7229349613189697, - "learning_rate": 5.65869597504827e-06, - "loss": 0.0901, - "step": 54630 - }, - { - "epoch": 4.057626615178969, - "grad_norm": 2.0059196949005127, - "learning_rate": 5.654240308926184e-06, - "loss": 0.0608, - "step": 54640 - }, - { - "epoch": 4.058369226199317, - "grad_norm": 0.5810075402259827, - "learning_rate": 5.649784642804099e-06, - "loss": 0.0438, - "step": 54650 - }, - { - "epoch": 4.059111837219664, - "grad_norm": 1.114890456199646, - "learning_rate": 5.645328976682014e-06, - "loss": 0.0457, - "step": 54660 - }, - { - "epoch": 4.059854448240012, - "grad_norm": 1.2894550561904907, - "learning_rate": 5.640873310559929e-06, - "loss": 0.0744, - "step": 54670 - }, - { - "epoch": 4.06059705926036, - "grad_norm": 0.7137770652770996, - "learning_rate": 5.636417644437843e-06, - "loss": 0.0785, - "step": 54680 - }, - { - "epoch": 4.061339670280707, - "grad_norm": 2.466371774673462, - "learning_rate": 5.631961978315758e-06, - "loss": 0.0536, - "step": 54690 - }, - { - "epoch": 4.062082281301055, - "grad_norm": 0.39749160408973694, - "learning_rate": 5.627506312193673e-06, - "loss": 0.0582, - "step": 54700 - }, - { - "epoch": 4.062824892321402, - "grad_norm": 1.1517406702041626, - "learning_rate": 5.623050646071588e-06, - "loss": 0.0545, - "step": 54710 - }, - { - "epoch": 4.0635675033417495, - "grad_norm": 0.7695569396018982, - "learning_rate": 5.618594979949503e-06, - "loss": 0.0449, - "step": 54720 - }, - { - "epoch": 4.064310114362097, - "grad_norm": 0.4918759763240814, - "learning_rate": 5.614139313827417e-06, - "loss": 0.0566, - "step": 54730 - }, - { - "epoch": 4.065052725382444, - "grad_norm": 1.6468467712402344, - "learning_rate": 5.609683647705332e-06, - "loss": 0.052, - "step": 54740 - }, - { - "epoch": 4.065795336402792, - "grad_norm": 1.0345146656036377, - "learning_rate": 5.6052279815832464e-06, - "loss": 0.0394, - "step": 54750 - }, - { - "epoch": 4.066537947423139, - "grad_norm": 3.2710659503936768, - "learning_rate": 5.6007723154611614e-06, - "loss": 0.071, - "step": 54760 - }, - { - "epoch": 4.067280558443487, - "grad_norm": 2.8637590408325195, - "learning_rate": 5.5963166493390764e-06, - "loss": 0.0476, - "step": 54770 - }, - { - "epoch": 4.068023169463835, - "grad_norm": 0.5226951837539673, - "learning_rate": 5.5918609832169914e-06, - "loss": 0.0442, - "step": 54780 - }, - { - "epoch": 4.068765780484182, - "grad_norm": 0.48805347084999084, - "learning_rate": 5.587405317094906e-06, - "loss": 0.044, - "step": 54790 - }, - { - "epoch": 4.06950839150453, - "grad_norm": 0.6936028599739075, - "learning_rate": 5.5829496509728206e-06, - "loss": 0.0593, - "step": 54800 - }, - { - "epoch": 4.070251002524877, - "grad_norm": 2.0860705375671387, - "learning_rate": 5.578493984850735e-06, - "loss": 0.0653, - "step": 54810 - }, - { - "epoch": 4.070993613545225, - "grad_norm": 1.1754940748214722, - "learning_rate": 5.57403831872865e-06, - "loss": 0.0628, - "step": 54820 - }, - { - "epoch": 4.071736224565573, - "grad_norm": 1.029878854751587, - "learning_rate": 5.5695826526065656e-06, - "loss": 0.0553, - "step": 54830 - }, - { - "epoch": 4.07247883558592, - "grad_norm": 0.6767436861991882, - "learning_rate": 5.56512698648448e-06, - "loss": 0.0671, - "step": 54840 - }, - { - "epoch": 4.073221446606268, - "grad_norm": 1.4418343305587769, - "learning_rate": 5.560671320362395e-06, - "loss": 0.0582, - "step": 54850 - }, - { - "epoch": 4.073964057626615, - "grad_norm": 0.44620373845100403, - "learning_rate": 5.556215654240309e-06, - "loss": 0.0845, - "step": 54860 - }, - { - "epoch": 4.074706668646963, - "grad_norm": 1.5117290019989014, - "learning_rate": 5.551759988118224e-06, - "loss": 0.0614, - "step": 54870 - }, - { - "epoch": 4.0754492796673105, - "grad_norm": 1.3769358396530151, - "learning_rate": 5.547304321996138e-06, - "loss": 0.0551, - "step": 54880 - }, - { - "epoch": 4.0761918906876575, - "grad_norm": 1.5998951196670532, - "learning_rate": 5.542848655874054e-06, - "loss": 0.0743, - "step": 54890 - }, - { - "epoch": 4.076934501708005, - "grad_norm": 1.0898070335388184, - "learning_rate": 5.538392989751968e-06, - "loss": 0.0667, - "step": 54900 - }, - { - "epoch": 4.077677112728353, - "grad_norm": 1.2157825231552124, - "learning_rate": 5.533937323629883e-06, - "loss": 0.0511, - "step": 54910 - }, - { - "epoch": 4.0784197237487, - "grad_norm": 2.0952835083007812, - "learning_rate": 5.529481657507797e-06, - "loss": 0.0447, - "step": 54920 - }, - { - "epoch": 4.079162334769048, - "grad_norm": 1.1729780435562134, - "learning_rate": 5.525025991385712e-06, - "loss": 0.1045, - "step": 54930 - }, - { - "epoch": 4.079904945789395, - "grad_norm": 1.0873030424118042, - "learning_rate": 5.520570325263626e-06, - "loss": 0.062, - "step": 54940 - }, - { - "epoch": 4.080647556809743, - "grad_norm": 0.5280413627624512, - "learning_rate": 5.516114659141542e-06, - "loss": 0.0579, - "step": 54950 - }, - { - "epoch": 4.081390167830091, - "grad_norm": 1.2786023616790771, - "learning_rate": 5.511658993019457e-06, - "loss": 0.0453, - "step": 54960 - }, - { - "epoch": 4.082132778850438, - "grad_norm": 1.6670242547988892, - "learning_rate": 5.507203326897371e-06, - "loss": 0.0718, - "step": 54970 - }, - { - "epoch": 4.082875389870786, - "grad_norm": 0.9172728657722473, - "learning_rate": 5.502747660775286e-06, - "loss": 0.0472, - "step": 54980 - }, - { - "epoch": 4.083618000891133, - "grad_norm": 0.3648128807544708, - "learning_rate": 5.4982919946532e-06, - "loss": 0.0824, - "step": 54990 - }, - { - "epoch": 4.084360611911481, - "grad_norm": 3.186699151992798, - "learning_rate": 5.493836328531115e-06, - "loss": 0.0656, - "step": 55000 - }, - { - "epoch": 4.085103222931829, - "grad_norm": 1.5202915668487549, - "learning_rate": 5.48938066240903e-06, - "loss": 0.0586, - "step": 55010 - }, - { - "epoch": 4.085845833952176, - "grad_norm": 1.0740153789520264, - "learning_rate": 5.484924996286945e-06, - "loss": 0.0612, - "step": 55020 - }, - { - "epoch": 4.086588444972524, - "grad_norm": 1.715743899345398, - "learning_rate": 5.4804693301648595e-06, - "loss": 0.0613, - "step": 55030 - }, - { - "epoch": 4.087331055992871, - "grad_norm": 2.118396520614624, - "learning_rate": 5.4760136640427745e-06, - "loss": 0.0698, - "step": 55040 - }, - { - "epoch": 4.0880736670132185, - "grad_norm": 0.9983229637145996, - "learning_rate": 5.471557997920689e-06, - "loss": 0.0509, - "step": 55050 - }, - { - "epoch": 4.088816278033566, - "grad_norm": 2.4664018154144287, - "learning_rate": 5.4671023317986045e-06, - "loss": 0.0678, - "step": 55060 - }, - { - "epoch": 4.089558889053913, - "grad_norm": 0.848099410533905, - "learning_rate": 5.462646665676519e-06, - "loss": 0.0499, - "step": 55070 - }, - { - "epoch": 4.090301500074261, - "grad_norm": 1.8434141874313354, - "learning_rate": 5.458190999554434e-06, - "loss": 0.0476, - "step": 55080 - }, - { - "epoch": 4.091044111094608, - "grad_norm": 1.2754813432693481, - "learning_rate": 5.453735333432349e-06, - "loss": 0.0586, - "step": 55090 - }, - { - "epoch": 4.091786722114956, - "grad_norm": 0.441261887550354, - "learning_rate": 5.449279667310263e-06, - "loss": 0.0583, - "step": 55100 - }, - { - "epoch": 4.092529333135304, - "grad_norm": 1.066699743270874, - "learning_rate": 5.444824001188178e-06, - "loss": 0.0658, - "step": 55110 - }, - { - "epoch": 4.093271944155651, - "grad_norm": 0.8867554664611816, - "learning_rate": 5.440368335066093e-06, - "loss": 0.0909, - "step": 55120 - }, - { - "epoch": 4.094014555175999, - "grad_norm": 0.7380247116088867, - "learning_rate": 5.435912668944008e-06, - "loss": 0.0538, - "step": 55130 - }, - { - "epoch": 4.094757166196346, - "grad_norm": 1.3226486444473267, - "learning_rate": 5.431457002821922e-06, - "loss": 0.0688, - "step": 55140 - }, - { - "epoch": 4.095499777216694, - "grad_norm": 1.1807446479797363, - "learning_rate": 5.427001336699837e-06, - "loss": 0.0498, - "step": 55150 - }, - { - "epoch": 4.096242388237042, - "grad_norm": 1.0485824346542358, - "learning_rate": 5.422545670577751e-06, - "loss": 0.0652, - "step": 55160 - }, - { - "epoch": 4.096984999257389, - "grad_norm": 0.35923972725868225, - "learning_rate": 5.418090004455666e-06, - "loss": 0.0483, - "step": 55170 - }, - { - "epoch": 4.097727610277737, - "grad_norm": 0.8954094648361206, - "learning_rate": 5.413634338333581e-06, - "loss": 0.0722, - "step": 55180 - }, - { - "epoch": 4.098470221298084, - "grad_norm": 2.760606288909912, - "learning_rate": 5.409178672211496e-06, - "loss": 0.0826, - "step": 55190 - }, - { - "epoch": 4.099212832318432, - "grad_norm": 1.2169206142425537, - "learning_rate": 5.40472300608941e-06, - "loss": 0.043, - "step": 55200 - }, - { - "epoch": 4.0999554433387795, - "grad_norm": 0.5677967667579651, - "learning_rate": 5.400267339967325e-06, - "loss": 0.0508, - "step": 55210 - }, - { - "epoch": 4.1006980543591265, - "grad_norm": 0.8861594200134277, - "learning_rate": 5.39581167384524e-06, - "loss": 0.0754, - "step": 55220 - }, - { - "epoch": 4.101440665379474, - "grad_norm": 1.1130675077438354, - "learning_rate": 5.391356007723154e-06, - "loss": 0.0487, - "step": 55230 - }, - { - "epoch": 4.102183276399821, - "grad_norm": 1.4723432064056396, - "learning_rate": 5.38690034160107e-06, - "loss": 0.0622, - "step": 55240 - }, - { - "epoch": 4.102925887420169, - "grad_norm": 2.4365248680114746, - "learning_rate": 5.382444675478984e-06, - "loss": 0.0429, - "step": 55250 - }, - { - "epoch": 4.103668498440517, - "grad_norm": 2.329158067703247, - "learning_rate": 5.377989009356899e-06, - "loss": 0.0546, - "step": 55260 - }, - { - "epoch": 4.104411109460864, - "grad_norm": 0.6031007170677185, - "learning_rate": 5.3735333432348134e-06, - "loss": 0.0434, - "step": 55270 - }, - { - "epoch": 4.105153720481212, - "grad_norm": 1.8432621955871582, - "learning_rate": 5.3690776771127284e-06, - "loss": 0.0504, - "step": 55280 - }, - { - "epoch": 4.105896331501559, - "grad_norm": 4.149932384490967, - "learning_rate": 5.364622010990643e-06, - "loss": 0.0487, - "step": 55290 - }, - { - "epoch": 4.106638942521907, - "grad_norm": 0.8511655926704407, - "learning_rate": 5.360166344868558e-06, - "loss": 0.0491, - "step": 55300 - }, - { - "epoch": 4.107381553542255, - "grad_norm": 0.9837514162063599, - "learning_rate": 5.3557106787464726e-06, - "loss": 0.0698, - "step": 55310 - }, - { - "epoch": 4.108124164562602, - "grad_norm": 3.7656993865966797, - "learning_rate": 5.3512550126243876e-06, - "loss": 0.0776, - "step": 55320 - }, - { - "epoch": 4.10886677558295, - "grad_norm": 3.2895989418029785, - "learning_rate": 5.346799346502302e-06, - "loss": 0.0607, - "step": 55330 - }, - { - "epoch": 4.109609386603297, - "grad_norm": 1.3208965063095093, - "learning_rate": 5.342343680380217e-06, - "loss": 0.0559, - "step": 55340 - }, - { - "epoch": 4.110351997623645, - "grad_norm": 0.915107250213623, - "learning_rate": 5.337888014258132e-06, - "loss": 0.0958, - "step": 55350 - }, - { - "epoch": 4.111094608643993, - "grad_norm": 3.8144876956939697, - "learning_rate": 5.333432348136047e-06, - "loss": 0.0531, - "step": 55360 - }, - { - "epoch": 4.11183721966434, - "grad_norm": 0.789252519607544, - "learning_rate": 5.328976682013962e-06, - "loss": 0.0589, - "step": 55370 - }, - { - "epoch": 4.1125798306846875, - "grad_norm": 0.5822809934616089, - "learning_rate": 5.324521015891876e-06, - "loss": 0.0394, - "step": 55380 - }, - { - "epoch": 4.1133224417050345, - "grad_norm": 1.0521644353866577, - "learning_rate": 5.320065349769791e-06, - "loss": 0.0813, - "step": 55390 - }, - { - "epoch": 4.114065052725382, - "grad_norm": 3.525383710861206, - "learning_rate": 5.315609683647705e-06, - "loss": 0.0696, - "step": 55400 - }, - { - "epoch": 4.11480766374573, - "grad_norm": 2.288709878921509, - "learning_rate": 5.311154017525621e-06, - "loss": 0.0517, - "step": 55410 - }, - { - "epoch": 4.115550274766077, - "grad_norm": 1.1933070421218872, - "learning_rate": 5.306698351403535e-06, - "loss": 0.0642, - "step": 55420 - }, - { - "epoch": 4.116292885786425, - "grad_norm": 0.5231961011886597, - "learning_rate": 5.30224268528145e-06, - "loss": 0.0373, - "step": 55430 - }, - { - "epoch": 4.117035496806773, - "grad_norm": 2.135310173034668, - "learning_rate": 5.297787019159364e-06, - "loss": 0.0701, - "step": 55440 - }, - { - "epoch": 4.11777810782712, - "grad_norm": 1.2622560262680054, - "learning_rate": 5.293331353037279e-06, - "loss": 0.0359, - "step": 55450 - }, - { - "epoch": 4.118520718847468, - "grad_norm": 0.9313594102859497, - "learning_rate": 5.288875686915193e-06, - "loss": 0.0639, - "step": 55460 - }, - { - "epoch": 4.119263329867815, - "grad_norm": 1.2040377855300903, - "learning_rate": 5.284420020793109e-06, - "loss": 0.0436, - "step": 55470 - }, - { - "epoch": 4.120005940888163, - "grad_norm": 1.6361944675445557, - "learning_rate": 5.279964354671024e-06, - "loss": 0.045, - "step": 55480 - }, - { - "epoch": 4.120748551908511, - "grad_norm": 0.6171565651893616, - "learning_rate": 5.275508688548938e-06, - "loss": 0.0487, - "step": 55490 - }, - { - "epoch": 4.121491162928858, - "grad_norm": 1.9398356676101685, - "learning_rate": 5.271053022426853e-06, - "loss": 0.0551, - "step": 55500 - }, - { - "epoch": 4.122233773949206, - "grad_norm": 0.6700782775878906, - "learning_rate": 5.266597356304767e-06, - "loss": 0.0695, - "step": 55510 - }, - { - "epoch": 4.122976384969553, - "grad_norm": 1.5354483127593994, - "learning_rate": 5.262141690182682e-06, - "loss": 0.0606, - "step": 55520 - }, - { - "epoch": 4.1237189959899005, - "grad_norm": 2.600698947906494, - "learning_rate": 5.257686024060597e-06, - "loss": 0.0734, - "step": 55530 - }, - { - "epoch": 4.124461607010248, - "grad_norm": 1.9559639692306519, - "learning_rate": 5.253230357938512e-06, - "loss": 0.0569, - "step": 55540 - }, - { - "epoch": 4.1252042180305954, - "grad_norm": 2.100451946258545, - "learning_rate": 5.2487746918164265e-06, - "loss": 0.0729, - "step": 55550 - }, - { - "epoch": 4.125946829050943, - "grad_norm": 2.451205253601074, - "learning_rate": 5.2443190256943415e-06, - "loss": 0.0457, - "step": 55560 - }, - { - "epoch": 4.12668944007129, - "grad_norm": 1.9854660034179688, - "learning_rate": 5.239863359572256e-06, - "loss": 0.0379, - "step": 55570 - }, - { - "epoch": 4.127432051091638, - "grad_norm": 1.1762291193008423, - "learning_rate": 5.235407693450171e-06, - "loss": 0.0425, - "step": 55580 - }, - { - "epoch": 4.128174662111986, - "grad_norm": 1.3475638628005981, - "learning_rate": 5.230952027328086e-06, - "loss": 0.0654, - "step": 55590 - }, - { - "epoch": 4.128917273132333, - "grad_norm": 1.0399333238601685, - "learning_rate": 5.226496361206001e-06, - "loss": 0.0595, - "step": 55600 - }, - { - "epoch": 4.129659884152681, - "grad_norm": 0.3056696355342865, - "learning_rate": 5.222040695083916e-06, - "loss": 0.0424, - "step": 55610 - }, - { - "epoch": 4.130402495173028, - "grad_norm": 2.0627856254577637, - "learning_rate": 5.21758502896183e-06, - "loss": 0.0635, - "step": 55620 - }, - { - "epoch": 4.131145106193376, - "grad_norm": 2.32353138923645, - "learning_rate": 5.213129362839745e-06, - "loss": 0.0512, - "step": 55630 - }, - { - "epoch": 4.131887717213724, - "grad_norm": 1.2325079441070557, - "learning_rate": 5.208673696717659e-06, - "loss": 0.027, - "step": 55640 - }, - { - "epoch": 4.132630328234071, - "grad_norm": 1.5056791305541992, - "learning_rate": 5.204218030595575e-06, - "loss": 0.0549, - "step": 55650 - }, - { - "epoch": 4.133372939254419, - "grad_norm": 2.7190983295440674, - "learning_rate": 5.199762364473489e-06, - "loss": 0.0631, - "step": 55660 - }, - { - "epoch": 4.134115550274766, - "grad_norm": 1.026847243309021, - "learning_rate": 5.195306698351404e-06, - "loss": 0.0541, - "step": 55670 - }, - { - "epoch": 4.134858161295114, - "grad_norm": 1.6611924171447754, - "learning_rate": 5.190851032229318e-06, - "loss": 0.0663, - "step": 55680 - }, - { - "epoch": 4.1356007723154615, - "grad_norm": 0.3207695186138153, - "learning_rate": 5.186395366107233e-06, - "loss": 0.0528, - "step": 55690 - }, - { - "epoch": 4.1363433833358085, - "grad_norm": 1.145107388496399, - "learning_rate": 5.181939699985148e-06, - "loss": 0.0617, - "step": 55700 - }, - { - "epoch": 4.137085994356156, - "grad_norm": 2.4018893241882324, - "learning_rate": 5.177484033863063e-06, - "loss": 0.092, - "step": 55710 - }, - { - "epoch": 4.137828605376503, - "grad_norm": 0.642469048500061, - "learning_rate": 5.173028367740977e-06, - "loss": 0.0801, - "step": 55720 - }, - { - "epoch": 4.138571216396851, - "grad_norm": 0.8119722008705139, - "learning_rate": 5.168572701618892e-06, - "loss": 0.0502, - "step": 55730 - }, - { - "epoch": 4.139313827417199, - "grad_norm": 0.8802699446678162, - "learning_rate": 5.164117035496807e-06, - "loss": 0.058, - "step": 55740 - }, - { - "epoch": 4.140056438437546, - "grad_norm": 2.3792264461517334, - "learning_rate": 5.159661369374721e-06, - "loss": 0.0601, - "step": 55750 - }, - { - "epoch": 4.140799049457894, - "grad_norm": 0.80853670835495, - "learning_rate": 5.155205703252637e-06, - "loss": 0.0554, - "step": 55760 - }, - { - "epoch": 4.141541660478241, - "grad_norm": 1.0470110177993774, - "learning_rate": 5.150750037130551e-06, - "loss": 0.0569, - "step": 55770 - }, - { - "epoch": 4.142284271498589, - "grad_norm": 3.2647767066955566, - "learning_rate": 5.146294371008466e-06, - "loss": 0.0529, - "step": 55780 - }, - { - "epoch": 4.143026882518937, - "grad_norm": 1.5302366018295288, - "learning_rate": 5.1418387048863804e-06, - "loss": 0.0634, - "step": 55790 - }, - { - "epoch": 4.143769493539284, - "grad_norm": 3.8410422801971436, - "learning_rate": 5.1373830387642954e-06, - "loss": 0.0607, - "step": 55800 - }, - { - "epoch": 4.144512104559632, - "grad_norm": 1.0163367986679077, - "learning_rate": 5.1329273726422096e-06, - "loss": 0.0323, - "step": 55810 - }, - { - "epoch": 4.145254715579979, - "grad_norm": 1.0663138628005981, - "learning_rate": 5.128471706520125e-06, - "loss": 0.0599, - "step": 55820 - }, - { - "epoch": 4.145997326600327, - "grad_norm": 1.1490668058395386, - "learning_rate": 5.1240160403980396e-06, - "loss": 0.0542, - "step": 55830 - }, - { - "epoch": 4.146739937620675, - "grad_norm": 2.020784616470337, - "learning_rate": 5.1195603742759546e-06, - "loss": 0.0645, - "step": 55840 - }, - { - "epoch": 4.147482548641022, - "grad_norm": 0.6550213098526001, - "learning_rate": 5.115104708153869e-06, - "loss": 0.0588, - "step": 55850 - }, - { - "epoch": 4.1482251596613695, - "grad_norm": 1.2832306623458862, - "learning_rate": 5.110649042031784e-06, - "loss": 0.0504, - "step": 55860 - }, - { - "epoch": 4.1489677706817165, - "grad_norm": 0.4903786778450012, - "learning_rate": 5.106193375909699e-06, - "loss": 0.0826, - "step": 55870 - }, - { - "epoch": 4.149710381702064, - "grad_norm": 4.058075904846191, - "learning_rate": 5.101737709787614e-06, - "loss": 0.0807, - "step": 55880 - }, - { - "epoch": 4.150452992722412, - "grad_norm": 0.9241071939468384, - "learning_rate": 5.097282043665529e-06, - "loss": 0.0651, - "step": 55890 - }, - { - "epoch": 4.151195603742759, - "grad_norm": 0.6750563979148865, - "learning_rate": 5.092826377543443e-06, - "loss": 0.049, - "step": 55900 - }, - { - "epoch": 4.151938214763107, - "grad_norm": 2.1607372760772705, - "learning_rate": 5.088370711421358e-06, - "loss": 0.092, - "step": 55910 - }, - { - "epoch": 4.152680825783454, - "grad_norm": 0.9965745210647583, - "learning_rate": 5.083915045299272e-06, - "loss": 0.0498, - "step": 55920 - }, - { - "epoch": 4.153423436803802, - "grad_norm": 2.7033541202545166, - "learning_rate": 5.079459379177187e-06, - "loss": 0.0601, - "step": 55930 - }, - { - "epoch": 4.15416604782415, - "grad_norm": 0.4900580644607544, - "learning_rate": 5.075003713055102e-06, - "loss": 0.0497, - "step": 55940 - }, - { - "epoch": 4.154908658844497, - "grad_norm": 2.328214406967163, - "learning_rate": 5.070548046933017e-06, - "loss": 0.047, - "step": 55950 - }, - { - "epoch": 4.155651269864845, - "grad_norm": 1.3433852195739746, - "learning_rate": 5.066092380810931e-06, - "loss": 0.0311, - "step": 55960 - }, - { - "epoch": 4.156393880885192, - "grad_norm": 2.1777939796447754, - "learning_rate": 5.061636714688846e-06, - "loss": 0.0605, - "step": 55970 - }, - { - "epoch": 4.15713649190554, - "grad_norm": 0.2275165617465973, - "learning_rate": 5.05718104856676e-06, - "loss": 0.047, - "step": 55980 - }, - { - "epoch": 4.157879102925888, - "grad_norm": 0.16543416678905487, - "learning_rate": 5.052725382444675e-06, - "loss": 0.0714, - "step": 55990 - }, - { - "epoch": 4.158621713946235, - "grad_norm": 1.2253612279891968, - "learning_rate": 5.048269716322591e-06, - "loss": 0.0605, - "step": 56000 - }, - { - "epoch": 4.159364324966583, - "grad_norm": 2.219952344894409, - "learning_rate": 5.043814050200505e-06, - "loss": 0.0681, - "step": 56010 - }, - { - "epoch": 4.16010693598693, - "grad_norm": 1.6367051601409912, - "learning_rate": 5.03935838407842e-06, - "loss": 0.038, - "step": 56020 - }, - { - "epoch": 4.1608495470072775, - "grad_norm": 1.207080602645874, - "learning_rate": 5.034902717956334e-06, - "loss": 0.0787, - "step": 56030 - }, - { - "epoch": 4.161592158027625, - "grad_norm": 2.1484475135803223, - "learning_rate": 5.030447051834249e-06, - "loss": 0.0528, - "step": 56040 - }, - { - "epoch": 4.162334769047972, - "grad_norm": 0.8749150633811951, - "learning_rate": 5.025991385712164e-06, - "loss": 0.0848, - "step": 56050 - }, - { - "epoch": 4.16307738006832, - "grad_norm": 0.868646502494812, - "learning_rate": 5.021535719590079e-06, - "loss": 0.0662, - "step": 56060 - }, - { - "epoch": 4.163819991088668, - "grad_norm": 0.94334876537323, - "learning_rate": 5.0170800534679935e-06, - "loss": 0.058, - "step": 56070 - }, - { - "epoch": 4.164562602109015, - "grad_norm": 0.9084467887878418, - "learning_rate": 5.0126243873459085e-06, - "loss": 0.0357, - "step": 56080 - }, - { - "epoch": 4.165305213129363, - "grad_norm": 1.5573078393936157, - "learning_rate": 5.008168721223823e-06, - "loss": 0.0886, - "step": 56090 - }, - { - "epoch": 4.16604782414971, - "grad_norm": 2.178457260131836, - "learning_rate": 5.003713055101738e-06, - "loss": 0.0442, - "step": 56100 - }, - { - "epoch": 4.166790435170058, - "grad_norm": 1.7466106414794922, - "learning_rate": 4.999257388979653e-06, - "loss": 0.051, - "step": 56110 - }, - { - "epoch": 4.167533046190406, - "grad_norm": 1.0477418899536133, - "learning_rate": 4.994801722857568e-06, - "loss": 0.0392, - "step": 56120 - }, - { - "epoch": 4.168275657210753, - "grad_norm": 2.2968533039093018, - "learning_rate": 4.990346056735482e-06, - "loss": 0.0433, - "step": 56130 - }, - { - "epoch": 4.169018268231101, - "grad_norm": 0.44317713379859924, - "learning_rate": 4.985890390613397e-06, - "loss": 0.0537, - "step": 56140 - }, - { - "epoch": 4.169760879251448, - "grad_norm": 1.9365060329437256, - "learning_rate": 4.981434724491312e-06, - "loss": 0.0842, - "step": 56150 - }, - { - "epoch": 4.170503490271796, - "grad_norm": 0.4268725514411926, - "learning_rate": 4.976979058369226e-06, - "loss": 0.064, - "step": 56160 - }, - { - "epoch": 4.171246101292144, - "grad_norm": 0.8040658235549927, - "learning_rate": 4.972523392247142e-06, - "loss": 0.0691, - "step": 56170 - }, - { - "epoch": 4.171988712312491, - "grad_norm": 1.0074480772018433, - "learning_rate": 4.968067726125056e-06, - "loss": 0.0687, - "step": 56180 - }, - { - "epoch": 4.1727313233328385, - "grad_norm": 0.6365683078765869, - "learning_rate": 4.963612060002971e-06, - "loss": 0.0461, - "step": 56190 - }, - { - "epoch": 4.1734739343531855, - "grad_norm": 2.264573812484741, - "learning_rate": 4.959156393880885e-06, - "loss": 0.0618, - "step": 56200 - }, - { - "epoch": 4.174216545373533, - "grad_norm": 1.5489435195922852, - "learning_rate": 4.9547007277588e-06, - "loss": 0.0888, - "step": 56210 - }, - { - "epoch": 4.174959156393881, - "grad_norm": 0.8037591576576233, - "learning_rate": 4.950245061636714e-06, - "loss": 0.0793, - "step": 56220 - }, - { - "epoch": 4.175701767414228, - "grad_norm": 1.1610863208770752, - "learning_rate": 4.94578939551463e-06, - "loss": 0.0623, - "step": 56230 - }, - { - "epoch": 4.176444378434576, - "grad_norm": 1.3831819295883179, - "learning_rate": 4.941333729392544e-06, - "loss": 0.059, - "step": 56240 - }, - { - "epoch": 4.177186989454923, - "grad_norm": 0.5763561725616455, - "learning_rate": 4.936878063270459e-06, - "loss": 0.0341, - "step": 56250 - }, - { - "epoch": 4.177929600475271, - "grad_norm": 1.123581051826477, - "learning_rate": 4.932422397148373e-06, - "loss": 0.0658, - "step": 56260 - }, - { - "epoch": 4.178672211495619, - "grad_norm": 1.3264278173446655, - "learning_rate": 4.927966731026288e-06, - "loss": 0.0623, - "step": 56270 - }, - { - "epoch": 4.179414822515966, - "grad_norm": 1.4704315662384033, - "learning_rate": 4.923511064904203e-06, - "loss": 0.0734, - "step": 56280 - }, - { - "epoch": 4.180157433536314, - "grad_norm": 1.4368115663528442, - "learning_rate": 4.919055398782118e-06, - "loss": 0.0561, - "step": 56290 - }, - { - "epoch": 4.180900044556661, - "grad_norm": 0.8675275444984436, - "learning_rate": 4.914599732660033e-06, - "loss": 0.08, - "step": 56300 - }, - { - "epoch": 4.181642655577009, - "grad_norm": 1.2973276376724243, - "learning_rate": 4.9101440665379474e-06, - "loss": 0.0725, - "step": 56310 - }, - { - "epoch": 4.182385266597357, - "grad_norm": 0.6267523169517517, - "learning_rate": 4.905688400415862e-06, - "loss": 0.0461, - "step": 56320 - }, - { - "epoch": 4.183127877617704, - "grad_norm": 2.129601001739502, - "learning_rate": 4.9012327342937766e-06, - "loss": 0.0556, - "step": 56330 - }, - { - "epoch": 4.1838704886380516, - "grad_norm": 2.3097314834594727, - "learning_rate": 4.8967770681716916e-06, - "loss": 0.0503, - "step": 56340 - }, - { - "epoch": 4.184613099658399, - "grad_norm": 1.3276792764663696, - "learning_rate": 4.8923214020496066e-06, - "loss": 0.076, - "step": 56350 - }, - { - "epoch": 4.1853557106787465, - "grad_norm": 0.35334932804107666, - "learning_rate": 4.8878657359275216e-06, - "loss": 0.0596, - "step": 56360 - }, - { - "epoch": 4.186098321699094, - "grad_norm": 2.6858012676239014, - "learning_rate": 4.883410069805436e-06, - "loss": 0.0484, - "step": 56370 - }, - { - "epoch": 4.186840932719441, - "grad_norm": 2.2050089836120605, - "learning_rate": 4.878954403683351e-06, - "loss": 0.067, - "step": 56380 - }, - { - "epoch": 4.187583543739789, - "grad_norm": 0.7625178694725037, - "learning_rate": 4.874498737561265e-06, - "loss": 0.0654, - "step": 56390 - }, - { - "epoch": 4.188326154760136, - "grad_norm": 1.7497589588165283, - "learning_rate": 4.870043071439181e-06, - "loss": 0.0781, - "step": 56400 - }, - { - "epoch": 4.189068765780484, - "grad_norm": 0.6290327906608582, - "learning_rate": 4.865587405317096e-06, - "loss": 0.045, - "step": 56410 - }, - { - "epoch": 4.189811376800832, - "grad_norm": 2.0656166076660156, - "learning_rate": 4.86113173919501e-06, - "loss": 0.068, - "step": 56420 - }, - { - "epoch": 4.190553987821179, - "grad_norm": 1.3763740062713623, - "learning_rate": 4.856676073072925e-06, - "loss": 0.0746, - "step": 56430 - }, - { - "epoch": 4.191296598841527, - "grad_norm": 1.222730278968811, - "learning_rate": 4.852220406950839e-06, - "loss": 0.0773, - "step": 56440 - }, - { - "epoch": 4.192039209861874, - "grad_norm": 0.5212209820747375, - "learning_rate": 4.847764740828754e-06, - "loss": 0.0392, - "step": 56450 - }, - { - "epoch": 4.192781820882222, - "grad_norm": 0.5401204824447632, - "learning_rate": 4.843309074706669e-06, - "loss": 0.0801, - "step": 56460 - }, - { - "epoch": 4.19352443190257, - "grad_norm": 0.9797640442848206, - "learning_rate": 4.838853408584584e-06, - "loss": 0.0541, - "step": 56470 - }, - { - "epoch": 4.194267042922917, - "grad_norm": 0.41546717286109924, - "learning_rate": 4.834397742462498e-06, - "loss": 0.0495, - "step": 56480 - }, - { - "epoch": 4.195009653943265, - "grad_norm": 1.290562391281128, - "learning_rate": 4.829942076340413e-06, - "loss": 0.0371, - "step": 56490 - }, - { - "epoch": 4.195752264963612, - "grad_norm": 0.5906802415847778, - "learning_rate": 4.825486410218327e-06, - "loss": 0.064, - "step": 56500 - }, - { - "epoch": 4.1964948759839595, - "grad_norm": 1.46896493434906, - "learning_rate": 4.821030744096242e-06, - "loss": 0.0478, - "step": 56510 - }, - { - "epoch": 4.197237487004307, - "grad_norm": 1.2562772035598755, - "learning_rate": 4.816575077974157e-06, - "loss": 0.0531, - "step": 56520 - }, - { - "epoch": 4.197980098024654, - "grad_norm": 1.5765583515167236, - "learning_rate": 4.812119411852072e-06, - "loss": 0.0586, - "step": 56530 - }, - { - "epoch": 4.198722709045002, - "grad_norm": 1.9203531742095947, - "learning_rate": 4.807663745729987e-06, - "loss": 0.0759, - "step": 56540 - }, - { - "epoch": 4.19946532006535, - "grad_norm": 0.2275390923023224, - "learning_rate": 4.803208079607901e-06, - "loss": 0.0347, - "step": 56550 - }, - { - "epoch": 4.200207931085697, - "grad_norm": 0.8825653791427612, - "learning_rate": 4.798752413485816e-06, - "loss": 0.0288, - "step": 56560 - }, - { - "epoch": 4.200950542106045, - "grad_norm": 0.9257974624633789, - "learning_rate": 4.7942967473637305e-06, - "loss": 0.0421, - "step": 56570 - }, - { - "epoch": 4.201693153126392, - "grad_norm": 0.9080449938774109, - "learning_rate": 4.789841081241646e-06, - "loss": 0.0569, - "step": 56580 - }, - { - "epoch": 4.20243576414674, - "grad_norm": 0.7005447149276733, - "learning_rate": 4.7853854151195605e-06, - "loss": 0.1045, - "step": 56590 - }, - { - "epoch": 4.203178375167088, - "grad_norm": 0.9929253458976746, - "learning_rate": 4.7809297489974755e-06, - "loss": 0.053, - "step": 56600 - }, - { - "epoch": 4.203920986187435, - "grad_norm": 2.272272825241089, - "learning_rate": 4.77647408287539e-06, - "loss": 0.076, - "step": 56610 - }, - { - "epoch": 4.204663597207783, - "grad_norm": 1.6657465696334839, - "learning_rate": 4.772018416753305e-06, - "loss": 0.0705, - "step": 56620 - }, - { - "epoch": 4.20540620822813, - "grad_norm": 4.0360589027404785, - "learning_rate": 4.767562750631219e-06, - "loss": 0.0637, - "step": 56630 - }, - { - "epoch": 4.206148819248478, - "grad_norm": 1.8561382293701172, - "learning_rate": 4.763107084509135e-06, - "loss": 0.0368, - "step": 56640 - }, - { - "epoch": 4.206891430268826, - "grad_norm": 0.5723518133163452, - "learning_rate": 4.758651418387049e-06, - "loss": 0.0841, - "step": 56650 - }, - { - "epoch": 4.207634041289173, - "grad_norm": 0.5894990563392639, - "learning_rate": 4.754195752264964e-06, - "loss": 0.0622, - "step": 56660 - }, - { - "epoch": 4.2083766523095205, - "grad_norm": 0.48831412196159363, - "learning_rate": 4.749740086142879e-06, - "loss": 0.1112, - "step": 56670 - }, - { - "epoch": 4.2091192633298675, - "grad_norm": 1.655131220817566, - "learning_rate": 4.745284420020793e-06, - "loss": 0.0583, - "step": 56680 - }, - { - "epoch": 4.209861874350215, - "grad_norm": 0.4988187849521637, - "learning_rate": 4.740828753898708e-06, - "loss": 0.0436, - "step": 56690 - }, - { - "epoch": 4.210604485370563, - "grad_norm": 6.94892692565918, - "learning_rate": 4.736373087776623e-06, - "loss": 0.0508, - "step": 56700 - }, - { - "epoch": 4.21134709639091, - "grad_norm": 0.6588563323020935, - "learning_rate": 4.731917421654538e-06, - "loss": 0.0307, - "step": 56710 - }, - { - "epoch": 4.212089707411258, - "grad_norm": 1.032076358795166, - "learning_rate": 4.727461755532452e-06, - "loss": 0.0532, - "step": 56720 - }, - { - "epoch": 4.212832318431605, - "grad_norm": 0.7659674882888794, - "learning_rate": 4.723006089410367e-06, - "loss": 0.0798, - "step": 56730 - }, - { - "epoch": 4.213574929451953, - "grad_norm": 1.151066780090332, - "learning_rate": 4.718550423288281e-06, - "loss": 0.0603, - "step": 56740 - }, - { - "epoch": 4.214317540472301, - "grad_norm": 3.9358067512512207, - "learning_rate": 4.714094757166197e-06, - "loss": 0.0505, - "step": 56750 - }, - { - "epoch": 4.215060151492648, - "grad_norm": 0.6404879093170166, - "learning_rate": 4.709639091044111e-06, - "loss": 0.0577, - "step": 56760 - }, - { - "epoch": 4.215802762512996, - "grad_norm": 2.104989767074585, - "learning_rate": 4.705183424922026e-06, - "loss": 0.0566, - "step": 56770 - }, - { - "epoch": 4.216545373533343, - "grad_norm": 0.7882208228111267, - "learning_rate": 4.70072775879994e-06, - "loss": 0.058, - "step": 56780 - }, - { - "epoch": 4.217287984553691, - "grad_norm": 1.2608188390731812, - "learning_rate": 4.696272092677855e-06, - "loss": 0.0692, - "step": 56790 - }, - { - "epoch": 4.218030595574039, - "grad_norm": 1.273901343345642, - "learning_rate": 4.69181642655577e-06, - "loss": 0.067, - "step": 56800 - }, - { - "epoch": 4.218773206594386, - "grad_norm": 2.1798079013824463, - "learning_rate": 4.687360760433685e-06, - "loss": 0.0807, - "step": 56810 - }, - { - "epoch": 4.219515817614734, - "grad_norm": 1.1464588642120361, - "learning_rate": 4.6829050943116e-06, - "loss": 0.0356, - "step": 56820 - }, - { - "epoch": 4.220258428635081, - "grad_norm": 2.1322357654571533, - "learning_rate": 4.678449428189514e-06, - "loss": 0.0811, - "step": 56830 - }, - { - "epoch": 4.2210010396554285, - "grad_norm": 1.9702597856521606, - "learning_rate": 4.673993762067429e-06, - "loss": 0.0382, - "step": 56840 - }, - { - "epoch": 4.221743650675776, - "grad_norm": 1.0465569496154785, - "learning_rate": 4.6695380959453436e-06, - "loss": 0.0481, - "step": 56850 - }, - { - "epoch": 4.222486261696123, - "grad_norm": 1.8960071802139282, - "learning_rate": 4.6650824298232586e-06, - "loss": 0.0823, - "step": 56860 - }, - { - "epoch": 4.223228872716471, - "grad_norm": 1.5519205331802368, - "learning_rate": 4.6606267637011736e-06, - "loss": 0.0807, - "step": 56870 - }, - { - "epoch": 4.223971483736818, - "grad_norm": 2.4255125522613525, - "learning_rate": 4.6561710975790885e-06, - "loss": 0.0666, - "step": 56880 - }, - { - "epoch": 4.224714094757166, - "grad_norm": 1.1080626249313354, - "learning_rate": 4.651715431457003e-06, - "loss": 0.081, - "step": 56890 - }, - { - "epoch": 4.225456705777514, - "grad_norm": 2.251464605331421, - "learning_rate": 4.647259765334918e-06, - "loss": 0.0743, - "step": 56900 - }, - { - "epoch": 4.226199316797861, - "grad_norm": 0.8593553900718689, - "learning_rate": 4.642804099212832e-06, - "loss": 0.0523, - "step": 56910 - }, - { - "epoch": 4.226941927818209, - "grad_norm": 0.543804407119751, - "learning_rate": 4.638348433090747e-06, - "loss": 0.0748, - "step": 56920 - }, - { - "epoch": 4.227684538838556, - "grad_norm": 2.7038323879241943, - "learning_rate": 4.633892766968663e-06, - "loss": 0.059, - "step": 56930 - }, - { - "epoch": 4.228427149858904, - "grad_norm": 3.215402364730835, - "learning_rate": 4.629437100846577e-06, - "loss": 0.0626, - "step": 56940 - }, - { - "epoch": 4.229169760879252, - "grad_norm": 0.7325242161750793, - "learning_rate": 4.624981434724492e-06, - "loss": 0.055, - "step": 56950 - }, - { - "epoch": 4.229912371899599, - "grad_norm": 1.3402073383331299, - "learning_rate": 4.620525768602406e-06, - "loss": 0.0661, - "step": 56960 - }, - { - "epoch": 4.230654982919947, - "grad_norm": 2.4132492542266846, - "learning_rate": 4.616070102480321e-06, - "loss": 0.0618, - "step": 56970 - }, - { - "epoch": 4.231397593940294, - "grad_norm": 2.361948251724243, - "learning_rate": 4.611614436358235e-06, - "loss": 0.029, - "step": 56980 - }, - { - "epoch": 4.232140204960642, - "grad_norm": 1.8688277006149292, - "learning_rate": 4.607158770236151e-06, - "loss": 0.0675, - "step": 56990 - }, - { - "epoch": 4.2328828159809895, - "grad_norm": 1.2990567684173584, - "learning_rate": 4.602703104114065e-06, - "loss": 0.0553, - "step": 57000 - }, - { - "epoch": 4.2336254270013365, - "grad_norm": 1.1148401498794556, - "learning_rate": 4.59824743799198e-06, - "loss": 0.0565, - "step": 57010 - }, - { - "epoch": 4.234368038021684, - "grad_norm": 1.900498867034912, - "learning_rate": 4.593791771869894e-06, - "loss": 0.0724, - "step": 57020 - }, - { - "epoch": 4.235110649042031, - "grad_norm": 1.3050068616867065, - "learning_rate": 4.589336105747809e-06, - "loss": 0.059, - "step": 57030 - }, - { - "epoch": 4.235853260062379, - "grad_norm": 1.2359297275543213, - "learning_rate": 4.584880439625723e-06, - "loss": 0.0581, - "step": 57040 - }, - { - "epoch": 4.236595871082727, - "grad_norm": 0.6790696382522583, - "learning_rate": 4.580424773503639e-06, - "loss": 0.0591, - "step": 57050 - }, - { - "epoch": 4.237338482103074, - "grad_norm": 1.258554220199585, - "learning_rate": 4.575969107381554e-06, - "loss": 0.0604, - "step": 57060 - }, - { - "epoch": 4.238081093123422, - "grad_norm": 0.6367031335830688, - "learning_rate": 4.571513441259468e-06, - "loss": 0.0627, - "step": 57070 - }, - { - "epoch": 4.238823704143769, - "grad_norm": 1.3916947841644287, - "learning_rate": 4.567057775137383e-06, - "loss": 0.0727, - "step": 57080 - }, - { - "epoch": 4.239566315164117, - "grad_norm": 1.1736927032470703, - "learning_rate": 4.5626021090152975e-06, - "loss": 0.0492, - "step": 57090 - }, - { - "epoch": 4.240308926184465, - "grad_norm": 2.185208320617676, - "learning_rate": 4.558146442893213e-06, - "loss": 0.0618, - "step": 57100 - }, - { - "epoch": 4.241051537204812, - "grad_norm": 0.6165786385536194, - "learning_rate": 4.5536907767711275e-06, - "loss": 0.061, - "step": 57110 - }, - { - "epoch": 4.24179414822516, - "grad_norm": 0.5800535678863525, - "learning_rate": 4.5492351106490425e-06, - "loss": 0.0724, - "step": 57120 - }, - { - "epoch": 4.242536759245507, - "grad_norm": 0.8701562881469727, - "learning_rate": 4.544779444526957e-06, - "loss": 0.0333, - "step": 57130 - }, - { - "epoch": 4.243279370265855, - "grad_norm": 0.8652254343032837, - "learning_rate": 4.540323778404872e-06, - "loss": 0.0258, - "step": 57140 - }, - { - "epoch": 4.244021981286203, - "grad_norm": 3.373532772064209, - "learning_rate": 4.535868112282786e-06, - "loss": 0.066, - "step": 57150 - }, - { - "epoch": 4.24476459230655, - "grad_norm": 0.9906690120697021, - "learning_rate": 4.531412446160702e-06, - "loss": 0.0716, - "step": 57160 - }, - { - "epoch": 4.2455072033268975, - "grad_norm": 2.1584084033966064, - "learning_rate": 4.526956780038616e-06, - "loss": 0.0605, - "step": 57170 - }, - { - "epoch": 4.2462498143472445, - "grad_norm": 1.1581281423568726, - "learning_rate": 4.522501113916531e-06, - "loss": 0.0437, - "step": 57180 - }, - { - "epoch": 4.246992425367592, - "grad_norm": 0.5750119686126709, - "learning_rate": 4.518045447794446e-06, - "loss": 0.0542, - "step": 57190 - }, - { - "epoch": 4.24773503638794, - "grad_norm": 0.9591627717018127, - "learning_rate": 4.51358978167236e-06, - "loss": 0.0591, - "step": 57200 - }, - { - "epoch": 4.248477647408287, - "grad_norm": 1.943953037261963, - "learning_rate": 4.509134115550275e-06, - "loss": 0.0455, - "step": 57210 - }, - { - "epoch": 4.249220258428635, - "grad_norm": 1.54121732711792, - "learning_rate": 4.50467844942819e-06, - "loss": 0.052, - "step": 57220 - }, - { - "epoch": 4.249962869448983, - "grad_norm": 1.1736338138580322, - "learning_rate": 4.500222783306105e-06, - "loss": 0.0741, - "step": 57230 - }, - { - "epoch": 4.25070548046933, - "grad_norm": 1.9899519681930542, - "learning_rate": 4.495767117184019e-06, - "loss": 0.0944, - "step": 57240 - }, - { - "epoch": 4.251448091489678, - "grad_norm": 1.6793705224990845, - "learning_rate": 4.491311451061934e-06, - "loss": 0.0749, - "step": 57250 - }, - { - "epoch": 4.252190702510025, - "grad_norm": 2.0780696868896484, - "learning_rate": 4.486855784939848e-06, - "loss": 0.0529, - "step": 57260 - }, - { - "epoch": 4.252933313530373, - "grad_norm": 0.6456514000892639, - "learning_rate": 4.482400118817763e-06, - "loss": 0.0516, - "step": 57270 - }, - { - "epoch": 4.253675924550721, - "grad_norm": 2.047966241836548, - "learning_rate": 4.477944452695678e-06, - "loss": 0.0564, - "step": 57280 - }, - { - "epoch": 4.254418535571068, - "grad_norm": 1.8581652641296387, - "learning_rate": 4.473488786573593e-06, - "loss": 0.0645, - "step": 57290 - }, - { - "epoch": 4.255161146591416, - "grad_norm": 0.8233331441879272, - "learning_rate": 4.469033120451507e-06, - "loss": 0.0339, - "step": 57300 - }, - { - "epoch": 4.255903757611763, - "grad_norm": 1.5012775659561157, - "learning_rate": 4.464577454329422e-06, - "loss": 0.0678, - "step": 57310 - }, - { - "epoch": 4.2566463686321105, - "grad_norm": 1.7684147357940674, - "learning_rate": 4.460121788207337e-06, - "loss": 0.0655, - "step": 57320 - }, - { - "epoch": 4.257388979652458, - "grad_norm": 1.061919927597046, - "learning_rate": 4.4556661220852514e-06, - "loss": 0.0663, - "step": 57330 - }, - { - "epoch": 4.2581315906728054, - "grad_norm": 3.2770798206329346, - "learning_rate": 4.451210455963167e-06, - "loss": 0.0863, - "step": 57340 - }, - { - "epoch": 4.258874201693153, - "grad_norm": 0.6535485982894897, - "learning_rate": 4.446754789841081e-06, - "loss": 0.0603, - "step": 57350 - }, - { - "epoch": 4.2596168127135, - "grad_norm": 0.8265778422355652, - "learning_rate": 4.442299123718996e-06, - "loss": 0.0573, - "step": 57360 - }, - { - "epoch": 4.260359423733848, - "grad_norm": 0.3725847005844116, - "learning_rate": 4.4378434575969106e-06, - "loss": 0.0543, - "step": 57370 - }, - { - "epoch": 4.261102034754196, - "grad_norm": 1.1065174341201782, - "learning_rate": 4.4333877914748256e-06, - "loss": 0.0578, - "step": 57380 - }, - { - "epoch": 4.261844645774543, - "grad_norm": 3.13139009475708, - "learning_rate": 4.42893212535274e-06, - "loss": 0.0632, - "step": 57390 - }, - { - "epoch": 4.262587256794891, - "grad_norm": 1.8046602010726929, - "learning_rate": 4.4244764592306555e-06, - "loss": 0.0508, - "step": 57400 - }, - { - "epoch": 4.263329867815238, - "grad_norm": 0.9874463677406311, - "learning_rate": 4.42002079310857e-06, - "loss": 0.0546, - "step": 57410 - }, - { - "epoch": 4.264072478835586, - "grad_norm": 2.401059865951538, - "learning_rate": 4.415565126986485e-06, - "loss": 0.0502, - "step": 57420 - }, - { - "epoch": 4.264815089855934, - "grad_norm": 1.2862542867660522, - "learning_rate": 4.411109460864399e-06, - "loss": 0.0722, - "step": 57430 - }, - { - "epoch": 4.265557700876281, - "grad_norm": 0.6845186948776245, - "learning_rate": 4.406653794742314e-06, - "loss": 0.053, - "step": 57440 - }, - { - "epoch": 4.266300311896629, - "grad_norm": 0.5840150117874146, - "learning_rate": 4.40219812862023e-06, - "loss": 0.0395, - "step": 57450 - }, - { - "epoch": 4.267042922916976, - "grad_norm": 0.8561588525772095, - "learning_rate": 4.397742462498144e-06, - "loss": 0.076, - "step": 57460 - }, - { - "epoch": 4.267785533937324, - "grad_norm": 1.2348568439483643, - "learning_rate": 4.393286796376059e-06, - "loss": 0.0679, - "step": 57470 - }, - { - "epoch": 4.2685281449576715, - "grad_norm": 1.4345803260803223, - "learning_rate": 4.388831130253973e-06, - "loss": 0.0542, - "step": 57480 - }, - { - "epoch": 4.2692707559780185, - "grad_norm": 0.30238887667655945, - "learning_rate": 4.384375464131888e-06, - "loss": 0.0568, - "step": 57490 - }, - { - "epoch": 4.270013366998366, - "grad_norm": 1.476379156112671, - "learning_rate": 4.379919798009802e-06, - "loss": 0.0448, - "step": 57500 - }, - { - "epoch": 4.270755978018713, - "grad_norm": 0.6961525678634644, - "learning_rate": 4.375464131887718e-06, - "loss": 0.053, - "step": 57510 - }, - { - "epoch": 4.271498589039061, - "grad_norm": 1.5048184394836426, - "learning_rate": 4.371008465765632e-06, - "loss": 0.0572, - "step": 57520 - }, - { - "epoch": 4.272241200059409, - "grad_norm": 1.902750849723816, - "learning_rate": 4.366552799643547e-06, - "loss": 0.0564, - "step": 57530 - }, - { - "epoch": 4.272983811079756, - "grad_norm": 2.073760747909546, - "learning_rate": 4.362097133521461e-06, - "loss": 0.052, - "step": 57540 - }, - { - "epoch": 4.273726422100104, - "grad_norm": 0.6217005252838135, - "learning_rate": 4.357641467399376e-06, - "loss": 0.0794, - "step": 57550 - }, - { - "epoch": 4.274469033120451, - "grad_norm": 1.285564661026001, - "learning_rate": 4.35318580127729e-06, - "loss": 0.0667, - "step": 57560 - }, - { - "epoch": 4.275211644140799, - "grad_norm": 0.4138168692588806, - "learning_rate": 4.348730135155206e-06, - "loss": 0.0688, - "step": 57570 - }, - { - "epoch": 4.275954255161147, - "grad_norm": 2.414457321166992, - "learning_rate": 4.344274469033121e-06, - "loss": 0.0496, - "step": 57580 - }, - { - "epoch": 4.276696866181494, - "grad_norm": 1.193533182144165, - "learning_rate": 4.339818802911035e-06, - "loss": 0.0693, - "step": 57590 - }, - { - "epoch": 4.277439477201842, - "grad_norm": 2.959575891494751, - "learning_rate": 4.33536313678895e-06, - "loss": 0.0625, - "step": 57600 - }, - { - "epoch": 4.278182088222189, - "grad_norm": 4.037361145019531, - "learning_rate": 4.3309074706668645e-06, - "loss": 0.0413, - "step": 57610 - }, - { - "epoch": 4.278924699242537, - "grad_norm": 2.123981475830078, - "learning_rate": 4.3264518045447795e-06, - "loss": 0.0575, - "step": 57620 - }, - { - "epoch": 4.279667310262885, - "grad_norm": 1.8399453163146973, - "learning_rate": 4.3219961384226945e-06, - "loss": 0.0643, - "step": 57630 - }, - { - "epoch": 4.280409921283232, - "grad_norm": 0.43379709124565125, - "learning_rate": 4.3175404723006095e-06, - "loss": 0.0514, - "step": 57640 - }, - { - "epoch": 4.2811525323035795, - "grad_norm": 0.5563368797302246, - "learning_rate": 4.313084806178524e-06, - "loss": 0.0698, - "step": 57650 - }, - { - "epoch": 4.2818951433239265, - "grad_norm": 4.087998867034912, - "learning_rate": 4.308629140056439e-06, - "loss": 0.0505, - "step": 57660 - }, - { - "epoch": 4.282637754344274, - "grad_norm": 3.5374698638916016, - "learning_rate": 4.304173473934353e-06, - "loss": 0.0892, - "step": 57670 - }, - { - "epoch": 4.283380365364622, - "grad_norm": 1.2814463376998901, - "learning_rate": 4.299717807812268e-06, - "loss": 0.0633, - "step": 57680 - }, - { - "epoch": 4.284122976384969, - "grad_norm": 0.6887046098709106, - "learning_rate": 4.295262141690183e-06, - "loss": 0.0323, - "step": 57690 - }, - { - "epoch": 4.284865587405317, - "grad_norm": 1.5045709609985352, - "learning_rate": 4.290806475568098e-06, - "loss": 0.0363, - "step": 57700 - }, - { - "epoch": 4.285608198425665, - "grad_norm": 0.7762113213539124, - "learning_rate": 4.286350809446012e-06, - "loss": 0.0575, - "step": 57710 - }, - { - "epoch": 4.286350809446012, - "grad_norm": 0.4139329195022583, - "learning_rate": 4.281895143323927e-06, - "loss": 0.0461, - "step": 57720 - }, - { - "epoch": 4.28709342046636, - "grad_norm": 0.7771281599998474, - "learning_rate": 4.277439477201842e-06, - "loss": 0.043, - "step": 57730 - }, - { - "epoch": 4.287836031486707, - "grad_norm": 1.1789294481277466, - "learning_rate": 4.272983811079756e-06, - "loss": 0.0563, - "step": 57740 - }, - { - "epoch": 4.288578642507055, - "grad_norm": 2.2451627254486084, - "learning_rate": 4.268528144957672e-06, - "loss": 0.0681, - "step": 57750 - }, - { - "epoch": 4.289321253527403, - "grad_norm": 0.40132951736450195, - "learning_rate": 4.264072478835586e-06, - "loss": 0.0559, - "step": 57760 - }, - { - "epoch": 4.29006386454775, - "grad_norm": 1.868285059928894, - "learning_rate": 4.259616812713501e-06, - "loss": 0.0517, - "step": 57770 - }, - { - "epoch": 4.290806475568098, - "grad_norm": 1.2447259426116943, - "learning_rate": 4.255161146591415e-06, - "loss": 0.0629, - "step": 57780 - }, - { - "epoch": 4.291549086588445, - "grad_norm": 0.3125772774219513, - "learning_rate": 4.25070548046933e-06, - "loss": 0.0589, - "step": 57790 - }, - { - "epoch": 4.292291697608793, - "grad_norm": 1.9337828159332275, - "learning_rate": 4.246249814347245e-06, - "loss": 0.0771, - "step": 57800 - }, - { - "epoch": 4.2930343086291405, - "grad_norm": 1.6286951303482056, - "learning_rate": 4.24179414822516e-06, - "loss": 0.0577, - "step": 57810 - }, - { - "epoch": 4.2937769196494875, - "grad_norm": 3.2661020755767822, - "learning_rate": 4.237338482103074e-06, - "loss": 0.0565, - "step": 57820 - }, - { - "epoch": 4.294519530669835, - "grad_norm": 2.473935604095459, - "learning_rate": 4.232882815980989e-06, - "loss": 0.0672, - "step": 57830 - }, - { - "epoch": 4.295262141690182, - "grad_norm": 1.5696542263031006, - "learning_rate": 4.2284271498589034e-06, - "loss": 0.0682, - "step": 57840 - }, - { - "epoch": 4.29600475271053, - "grad_norm": 0.4458481967449188, - "learning_rate": 4.223971483736818e-06, - "loss": 0.0444, - "step": 57850 - }, - { - "epoch": 4.296747363730878, - "grad_norm": 2.461646556854248, - "learning_rate": 4.219515817614734e-06, - "loss": 0.0548, - "step": 57860 - }, - { - "epoch": 4.297489974751225, - "grad_norm": 3.5632870197296143, - "learning_rate": 4.215060151492648e-06, - "loss": 0.0551, - "step": 57870 - }, - { - "epoch": 4.298232585771573, - "grad_norm": 0.38127386569976807, - "learning_rate": 4.210604485370563e-06, - "loss": 0.0746, - "step": 57880 - }, - { - "epoch": 4.29897519679192, - "grad_norm": 0.4123340845108032, - "learning_rate": 4.2061488192484775e-06, - "loss": 0.052, - "step": 57890 - }, - { - "epoch": 4.299717807812268, - "grad_norm": 1.4147988557815552, - "learning_rate": 4.2016931531263925e-06, - "loss": 0.0534, - "step": 57900 - }, - { - "epoch": 4.300460418832616, - "grad_norm": 1.1483192443847656, - "learning_rate": 4.197237487004307e-06, - "loss": 0.0762, - "step": 57910 - }, - { - "epoch": 4.301203029852963, - "grad_norm": 1.2446470260620117, - "learning_rate": 4.1927818208822225e-06, - "loss": 0.0827, - "step": 57920 - }, - { - "epoch": 4.301945640873311, - "grad_norm": 1.6271134614944458, - "learning_rate": 4.188326154760137e-06, - "loss": 0.0517, - "step": 57930 - }, - { - "epoch": 4.302688251893658, - "grad_norm": 0.6140291094779968, - "learning_rate": 4.183870488638052e-06, - "loss": 0.045, - "step": 57940 - }, - { - "epoch": 4.303430862914006, - "grad_norm": 2.260127067565918, - "learning_rate": 4.179414822515966e-06, - "loss": 0.0545, - "step": 57950 - }, - { - "epoch": 4.304173473934354, - "grad_norm": 1.3899246454238892, - "learning_rate": 4.174959156393881e-06, - "loss": 0.0625, - "step": 57960 - }, - { - "epoch": 4.304916084954701, - "grad_norm": 3.2012217044830322, - "learning_rate": 4.170503490271795e-06, - "loss": 0.0644, - "step": 57970 - }, - { - "epoch": 4.3056586959750485, - "grad_norm": 1.8873796463012695, - "learning_rate": 4.166047824149711e-06, - "loss": 0.0446, - "step": 57980 - }, - { - "epoch": 4.3064013069953955, - "grad_norm": 1.7632603645324707, - "learning_rate": 4.161592158027626e-06, - "loss": 0.058, - "step": 57990 - }, - { - "epoch": 4.307143918015743, - "grad_norm": 2.116173028945923, - "learning_rate": 4.15713649190554e-06, - "loss": 0.0564, - "step": 58000 - }, - { - "epoch": 4.307886529036091, - "grad_norm": 1.2623026371002197, - "learning_rate": 4.152680825783455e-06, - "loss": 0.0515, - "step": 58010 - }, - { - "epoch": 4.308629140056438, - "grad_norm": 0.5496644377708435, - "learning_rate": 4.148225159661369e-06, - "loss": 0.0891, - "step": 58020 - }, - { - "epoch": 4.309371751076786, - "grad_norm": 1.1486482620239258, - "learning_rate": 4.143769493539284e-06, - "loss": 0.0526, - "step": 58030 - }, - { - "epoch": 4.310114362097133, - "grad_norm": 1.4145158529281616, - "learning_rate": 4.139313827417199e-06, - "loss": 0.0745, - "step": 58040 - }, - { - "epoch": 4.310856973117481, - "grad_norm": 2.3882088661193848, - "learning_rate": 4.134858161295114e-06, - "loss": 0.0881, - "step": 58050 - }, - { - "epoch": 4.311599584137829, - "grad_norm": 1.1562554836273193, - "learning_rate": 4.130402495173028e-06, - "loss": 0.0556, - "step": 58060 - }, - { - "epoch": 4.312342195158176, - "grad_norm": 2.3691024780273438, - "learning_rate": 4.125946829050943e-06, - "loss": 0.0798, - "step": 58070 - }, - { - "epoch": 4.313084806178524, - "grad_norm": 1.2392774820327759, - "learning_rate": 4.121491162928857e-06, - "loss": 0.0689, - "step": 58080 - }, - { - "epoch": 4.313827417198871, - "grad_norm": 1.4039784669876099, - "learning_rate": 4.117035496806772e-06, - "loss": 0.0513, - "step": 58090 - }, - { - "epoch": 4.314570028219219, - "grad_norm": 0.6202054619789124, - "learning_rate": 4.112579830684687e-06, - "loss": 0.0448, - "step": 58100 - }, - { - "epoch": 4.315312639239567, - "grad_norm": 0.5338848829269409, - "learning_rate": 4.108124164562602e-06, - "loss": 0.0619, - "step": 58110 - }, - { - "epoch": 4.316055250259914, - "grad_norm": 0.5443835258483887, - "learning_rate": 4.103668498440517e-06, - "loss": 0.0665, - "step": 58120 - }, - { - "epoch": 4.3167978612802616, - "grad_norm": 2.086144208908081, - "learning_rate": 4.0992128323184315e-06, - "loss": 0.0561, - "step": 58130 - }, - { - "epoch": 4.317540472300609, - "grad_norm": 0.8639087677001953, - "learning_rate": 4.0947571661963465e-06, - "loss": 0.0538, - "step": 58140 - }, - { - "epoch": 4.3182830833209565, - "grad_norm": 2.400470495223999, - "learning_rate": 4.0903015000742615e-06, - "loss": 0.0504, - "step": 58150 - }, - { - "epoch": 4.319025694341304, - "grad_norm": 0.8947586417198181, - "learning_rate": 4.0858458339521765e-06, - "loss": 0.0413, - "step": 58160 - }, - { - "epoch": 4.319768305361651, - "grad_norm": 1.5876610279083252, - "learning_rate": 4.081390167830091e-06, - "loss": 0.0721, - "step": 58170 - }, - { - "epoch": 4.320510916381999, - "grad_norm": 2.025843858718872, - "learning_rate": 4.076934501708006e-06, - "loss": 0.0559, - "step": 58180 - }, - { - "epoch": 4.321253527402346, - "grad_norm": 1.2650307416915894, - "learning_rate": 4.07247883558592e-06, - "loss": 0.0469, - "step": 58190 - }, - { - "epoch": 4.321996138422694, - "grad_norm": 0.4236902892589569, - "learning_rate": 4.068023169463835e-06, - "loss": 0.0319, - "step": 58200 - }, - { - "epoch": 4.322738749443042, - "grad_norm": 1.2939475774765015, - "learning_rate": 4.06356750334175e-06, - "loss": 0.0748, - "step": 58210 - }, - { - "epoch": 4.323481360463389, - "grad_norm": 0.4584546685218811, - "learning_rate": 4.059111837219665e-06, - "loss": 0.0305, - "step": 58220 - }, - { - "epoch": 4.324223971483737, - "grad_norm": 1.0066841840744019, - "learning_rate": 4.054656171097579e-06, - "loss": 0.0638, - "step": 58230 - }, - { - "epoch": 4.324966582504084, - "grad_norm": 1.1847662925720215, - "learning_rate": 4.050200504975494e-06, - "loss": 0.0383, - "step": 58240 - }, - { - "epoch": 4.325709193524432, - "grad_norm": 1.5462864637374878, - "learning_rate": 4.045744838853409e-06, - "loss": 0.0557, - "step": 58250 - }, - { - "epoch": 4.32645180454478, - "grad_norm": 1.220777153968811, - "learning_rate": 4.041289172731323e-06, - "loss": 0.054, - "step": 58260 - }, - { - "epoch": 4.327194415565127, - "grad_norm": 2.4415807723999023, - "learning_rate": 4.036833506609239e-06, - "loss": 0.062, - "step": 58270 - }, - { - "epoch": 4.327937026585475, - "grad_norm": 1.2786998748779297, - "learning_rate": 4.032377840487153e-06, - "loss": 0.0608, - "step": 58280 - }, - { - "epoch": 4.328679637605822, - "grad_norm": 0.7902323007583618, - "learning_rate": 4.027922174365068e-06, - "loss": 0.0624, - "step": 58290 - }, - { - "epoch": 4.3294222486261695, - "grad_norm": 1.4461417198181152, - "learning_rate": 4.023466508242982e-06, - "loss": 0.0546, - "step": 58300 - }, - { - "epoch": 4.330164859646517, - "grad_norm": 0.4553472697734833, - "learning_rate": 4.019010842120897e-06, - "loss": 0.0413, - "step": 58310 - }, - { - "epoch": 4.330907470666864, - "grad_norm": 1.363437294960022, - "learning_rate": 4.014555175998811e-06, - "loss": 0.0512, - "step": 58320 - }, - { - "epoch": 4.331650081687212, - "grad_norm": 1.6967765092849731, - "learning_rate": 4.010099509876727e-06, - "loss": 0.0611, - "step": 58330 - }, - { - "epoch": 4.332392692707559, - "grad_norm": 1.917182207107544, - "learning_rate": 4.005643843754641e-06, - "loss": 0.0887, - "step": 58340 - }, - { - "epoch": 4.333135303727907, - "grad_norm": 2.6857664585113525, - "learning_rate": 4.001188177632556e-06, - "loss": 0.06, - "step": 58350 - }, - { - "epoch": 4.333877914748255, - "grad_norm": 2.381786346435547, - "learning_rate": 3.99673251151047e-06, - "loss": 0.0701, - "step": 58360 - }, - { - "epoch": 4.334620525768602, - "grad_norm": 1.848760724067688, - "learning_rate": 3.992276845388385e-06, - "loss": 0.0649, - "step": 58370 - }, - { - "epoch": 4.33536313678895, - "grad_norm": 1.4879848957061768, - "learning_rate": 3.9878211792663e-06, - "loss": 0.0502, - "step": 58380 - }, - { - "epoch": 4.336105747809297, - "grad_norm": 1.29238760471344, - "learning_rate": 3.983365513144215e-06, - "loss": 0.0527, - "step": 58390 - }, - { - "epoch": 4.336848358829645, - "grad_norm": 0.42699211835861206, - "learning_rate": 3.97890984702213e-06, - "loss": 0.047, - "step": 58400 - }, - { - "epoch": 4.337590969849993, - "grad_norm": 2.1831252574920654, - "learning_rate": 3.9744541809000445e-06, - "loss": 0.0746, - "step": 58410 - }, - { - "epoch": 4.33833358087034, - "grad_norm": 0.27628546953201294, - "learning_rate": 3.9699985147779595e-06, - "loss": 0.0367, - "step": 58420 - }, - { - "epoch": 4.339076191890688, - "grad_norm": 1.2963409423828125, - "learning_rate": 3.965542848655874e-06, - "loss": 0.0693, - "step": 58430 - }, - { - "epoch": 4.339818802911036, - "grad_norm": 3.566678762435913, - "learning_rate": 3.961087182533789e-06, - "loss": 0.0523, - "step": 58440 - }, - { - "epoch": 4.340561413931383, - "grad_norm": 0.6280962228775024, - "learning_rate": 3.956631516411704e-06, - "loss": 0.0329, - "step": 58450 - }, - { - "epoch": 4.3413040249517305, - "grad_norm": 0.4241829812526703, - "learning_rate": 3.952175850289619e-06, - "loss": 0.0304, - "step": 58460 - }, - { - "epoch": 4.3420466359720775, - "grad_norm": 0.3502853214740753, - "learning_rate": 3.947720184167533e-06, - "loss": 0.0429, - "step": 58470 - }, - { - "epoch": 4.342789246992425, - "grad_norm": 3.029207229614258, - "learning_rate": 3.943264518045448e-06, - "loss": 0.0859, - "step": 58480 - }, - { - "epoch": 4.343531858012773, - "grad_norm": 0.4693982005119324, - "learning_rate": 3.938808851923362e-06, - "loss": 0.0478, - "step": 58490 - }, - { - "epoch": 4.34427446903312, - "grad_norm": 1.9650620222091675, - "learning_rate": 3.934353185801278e-06, - "loss": 0.0574, - "step": 58500 - }, - { - "epoch": 4.345017080053468, - "grad_norm": 2.096945285797119, - "learning_rate": 3.929897519679193e-06, - "loss": 0.0566, - "step": 58510 - }, - { - "epoch": 4.345759691073815, - "grad_norm": 0.6621731519699097, - "learning_rate": 3.925441853557107e-06, - "loss": 0.0532, - "step": 58520 - }, - { - "epoch": 4.346502302094163, - "grad_norm": 3.3194026947021484, - "learning_rate": 3.920986187435022e-06, - "loss": 0.0901, - "step": 58530 - }, - { - "epoch": 4.347244913114511, - "grad_norm": 0.2626116871833801, - "learning_rate": 3.916530521312936e-06, - "loss": 0.05, - "step": 58540 - }, - { - "epoch": 4.347987524134858, - "grad_norm": 1.0745980739593506, - "learning_rate": 3.912074855190851e-06, - "loss": 0.0672, - "step": 58550 - }, - { - "epoch": 4.348730135155206, - "grad_norm": 1.1821939945220947, - "learning_rate": 3.907619189068766e-06, - "loss": 0.0755, - "step": 58560 - }, - { - "epoch": 4.349472746175553, - "grad_norm": 3.788940668106079, - "learning_rate": 3.903163522946681e-06, - "loss": 0.0683, - "step": 58570 - }, - { - "epoch": 4.350215357195901, - "grad_norm": 2.9081804752349854, - "learning_rate": 3.898707856824595e-06, - "loss": 0.0677, - "step": 58580 - }, - { - "epoch": 4.350957968216249, - "grad_norm": 1.8848897218704224, - "learning_rate": 3.89425219070251e-06, - "loss": 0.0605, - "step": 58590 - }, - { - "epoch": 4.351700579236596, - "grad_norm": 0.5404842495918274, - "learning_rate": 3.889796524580424e-06, - "loss": 0.04, - "step": 58600 - }, - { - "epoch": 4.352443190256944, - "grad_norm": 1.7580265998840332, - "learning_rate": 3.885340858458339e-06, - "loss": 0.0307, - "step": 58610 - }, - { - "epoch": 4.353185801277291, - "grad_norm": 1.8819963932037354, - "learning_rate": 3.880885192336254e-06, - "loss": 0.0704, - "step": 58620 - }, - { - "epoch": 4.3539284122976385, - "grad_norm": 0.9093202948570251, - "learning_rate": 3.876429526214169e-06, - "loss": 0.0915, - "step": 58630 - }, - { - "epoch": 4.354671023317986, - "grad_norm": 2.8272292613983154, - "learning_rate": 3.871973860092084e-06, - "loss": 0.0886, - "step": 58640 - }, - { - "epoch": 4.355413634338333, - "grad_norm": 2.371199131011963, - "learning_rate": 3.8675181939699985e-06, - "loss": 0.0583, - "step": 58650 - }, - { - "epoch": 4.356156245358681, - "grad_norm": 0.21469803154468536, - "learning_rate": 3.8630625278479135e-06, - "loss": 0.0405, - "step": 58660 - }, - { - "epoch": 4.356898856379028, - "grad_norm": 1.9610093832015991, - "learning_rate": 3.858606861725828e-06, - "loss": 0.031, - "step": 58670 - }, - { - "epoch": 4.357641467399376, - "grad_norm": 3.707371950149536, - "learning_rate": 3.8541511956037435e-06, - "loss": 0.0643, - "step": 58680 - }, - { - "epoch": 4.358384078419724, - "grad_norm": 0.8655148148536682, - "learning_rate": 3.849695529481658e-06, - "loss": 0.0624, - "step": 58690 - }, - { - "epoch": 4.359126689440071, - "grad_norm": 2.9356181621551514, - "learning_rate": 3.845239863359573e-06, - "loss": 0.081, - "step": 58700 - }, - { - "epoch": 4.359869300460419, - "grad_norm": 1.3285295963287354, - "learning_rate": 3.840784197237487e-06, - "loss": 0.0463, - "step": 58710 - }, - { - "epoch": 4.360611911480766, - "grad_norm": 0.44809725880622864, - "learning_rate": 3.836328531115402e-06, - "loss": 0.0769, - "step": 58720 - }, - { - "epoch": 4.361354522501114, - "grad_norm": 1.0158475637435913, - "learning_rate": 3.831872864993316e-06, - "loss": 0.0721, - "step": 58730 - }, - { - "epoch": 4.362097133521462, - "grad_norm": 1.1209678649902344, - "learning_rate": 3.827417198871232e-06, - "loss": 0.0601, - "step": 58740 - }, - { - "epoch": 4.362839744541809, - "grad_norm": 1.2492932081222534, - "learning_rate": 3.822961532749146e-06, - "loss": 0.0776, - "step": 58750 - }, - { - "epoch": 4.363582355562157, - "grad_norm": 0.5209352374076843, - "learning_rate": 3.818505866627061e-06, - "loss": 0.0311, - "step": 58760 - }, - { - "epoch": 4.364324966582504, - "grad_norm": 0.8227794766426086, - "learning_rate": 3.814050200504976e-06, - "loss": 0.0282, - "step": 58770 - }, - { - "epoch": 4.365067577602852, - "grad_norm": 1.6542755365371704, - "learning_rate": 3.8095945343828904e-06, - "loss": 0.0766, - "step": 58780 - }, - { - "epoch": 4.3658101886231995, - "grad_norm": 2.0904018878936768, - "learning_rate": 3.8051388682608054e-06, - "loss": 0.0475, - "step": 58790 - }, - { - "epoch": 4.3665527996435465, - "grad_norm": 1.682500958442688, - "learning_rate": 3.8006832021387196e-06, - "loss": 0.0691, - "step": 58800 - }, - { - "epoch": 4.367295410663894, - "grad_norm": 2.096959352493286, - "learning_rate": 3.796227536016635e-06, - "loss": 0.0475, - "step": 58810 - }, - { - "epoch": 4.368038021684241, - "grad_norm": 2.7535927295684814, - "learning_rate": 3.791771869894549e-06, - "loss": 0.0791, - "step": 58820 - }, - { - "epoch": 4.368780632704589, - "grad_norm": 0.5873667001724243, - "learning_rate": 3.787316203772464e-06, - "loss": 0.0359, - "step": 58830 - }, - { - "epoch": 4.369523243724937, - "grad_norm": 3.4915168285369873, - "learning_rate": 3.7828605376503787e-06, - "loss": 0.0502, - "step": 58840 - }, - { - "epoch": 4.370265854745284, - "grad_norm": 0.34856438636779785, - "learning_rate": 3.7784048715282937e-06, - "loss": 0.0606, - "step": 58850 - }, - { - "epoch": 4.371008465765632, - "grad_norm": 0.42992135882377625, - "learning_rate": 3.773949205406208e-06, - "loss": 0.0645, - "step": 58860 - }, - { - "epoch": 4.37175107678598, - "grad_norm": 1.6763559579849243, - "learning_rate": 3.7694935392841233e-06, - "loss": 0.0786, - "step": 58870 - }, - { - "epoch": 4.372493687806327, - "grad_norm": 2.7361538410186768, - "learning_rate": 3.7650378731620374e-06, - "loss": 0.0677, - "step": 58880 - }, - { - "epoch": 4.373236298826675, - "grad_norm": 2.2586610317230225, - "learning_rate": 3.7605822070399524e-06, - "loss": 0.0419, - "step": 58890 - }, - { - "epoch": 4.373978909847022, - "grad_norm": 0.7950805425643921, - "learning_rate": 3.756126540917868e-06, - "loss": 0.0381, - "step": 58900 - }, - { - "epoch": 4.37472152086737, - "grad_norm": 0.500672459602356, - "learning_rate": 3.751670874795782e-06, - "loss": 0.0576, - "step": 58910 - }, - { - "epoch": 4.375464131887718, - "grad_norm": 1.354580044746399, - "learning_rate": 3.7472152086736965e-06, - "loss": 0.0514, - "step": 58920 - }, - { - "epoch": 4.376206742908065, - "grad_norm": 2.166482925415039, - "learning_rate": 3.7427595425516115e-06, - "loss": 0.0508, - "step": 58930 - }, - { - "epoch": 4.376949353928413, - "grad_norm": 0.8477782011032104, - "learning_rate": 3.738303876429526e-06, - "loss": 0.0472, - "step": 58940 - }, - { - "epoch": 4.37769196494876, - "grad_norm": 0.9658443331718445, - "learning_rate": 3.733848210307441e-06, - "loss": 0.0729, - "step": 58950 - }, - { - "epoch": 4.3784345759691075, - "grad_norm": 3.2751779556274414, - "learning_rate": 3.7293925441853557e-06, - "loss": 0.0389, - "step": 58960 - }, - { - "epoch": 4.379177186989455, - "grad_norm": 0.7033012509346008, - "learning_rate": 3.7249368780632707e-06, - "loss": 0.051, - "step": 58970 - }, - { - "epoch": 4.379919798009802, - "grad_norm": 0.37652888894081116, - "learning_rate": 3.7204812119411857e-06, - "loss": 0.0488, - "step": 58980 - }, - { - "epoch": 4.38066240903015, - "grad_norm": 1.1736301183700562, - "learning_rate": 3.7160255458191002e-06, - "loss": 0.0742, - "step": 58990 - }, - { - "epoch": 4.381405020050497, - "grad_norm": 1.1174696683883667, - "learning_rate": 3.711569879697015e-06, - "loss": 0.0505, - "step": 59000 - }, - { - "epoch": 4.382147631070845, - "grad_norm": 2.9306743144989014, - "learning_rate": 3.70711421357493e-06, - "loss": 0.051, - "step": 59010 - }, - { - "epoch": 4.382890242091193, - "grad_norm": 1.5580140352249146, - "learning_rate": 3.7026585474528444e-06, - "loss": 0.0598, - "step": 59020 - }, - { - "epoch": 4.38363285311154, - "grad_norm": 1.074127435684204, - "learning_rate": 3.698202881330759e-06, - "loss": 0.053, - "step": 59030 - }, - { - "epoch": 4.384375464131888, - "grad_norm": 0.9551728367805481, - "learning_rate": 3.693747215208674e-06, - "loss": 0.0438, - "step": 59040 - }, - { - "epoch": 4.385118075152235, - "grad_norm": 1.77925705909729, - "learning_rate": 3.6892915490865885e-06, - "loss": 0.0489, - "step": 59050 - }, - { - "epoch": 4.385860686172583, - "grad_norm": 0.7532749772071838, - "learning_rate": 3.684835882964503e-06, - "loss": 0.0431, - "step": 59060 - }, - { - "epoch": 4.386603297192931, - "grad_norm": 1.1308987140655518, - "learning_rate": 3.680380216842418e-06, - "loss": 0.0617, - "step": 59070 - }, - { - "epoch": 4.387345908213278, - "grad_norm": 3.694500684738159, - "learning_rate": 3.6759245507203326e-06, - "loss": 0.065, - "step": 59080 - }, - { - "epoch": 4.388088519233626, - "grad_norm": 2.997882127761841, - "learning_rate": 3.671468884598247e-06, - "loss": 0.063, - "step": 59090 - }, - { - "epoch": 4.388831130253973, - "grad_norm": 1.4791189432144165, - "learning_rate": 3.6670132184761626e-06, - "loss": 0.0603, - "step": 59100 - }, - { - "epoch": 4.3895737412743205, - "grad_norm": 1.4636845588684082, - "learning_rate": 3.662557552354077e-06, - "loss": 0.0548, - "step": 59110 - }, - { - "epoch": 4.390316352294668, - "grad_norm": 1.0181242227554321, - "learning_rate": 3.6581018862319918e-06, - "loss": 0.0424, - "step": 59120 - }, - { - "epoch": 4.391058963315015, - "grad_norm": 0.7411527037620544, - "learning_rate": 3.6536462201099068e-06, - "loss": 0.0591, - "step": 59130 - }, - { - "epoch": 4.391801574335363, - "grad_norm": 0.6368983387947083, - "learning_rate": 3.6491905539878213e-06, - "loss": 0.0731, - "step": 59140 - }, - { - "epoch": 4.39254418535571, - "grad_norm": 0.9122620224952698, - "learning_rate": 3.644734887865736e-06, - "loss": 0.0579, - "step": 59150 - }, - { - "epoch": 4.393286796376058, - "grad_norm": 1.5511554479599, - "learning_rate": 3.640279221743651e-06, - "loss": 0.0578, - "step": 59160 - }, - { - "epoch": 4.394029407396406, - "grad_norm": 1.4701236486434937, - "learning_rate": 3.6358235556215655e-06, - "loss": 0.0377, - "step": 59170 - }, - { - "epoch": 4.394772018416753, - "grad_norm": 0.35914507508277893, - "learning_rate": 3.63136788949948e-06, - "loss": 0.0586, - "step": 59180 - }, - { - "epoch": 4.395514629437101, - "grad_norm": 1.4817779064178467, - "learning_rate": 3.626912223377395e-06, - "loss": 0.0664, - "step": 59190 - }, - { - "epoch": 4.396257240457448, - "grad_norm": 2.5295608043670654, - "learning_rate": 3.6224565572553096e-06, - "loss": 0.0454, - "step": 59200 - }, - { - "epoch": 4.396999851477796, - "grad_norm": 0.7852722406387329, - "learning_rate": 3.618000891133224e-06, - "loss": 0.0399, - "step": 59210 - }, - { - "epoch": 4.397742462498144, - "grad_norm": 0.24262398481369019, - "learning_rate": 3.613545225011139e-06, - "loss": 0.0409, - "step": 59220 - }, - { - "epoch": 4.398485073518491, - "grad_norm": 0.33007925748825073, - "learning_rate": 3.609089558889054e-06, - "loss": 0.0593, - "step": 59230 - }, - { - "epoch": 4.399227684538839, - "grad_norm": 1.3206017017364502, - "learning_rate": 3.6046338927669687e-06, - "loss": 0.0687, - "step": 59240 - }, - { - "epoch": 4.399970295559186, - "grad_norm": 2.052076578140259, - "learning_rate": 3.6001782266448837e-06, - "loss": 0.0808, - "step": 59250 - }, - { - "epoch": 4.400712906579534, - "grad_norm": 3.8353078365325928, - "learning_rate": 3.5957225605227983e-06, - "loss": 0.0581, - "step": 59260 - }, - { - "epoch": 4.4014555175998815, - "grad_norm": 0.6021201014518738, - "learning_rate": 3.591266894400713e-06, - "loss": 0.0714, - "step": 59270 - }, - { - "epoch": 4.4021981286202285, - "grad_norm": 2.1962730884552, - "learning_rate": 3.586811228278628e-06, - "loss": 0.0501, - "step": 59280 - }, - { - "epoch": 4.402940739640576, - "grad_norm": 0.2795027792453766, - "learning_rate": 3.5823555621565424e-06, - "loss": 0.0573, - "step": 59290 - }, - { - "epoch": 4.403683350660923, - "grad_norm": 1.4487619400024414, - "learning_rate": 3.5778998960344574e-06, - "loss": 0.0312, - "step": 59300 - }, - { - "epoch": 4.404425961681271, - "grad_norm": 2.9178457260131836, - "learning_rate": 3.573444229912372e-06, - "loss": 0.0564, - "step": 59310 - }, - { - "epoch": 4.405168572701619, - "grad_norm": 1.1441450119018555, - "learning_rate": 3.5689885637902866e-06, - "loss": 0.0618, - "step": 59320 - }, - { - "epoch": 4.405911183721966, - "grad_norm": 2.4585134983062744, - "learning_rate": 3.5645328976682016e-06, - "loss": 0.044, - "step": 59330 - }, - { - "epoch": 4.406653794742314, - "grad_norm": 1.8690593242645264, - "learning_rate": 3.560077231546116e-06, - "loss": 0.0638, - "step": 59340 - }, - { - "epoch": 4.407396405762661, - "grad_norm": 1.3198564052581787, - "learning_rate": 3.5556215654240307e-06, - "loss": 0.0596, - "step": 59350 - }, - { - "epoch": 4.408139016783009, - "grad_norm": 1.511386513710022, - "learning_rate": 3.551165899301946e-06, - "loss": 0.0775, - "step": 59360 - }, - { - "epoch": 4.408881627803357, - "grad_norm": 1.489264965057373, - "learning_rate": 3.5467102331798607e-06, - "loss": 0.0824, - "step": 59370 - }, - { - "epoch": 4.409624238823704, - "grad_norm": 0.6738532185554504, - "learning_rate": 3.5422545670577753e-06, - "loss": 0.1038, - "step": 59380 - }, - { - "epoch": 4.410366849844052, - "grad_norm": 1.928895115852356, - "learning_rate": 3.5377989009356903e-06, - "loss": 0.0687, - "step": 59390 - }, - { - "epoch": 4.411109460864399, - "grad_norm": 2.568911552429199, - "learning_rate": 3.533343234813605e-06, - "loss": 0.0677, - "step": 59400 - }, - { - "epoch": 4.411852071884747, - "grad_norm": 3.7109265327453613, - "learning_rate": 3.5288875686915194e-06, - "loss": 0.0468, - "step": 59410 - }, - { - "epoch": 4.412594682905095, - "grad_norm": 1.1439366340637207, - "learning_rate": 3.5244319025694344e-06, - "loss": 0.0471, - "step": 59420 - }, - { - "epoch": 4.413337293925442, - "grad_norm": 1.701995849609375, - "learning_rate": 3.519976236447349e-06, - "loss": 0.0555, - "step": 59430 - }, - { - "epoch": 4.4140799049457895, - "grad_norm": 3.7509143352508545, - "learning_rate": 3.5155205703252635e-06, - "loss": 0.0486, - "step": 59440 - }, - { - "epoch": 4.4148225159661365, - "grad_norm": 2.0457451343536377, - "learning_rate": 3.5110649042031785e-06, - "loss": 0.0599, - "step": 59450 - }, - { - "epoch": 4.415565126986484, - "grad_norm": 1.8995429277420044, - "learning_rate": 3.506609238081093e-06, - "loss": 0.0546, - "step": 59460 - }, - { - "epoch": 4.416307738006832, - "grad_norm": 1.6824947595596313, - "learning_rate": 3.5021535719590077e-06, - "loss": 0.0679, - "step": 59470 - }, - { - "epoch": 4.417050349027179, - "grad_norm": 2.0449635982513428, - "learning_rate": 3.4976979058369227e-06, - "loss": 0.0552, - "step": 59480 - }, - { - "epoch": 4.417792960047527, - "grad_norm": 0.3595518469810486, - "learning_rate": 3.4932422397148372e-06, - "loss": 0.0452, - "step": 59490 - }, - { - "epoch": 4.418535571067874, - "grad_norm": 0.20933711528778076, - "learning_rate": 3.4887865735927522e-06, - "loss": 0.0662, - "step": 59500 - }, - { - "epoch": 4.419278182088222, - "grad_norm": 2.1247775554656982, - "learning_rate": 3.4843309074706672e-06, - "loss": 0.0577, - "step": 59510 - }, - { - "epoch": 4.42002079310857, - "grad_norm": 1.3232066631317139, - "learning_rate": 3.479875241348582e-06, - "loss": 0.0739, - "step": 59520 - }, - { - "epoch": 4.420763404128917, - "grad_norm": 1.2847281694412231, - "learning_rate": 3.4754195752264964e-06, - "loss": 0.1052, - "step": 59530 - }, - { - "epoch": 4.421506015149265, - "grad_norm": 2.9592535495758057, - "learning_rate": 3.4709639091044114e-06, - "loss": 0.0837, - "step": 59540 - }, - { - "epoch": 4.422248626169612, - "grad_norm": 3.9245753288269043, - "learning_rate": 3.466508242982326e-06, - "loss": 0.0602, - "step": 59550 - }, - { - "epoch": 4.42299123718996, - "grad_norm": 0.8036717176437378, - "learning_rate": 3.4620525768602405e-06, - "loss": 0.0609, - "step": 59560 - }, - { - "epoch": 4.423733848210308, - "grad_norm": 0.5781071782112122, - "learning_rate": 3.4575969107381555e-06, - "loss": 0.056, - "step": 59570 - }, - { - "epoch": 4.424476459230655, - "grad_norm": 2.8381593227386475, - "learning_rate": 3.45314124461607e-06, - "loss": 0.0667, - "step": 59580 - }, - { - "epoch": 4.425219070251003, - "grad_norm": 2.3946638107299805, - "learning_rate": 3.4486855784939846e-06, - "loss": 0.0354, - "step": 59590 - }, - { - "epoch": 4.4259616812713505, - "grad_norm": 1.2118726968765259, - "learning_rate": 3.4442299123718996e-06, - "loss": 0.0549, - "step": 59600 - }, - { - "epoch": 4.4267042922916975, - "grad_norm": 2.8117597103118896, - "learning_rate": 3.439774246249814e-06, - "loss": 0.0692, - "step": 59610 - }, - { - "epoch": 4.427446903312045, - "grad_norm": 2.231323719024658, - "learning_rate": 3.4353185801277288e-06, - "loss": 0.0931, - "step": 59620 - }, - { - "epoch": 4.428189514332392, - "grad_norm": 2.092609405517578, - "learning_rate": 3.430862914005644e-06, - "loss": 0.089, - "step": 59630 - }, - { - "epoch": 4.42893212535274, - "grad_norm": 1.9209469556808472, - "learning_rate": 3.4264072478835588e-06, - "loss": 0.0671, - "step": 59640 - }, - { - "epoch": 4.429674736373088, - "grad_norm": 2.280712842941284, - "learning_rate": 3.4219515817614738e-06, - "loss": 0.0598, - "step": 59650 - }, - { - "epoch": 4.430417347393435, - "grad_norm": 1.3931483030319214, - "learning_rate": 3.4174959156393883e-06, - "loss": 0.063, - "step": 59660 - }, - { - "epoch": 4.431159958413783, - "grad_norm": 1.0616021156311035, - "learning_rate": 3.413040249517303e-06, - "loss": 0.0531, - "step": 59670 - }, - { - "epoch": 4.43190256943413, - "grad_norm": 1.928513765335083, - "learning_rate": 3.408584583395218e-06, - "loss": 0.0707, - "step": 59680 - }, - { - "epoch": 4.432645180454478, - "grad_norm": 0.7961556315422058, - "learning_rate": 3.4041289172731325e-06, - "loss": 0.0584, - "step": 59690 - }, - { - "epoch": 4.433387791474826, - "grad_norm": 1.220015525817871, - "learning_rate": 3.399673251151047e-06, - "loss": 0.0569, - "step": 59700 - }, - { - "epoch": 4.434130402495173, - "grad_norm": 0.7394031882286072, - "learning_rate": 3.395217585028962e-06, - "loss": 0.0611, - "step": 59710 - }, - { - "epoch": 4.434873013515521, - "grad_norm": 2.8178136348724365, - "learning_rate": 3.3907619189068766e-06, - "loss": 0.082, - "step": 59720 - }, - { - "epoch": 4.435615624535868, - "grad_norm": 0.8858090043067932, - "learning_rate": 3.386306252784791e-06, - "loss": 0.064, - "step": 59730 - }, - { - "epoch": 4.436358235556216, - "grad_norm": 1.7345311641693115, - "learning_rate": 3.381850586662706e-06, - "loss": 0.0571, - "step": 59740 - }, - { - "epoch": 4.437100846576564, - "grad_norm": 1.1461231708526611, - "learning_rate": 3.3773949205406207e-06, - "loss": 0.0449, - "step": 59750 - }, - { - "epoch": 4.437843457596911, - "grad_norm": 5.428303241729736, - "learning_rate": 3.3729392544185357e-06, - "loss": 0.0415, - "step": 59760 - }, - { - "epoch": 4.4385860686172585, - "grad_norm": 3.327192544937134, - "learning_rate": 3.3684835882964507e-06, - "loss": 0.0834, - "step": 59770 - }, - { - "epoch": 4.4393286796376055, - "grad_norm": 3.348003387451172, - "learning_rate": 3.3640279221743653e-06, - "loss": 0.0855, - "step": 59780 - }, - { - "epoch": 4.440071290657953, - "grad_norm": 0.42248573899269104, - "learning_rate": 3.35957225605228e-06, - "loss": 0.0454, - "step": 59790 - }, - { - "epoch": 4.440813901678301, - "grad_norm": 0.9625753164291382, - "learning_rate": 3.355116589930195e-06, - "loss": 0.0628, - "step": 59800 - }, - { - "epoch": 4.441556512698648, - "grad_norm": 0.7581101059913635, - "learning_rate": 3.3506609238081094e-06, - "loss": 0.0483, - "step": 59810 - }, - { - "epoch": 4.442299123718996, - "grad_norm": 1.2970561981201172, - "learning_rate": 3.346205257686024e-06, - "loss": 0.039, - "step": 59820 - }, - { - "epoch": 4.443041734739343, - "grad_norm": 2.935488700866699, - "learning_rate": 3.341749591563939e-06, - "loss": 0.0532, - "step": 59830 - }, - { - "epoch": 4.443784345759691, - "grad_norm": 2.0861330032348633, - "learning_rate": 3.3372939254418536e-06, - "loss": 0.0597, - "step": 59840 - }, - { - "epoch": 4.444526956780039, - "grad_norm": 3.6742630004882812, - "learning_rate": 3.332838259319768e-06, - "loss": 0.0661, - "step": 59850 - }, - { - "epoch": 4.445269567800386, - "grad_norm": 2.0556039810180664, - "learning_rate": 3.328382593197683e-06, - "loss": 0.036, - "step": 59860 - }, - { - "epoch": 4.446012178820734, - "grad_norm": 0.8471649289131165, - "learning_rate": 3.3239269270755977e-06, - "loss": 0.0508, - "step": 59870 - }, - { - "epoch": 4.446754789841081, - "grad_norm": 4.926666736602783, - "learning_rate": 3.3194712609535123e-06, - "loss": 0.0731, - "step": 59880 - }, - { - "epoch": 4.447497400861429, - "grad_norm": 0.624416708946228, - "learning_rate": 3.3150155948314277e-06, - "loss": 0.0655, - "step": 59890 - }, - { - "epoch": 4.448240011881777, - "grad_norm": 0.27737393975257874, - "learning_rate": 3.3105599287093423e-06, - "loss": 0.044, - "step": 59900 - }, - { - "epoch": 4.448982622902124, - "grad_norm": 0.9491309523582458, - "learning_rate": 3.306104262587257e-06, - "loss": 0.0543, - "step": 59910 - }, - { - "epoch": 4.4497252339224715, - "grad_norm": 3.4223947525024414, - "learning_rate": 3.301648596465172e-06, - "loss": 0.0775, - "step": 59920 - }, - { - "epoch": 4.4504678449428186, - "grad_norm": 1.4844588041305542, - "learning_rate": 3.2971929303430864e-06, - "loss": 0.0869, - "step": 59930 - }, - { - "epoch": 4.4512104559631664, - "grad_norm": 0.4859474003314972, - "learning_rate": 3.292737264221001e-06, - "loss": 0.0733, - "step": 59940 - }, - { - "epoch": 4.451953066983514, - "grad_norm": 3.157867670059204, - "learning_rate": 3.288281598098916e-06, - "loss": 0.0344, - "step": 59950 - }, - { - "epoch": 4.452695678003861, - "grad_norm": 1.859097957611084, - "learning_rate": 3.2838259319768305e-06, - "loss": 0.0689, - "step": 59960 - }, - { - "epoch": 4.453438289024209, - "grad_norm": 1.4190930128097534, - "learning_rate": 3.279370265854745e-06, - "loss": 0.0691, - "step": 59970 - }, - { - "epoch": 4.454180900044556, - "grad_norm": 1.7417631149291992, - "learning_rate": 3.27491459973266e-06, - "loss": 0.0664, - "step": 59980 - }, - { - "epoch": 4.454923511064904, - "grad_norm": 0.7751902937889099, - "learning_rate": 3.2704589336105747e-06, - "loss": 0.0361, - "step": 59990 - }, - { - "epoch": 4.455666122085252, - "grad_norm": 0.1440928876399994, - "learning_rate": 3.2660032674884897e-06, - "loss": 0.0985, - "step": 60000 - }, - { - "epoch": 4.456408733105599, - "grad_norm": 0.39282867312431335, - "learning_rate": 3.2615476013664042e-06, - "loss": 0.0667, - "step": 60010 - }, - { - "epoch": 4.457151344125947, - "grad_norm": 1.775133728981018, - "learning_rate": 3.2570919352443192e-06, - "loss": 0.0448, - "step": 60020 - }, - { - "epoch": 4.457893955146295, - "grad_norm": 0.5823416709899902, - "learning_rate": 3.2526362691222342e-06, - "loss": 0.0666, - "step": 60030 - }, - { - "epoch": 4.458636566166642, - "grad_norm": 1.3295433521270752, - "learning_rate": 3.248180603000149e-06, - "loss": 0.0311, - "step": 60040 - }, - { - "epoch": 4.45937917718699, - "grad_norm": 2.622669219970703, - "learning_rate": 3.2437249368780634e-06, - "loss": 0.0918, - "step": 60050 - }, - { - "epoch": 4.460121788207337, - "grad_norm": 1.4052746295928955, - "learning_rate": 3.2392692707559784e-06, - "loss": 0.0562, - "step": 60060 - }, - { - "epoch": 4.460864399227685, - "grad_norm": 0.7786046862602234, - "learning_rate": 3.234813604633893e-06, - "loss": 0.0816, - "step": 60070 - }, - { - "epoch": 4.4616070102480325, - "grad_norm": 0.7435610294342041, - "learning_rate": 3.2303579385118075e-06, - "loss": 0.054, - "step": 60080 - }, - { - "epoch": 4.4623496212683795, - "grad_norm": 1.1836934089660645, - "learning_rate": 3.2259022723897225e-06, - "loss": 0.0801, - "step": 60090 - }, - { - "epoch": 4.463092232288727, - "grad_norm": 1.5763529539108276, - "learning_rate": 3.221446606267637e-06, - "loss": 0.0548, - "step": 60100 - }, - { - "epoch": 4.463834843309074, - "grad_norm": 1.6699156761169434, - "learning_rate": 3.2169909401455516e-06, - "loss": 0.0634, - "step": 60110 - }, - { - "epoch": 4.464577454329422, - "grad_norm": 0.8276064991950989, - "learning_rate": 3.2125352740234666e-06, - "loss": 0.0634, - "step": 60120 - }, - { - "epoch": 4.46532006534977, - "grad_norm": 0.7230677008628845, - "learning_rate": 3.208079607901381e-06, - "loss": 0.0656, - "step": 60130 - }, - { - "epoch": 4.466062676370117, - "grad_norm": 1.405157446861267, - "learning_rate": 3.2036239417792958e-06, - "loss": 0.0971, - "step": 60140 - }, - { - "epoch": 4.466805287390465, - "grad_norm": 1.1781624555587769, - "learning_rate": 3.199168275657211e-06, - "loss": 0.0574, - "step": 60150 - }, - { - "epoch": 4.467547898410812, - "grad_norm": 0.2808609902858734, - "learning_rate": 3.1947126095351258e-06, - "loss": 0.0415, - "step": 60160 - }, - { - "epoch": 4.46829050943116, - "grad_norm": 1.525888204574585, - "learning_rate": 3.1902569434130403e-06, - "loss": 0.0791, - "step": 60170 - }, - { - "epoch": 4.469033120451508, - "grad_norm": 0.37916257977485657, - "learning_rate": 3.1858012772909553e-06, - "loss": 0.0406, - "step": 60180 - }, - { - "epoch": 4.469775731471855, - "grad_norm": 1.2917027473449707, - "learning_rate": 3.18134561116887e-06, - "loss": 0.0632, - "step": 60190 - }, - { - "epoch": 4.470518342492203, - "grad_norm": 1.9711061716079712, - "learning_rate": 3.1768899450467845e-06, - "loss": 0.0628, - "step": 60200 - }, - { - "epoch": 4.47126095351255, - "grad_norm": 0.7012740969657898, - "learning_rate": 3.1724342789246995e-06, - "loss": 0.0308, - "step": 60210 - }, - { - "epoch": 4.472003564532898, - "grad_norm": 2.618312358856201, - "learning_rate": 3.167978612802614e-06, - "loss": 0.0659, - "step": 60220 - }, - { - "epoch": 4.472746175553246, - "grad_norm": 0.5947886109352112, - "learning_rate": 3.1635229466805286e-06, - "loss": 0.0411, - "step": 60230 - }, - { - "epoch": 4.473488786573593, - "grad_norm": 4.743587493896484, - "learning_rate": 3.1590672805584436e-06, - "loss": 0.0349, - "step": 60240 - }, - { - "epoch": 4.4742313975939405, - "grad_norm": 1.2651278972625732, - "learning_rate": 3.154611614436358e-06, - "loss": 0.0567, - "step": 60250 - }, - { - "epoch": 4.4749740086142875, - "grad_norm": 2.4540841579437256, - "learning_rate": 3.1501559483142727e-06, - "loss": 0.0446, - "step": 60260 - }, - { - "epoch": 4.475716619634635, - "grad_norm": 3.072303295135498, - "learning_rate": 3.1457002821921877e-06, - "loss": 0.0747, - "step": 60270 - }, - { - "epoch": 4.476459230654983, - "grad_norm": 3.0911707878112793, - "learning_rate": 3.1412446160701023e-06, - "loss": 0.0377, - "step": 60280 - }, - { - "epoch": 4.47720184167533, - "grad_norm": 0.6322579979896545, - "learning_rate": 3.1367889499480173e-06, - "loss": 0.0727, - "step": 60290 - }, - { - "epoch": 4.477944452695678, - "grad_norm": 0.2527351379394531, - "learning_rate": 3.1323332838259323e-06, - "loss": 0.0379, - "step": 60300 - }, - { - "epoch": 4.478687063716025, - "grad_norm": 2.0621883869171143, - "learning_rate": 3.127877617703847e-06, - "loss": 0.0577, - "step": 60310 - }, - { - "epoch": 4.479429674736373, - "grad_norm": 0.48624280095100403, - "learning_rate": 3.1234219515817614e-06, - "loss": 0.048, - "step": 60320 - }, - { - "epoch": 4.480172285756721, - "grad_norm": 0.3645511269569397, - "learning_rate": 3.1189662854596764e-06, - "loss": 0.0462, - "step": 60330 - }, - { - "epoch": 4.480914896777068, - "grad_norm": 0.7930494546890259, - "learning_rate": 3.114510619337591e-06, - "loss": 0.0435, - "step": 60340 - }, - { - "epoch": 4.481657507797416, - "grad_norm": 1.6003566980361938, - "learning_rate": 3.110054953215506e-06, - "loss": 0.0577, - "step": 60350 - }, - { - "epoch": 4.482400118817763, - "grad_norm": 0.8492751717567444, - "learning_rate": 3.1055992870934206e-06, - "loss": 0.049, - "step": 60360 - }, - { - "epoch": 4.483142729838111, - "grad_norm": 2.092189311981201, - "learning_rate": 3.101143620971335e-06, - "loss": 0.057, - "step": 60370 - }, - { - "epoch": 4.483885340858459, - "grad_norm": 0.5786078572273254, - "learning_rate": 3.09668795484925e-06, - "loss": 0.0569, - "step": 60380 - }, - { - "epoch": 4.484627951878806, - "grad_norm": 0.4640141725540161, - "learning_rate": 3.0922322887271647e-06, - "loss": 0.0618, - "step": 60390 - }, - { - "epoch": 4.485370562899154, - "grad_norm": 2.307746171951294, - "learning_rate": 3.0877766226050793e-06, - "loss": 0.0578, - "step": 60400 - }, - { - "epoch": 4.486113173919501, - "grad_norm": 1.5179271697998047, - "learning_rate": 3.0833209564829943e-06, - "loss": 0.0363, - "step": 60410 - }, - { - "epoch": 4.4868557849398485, - "grad_norm": 0.30121228098869324, - "learning_rate": 3.0788652903609093e-06, - "loss": 0.05, - "step": 60420 - }, - { - "epoch": 4.487598395960196, - "grad_norm": 2.5715456008911133, - "learning_rate": 3.074409624238824e-06, - "loss": 0.0622, - "step": 60430 - }, - { - "epoch": 4.488341006980543, - "grad_norm": 2.3509955406188965, - "learning_rate": 3.069953958116739e-06, - "loss": 0.0728, - "step": 60440 - }, - { - "epoch": 4.489083618000891, - "grad_norm": 1.9906114339828491, - "learning_rate": 3.0654982919946534e-06, - "loss": 0.0598, - "step": 60450 - }, - { - "epoch": 4.489826229021238, - "grad_norm": 1.7488093376159668, - "learning_rate": 3.061042625872568e-06, - "loss": 0.0452, - "step": 60460 - }, - { - "epoch": 4.490568840041586, - "grad_norm": 0.5462767481803894, - "learning_rate": 3.056586959750483e-06, - "loss": 0.0505, - "step": 60470 - }, - { - "epoch": 4.491311451061934, - "grad_norm": 0.6967370510101318, - "learning_rate": 3.0521312936283975e-06, - "loss": 0.0968, - "step": 60480 - }, - { - "epoch": 4.492054062082281, - "grad_norm": 0.6573207378387451, - "learning_rate": 3.047675627506312e-06, - "loss": 0.045, - "step": 60490 - }, - { - "epoch": 4.492796673102629, - "grad_norm": 0.9481235146522522, - "learning_rate": 3.043219961384227e-06, - "loss": 0.0377, - "step": 60500 - }, - { - "epoch": 4.493539284122976, - "grad_norm": 0.8041633367538452, - "learning_rate": 3.0387642952621417e-06, - "loss": 0.0376, - "step": 60510 - }, - { - "epoch": 4.494281895143324, - "grad_norm": 0.4451924264431, - "learning_rate": 3.0343086291400562e-06, - "loss": 0.0509, - "step": 60520 - }, - { - "epoch": 4.495024506163672, - "grad_norm": 1.242018222808838, - "learning_rate": 3.0298529630179712e-06, - "loss": 0.0612, - "step": 60530 - }, - { - "epoch": 4.495767117184019, - "grad_norm": 1.5927053689956665, - "learning_rate": 3.025397296895886e-06, - "loss": 0.0466, - "step": 60540 - }, - { - "epoch": 4.496509728204367, - "grad_norm": 0.3729563355445862, - "learning_rate": 3.020941630773801e-06, - "loss": 0.0654, - "step": 60550 - }, - { - "epoch": 4.497252339224714, - "grad_norm": 1.2885338068008423, - "learning_rate": 3.0164859646517158e-06, - "loss": 0.0394, - "step": 60560 - }, - { - "epoch": 4.497994950245062, - "grad_norm": 0.8227145075798035, - "learning_rate": 3.0120302985296304e-06, - "loss": 0.0379, - "step": 60570 - }, - { - "epoch": 4.4987375612654095, - "grad_norm": 1.220255970954895, - "learning_rate": 3.007574632407545e-06, - "loss": 0.0491, - "step": 60580 - }, - { - "epoch": 4.4994801722857565, - "grad_norm": 1.4378726482391357, - "learning_rate": 3.00311896628546e-06, - "loss": 0.0348, - "step": 60590 - }, - { - "epoch": 4.500222783306104, - "grad_norm": 1.8809531927108765, - "learning_rate": 2.9986633001633745e-06, - "loss": 0.0556, - "step": 60600 - }, - { - "epoch": 4.500965394326451, - "grad_norm": 0.39262035489082336, - "learning_rate": 2.994207634041289e-06, - "loss": 0.0387, - "step": 60610 - }, - { - "epoch": 4.501708005346799, - "grad_norm": 1.774888038635254, - "learning_rate": 2.989751967919204e-06, - "loss": 0.0463, - "step": 60620 - }, - { - "epoch": 4.502450616367147, - "grad_norm": 1.312552809715271, - "learning_rate": 2.9852963017971186e-06, - "loss": 0.0963, - "step": 60630 - }, - { - "epoch": 4.503193227387494, - "grad_norm": 1.1189274787902832, - "learning_rate": 2.980840635675033e-06, - "loss": 0.0553, - "step": 60640 - }, - { - "epoch": 4.503935838407842, - "grad_norm": 1.8008004426956177, - "learning_rate": 2.976384969552948e-06, - "loss": 0.0713, - "step": 60650 - }, - { - "epoch": 4.504678449428189, - "grad_norm": 0.9484963417053223, - "learning_rate": 2.9719293034308628e-06, - "loss": 0.0425, - "step": 60660 - }, - { - "epoch": 4.505421060448537, - "grad_norm": 1.8093992471694946, - "learning_rate": 2.9674736373087778e-06, - "loss": 0.0491, - "step": 60670 - }, - { - "epoch": 4.506163671468885, - "grad_norm": 0.9192953705787659, - "learning_rate": 2.9630179711866927e-06, - "loss": 0.0439, - "step": 60680 - }, - { - "epoch": 4.506906282489232, - "grad_norm": 1.6268279552459717, - "learning_rate": 2.9585623050646073e-06, - "loss": 0.0575, - "step": 60690 - }, - { - "epoch": 4.50764889350958, - "grad_norm": 0.32879000902175903, - "learning_rate": 2.9541066389425223e-06, - "loss": 0.0425, - "step": 60700 - }, - { - "epoch": 4.508391504529927, - "grad_norm": 1.2514315843582153, - "learning_rate": 2.949650972820437e-06, - "loss": 0.0541, - "step": 60710 - }, - { - "epoch": 4.509134115550275, - "grad_norm": 1.9278550148010254, - "learning_rate": 2.9451953066983515e-06, - "loss": 0.0818, - "step": 60720 - }, - { - "epoch": 4.5098767265706226, - "grad_norm": 0.9422726035118103, - "learning_rate": 2.9407396405762664e-06, - "loss": 0.0572, - "step": 60730 - }, - { - "epoch": 4.51061933759097, - "grad_norm": 2.648932695388794, - "learning_rate": 2.936283974454181e-06, - "loss": 0.0671, - "step": 60740 - }, - { - "epoch": 4.5113619486113175, - "grad_norm": 2.699385166168213, - "learning_rate": 2.9318283083320956e-06, - "loss": 0.0477, - "step": 60750 - }, - { - "epoch": 4.5121045596316645, - "grad_norm": 0.6302013397216797, - "learning_rate": 2.9273726422100106e-06, - "loss": 0.056, - "step": 60760 - }, - { - "epoch": 4.512847170652012, - "grad_norm": 1.3976058959960938, - "learning_rate": 2.922916976087925e-06, - "loss": 0.0684, - "step": 60770 - }, - { - "epoch": 4.51358978167236, - "grad_norm": 1.1172116994857788, - "learning_rate": 2.9184613099658397e-06, - "loss": 0.0705, - "step": 60780 - }, - { - "epoch": 4.514332392692707, - "grad_norm": 0.6140624284744263, - "learning_rate": 2.9140056438437547e-06, - "loss": 0.0561, - "step": 60790 - }, - { - "epoch": 4.515075003713055, - "grad_norm": 1.3316572904586792, - "learning_rate": 2.9095499777216693e-06, - "loss": 0.0861, - "step": 60800 - }, - { - "epoch": 4.515817614733402, - "grad_norm": 0.5338196754455566, - "learning_rate": 2.9050943115995843e-06, - "loss": 0.0516, - "step": 60810 - }, - { - "epoch": 4.51656022575375, - "grad_norm": 1.8989540338516235, - "learning_rate": 2.9006386454774993e-06, - "loss": 0.0635, - "step": 60820 - }, - { - "epoch": 4.517302836774098, - "grad_norm": 0.7707126140594482, - "learning_rate": 2.896182979355414e-06, - "loss": 0.076, - "step": 60830 - }, - { - "epoch": 4.518045447794445, - "grad_norm": 3.540522575378418, - "learning_rate": 2.8917273132333284e-06, - "loss": 0.0565, - "step": 60840 - }, - { - "epoch": 4.518788058814793, - "grad_norm": 1.879492998123169, - "learning_rate": 2.8872716471112434e-06, - "loss": 0.0722, - "step": 60850 - }, - { - "epoch": 4.519530669835141, - "grad_norm": 2.0404903888702393, - "learning_rate": 2.882815980989158e-06, - "loss": 0.0665, - "step": 60860 - }, - { - "epoch": 4.520273280855488, - "grad_norm": 0.7342921495437622, - "learning_rate": 2.8783603148670726e-06, - "loss": 0.0549, - "step": 60870 - }, - { - "epoch": 4.521015891875836, - "grad_norm": 2.278888463973999, - "learning_rate": 2.8739046487449876e-06, - "loss": 0.0472, - "step": 60880 - }, - { - "epoch": 4.521758502896183, - "grad_norm": 0.994851291179657, - "learning_rate": 2.869448982622902e-06, - "loss": 0.0613, - "step": 60890 - }, - { - "epoch": 4.5225011139165305, - "grad_norm": 1.3378137350082397, - "learning_rate": 2.8649933165008167e-06, - "loss": 0.0629, - "step": 60900 - }, - { - "epoch": 4.523243724936878, - "grad_norm": 1.5899670124053955, - "learning_rate": 2.8605376503787317e-06, - "loss": 0.0526, - "step": 60910 - }, - { - "epoch": 4.523986335957225, - "grad_norm": 0.4787708520889282, - "learning_rate": 2.8560819842566463e-06, - "loss": 0.0414, - "step": 60920 - }, - { - "epoch": 4.524728946977573, - "grad_norm": 1.1397485733032227, - "learning_rate": 2.851626318134561e-06, - "loss": 0.0578, - "step": 60930 - }, - { - "epoch": 4.52547155799792, - "grad_norm": 1.5434584617614746, - "learning_rate": 2.8471706520124762e-06, - "loss": 0.0783, - "step": 60940 - }, - { - "epoch": 4.526214169018268, - "grad_norm": 0.4989255666732788, - "learning_rate": 2.842714985890391e-06, - "loss": 0.0374, - "step": 60950 - }, - { - "epoch": 4.526956780038616, - "grad_norm": 1.2678778171539307, - "learning_rate": 2.8382593197683054e-06, - "loss": 0.0509, - "step": 60960 - }, - { - "epoch": 4.527699391058963, - "grad_norm": 1.7154262065887451, - "learning_rate": 2.8338036536462204e-06, - "loss": 0.0404, - "step": 60970 - }, - { - "epoch": 4.528442002079311, - "grad_norm": 0.7340508103370667, - "learning_rate": 2.829347987524135e-06, - "loss": 0.0711, - "step": 60980 - }, - { - "epoch": 4.529184613099658, - "grad_norm": 1.5638376474380493, - "learning_rate": 2.8248923214020495e-06, - "loss": 0.0575, - "step": 60990 - }, - { - "epoch": 4.529927224120006, - "grad_norm": 1.4016845226287842, - "learning_rate": 2.8204366552799645e-06, - "loss": 0.0277, - "step": 61000 - }, - { - "epoch": 4.530669835140354, - "grad_norm": 0.4884537160396576, - "learning_rate": 2.815980989157879e-06, - "loss": 0.0537, - "step": 61010 - }, - { - "epoch": 4.531412446160701, - "grad_norm": 1.8895937204360962, - "learning_rate": 2.811525323035794e-06, - "loss": 0.08, - "step": 61020 - }, - { - "epoch": 4.532155057181049, - "grad_norm": 0.5907909274101257, - "learning_rate": 2.8070696569137087e-06, - "loss": 0.0555, - "step": 61030 - }, - { - "epoch": 4.532897668201396, - "grad_norm": 3.3188436031341553, - "learning_rate": 2.8026139907916232e-06, - "loss": 0.0834, - "step": 61040 - }, - { - "epoch": 4.533640279221744, - "grad_norm": 1.3350552320480347, - "learning_rate": 2.7981583246695382e-06, - "loss": 0.0544, - "step": 61050 - }, - { - "epoch": 4.5343828902420915, - "grad_norm": 1.2445580959320068, - "learning_rate": 2.793702658547453e-06, - "loss": 0.0694, - "step": 61060 - }, - { - "epoch": 4.5351255012624385, - "grad_norm": 2.0901408195495605, - "learning_rate": 2.7892469924253674e-06, - "loss": 0.0721, - "step": 61070 - }, - { - "epoch": 4.535868112282786, - "grad_norm": 1.3586374521255493, - "learning_rate": 2.7847913263032828e-06, - "loss": 0.0749, - "step": 61080 - }, - { - "epoch": 4.536610723303134, - "grad_norm": 0.6412113904953003, - "learning_rate": 2.7803356601811973e-06, - "loss": 0.0538, - "step": 61090 - }, - { - "epoch": 4.537353334323481, - "grad_norm": 0.7717999815940857, - "learning_rate": 2.775879994059112e-06, - "loss": 0.0578, - "step": 61100 - }, - { - "epoch": 4.538095945343829, - "grad_norm": 0.7013200521469116, - "learning_rate": 2.771424327937027e-06, - "loss": 0.0765, - "step": 61110 - }, - { - "epoch": 4.538838556364176, - "grad_norm": 1.3296678066253662, - "learning_rate": 2.7669686618149415e-06, - "loss": 0.0531, - "step": 61120 - }, - { - "epoch": 4.539581167384524, - "grad_norm": 1.7592175006866455, - "learning_rate": 2.762512995692856e-06, - "loss": 0.051, - "step": 61130 - }, - { - "epoch": 4.540323778404872, - "grad_norm": 1.7775698900222778, - "learning_rate": 2.758057329570771e-06, - "loss": 0.05, - "step": 61140 - }, - { - "epoch": 4.541066389425219, - "grad_norm": 1.649628758430481, - "learning_rate": 2.7536016634486856e-06, - "loss": 0.0464, - "step": 61150 - }, - { - "epoch": 4.541809000445567, - "grad_norm": 1.310793399810791, - "learning_rate": 2.7491459973266e-06, - "loss": 0.0686, - "step": 61160 - }, - { - "epoch": 4.542551611465914, - "grad_norm": 1.5081290006637573, - "learning_rate": 2.744690331204515e-06, - "loss": 0.0675, - "step": 61170 - }, - { - "epoch": 4.543294222486262, - "grad_norm": 1.5002572536468506, - "learning_rate": 2.7402346650824298e-06, - "loss": 0.0595, - "step": 61180 - }, - { - "epoch": 4.54403683350661, - "grad_norm": 1.7009143829345703, - "learning_rate": 2.7357789989603443e-06, - "loss": 0.0824, - "step": 61190 - }, - { - "epoch": 4.544779444526957, - "grad_norm": 2.5938093662261963, - "learning_rate": 2.7313233328382593e-06, - "loss": 0.052, - "step": 61200 - }, - { - "epoch": 4.545522055547305, - "grad_norm": 1.4305498600006104, - "learning_rate": 2.7268676667161743e-06, - "loss": 0.0621, - "step": 61210 - }, - { - "epoch": 4.546264666567652, - "grad_norm": 1.7838150262832642, - "learning_rate": 2.722412000594089e-06, - "loss": 0.0711, - "step": 61220 - }, - { - "epoch": 4.5470072775879995, - "grad_norm": 0.737729549407959, - "learning_rate": 2.717956334472004e-06, - "loss": 0.0374, - "step": 61230 - }, - { - "epoch": 4.547749888608347, - "grad_norm": 1.021944522857666, - "learning_rate": 2.7135006683499184e-06, - "loss": 0.0631, - "step": 61240 - }, - { - "epoch": 4.548492499628694, - "grad_norm": 0.5011008977890015, - "learning_rate": 2.709045002227833e-06, - "loss": 0.0448, - "step": 61250 - }, - { - "epoch": 4.549235110649042, - "grad_norm": 3.05027437210083, - "learning_rate": 2.704589336105748e-06, - "loss": 0.0467, - "step": 61260 - }, - { - "epoch": 4.549977721669389, - "grad_norm": 3.195746660232544, - "learning_rate": 2.7001336699836626e-06, - "loss": 0.0584, - "step": 61270 - }, - { - "epoch": 4.550720332689737, - "grad_norm": 1.1807307004928589, - "learning_rate": 2.695678003861577e-06, - "loss": 0.0474, - "step": 61280 - }, - { - "epoch": 4.551462943710085, - "grad_norm": 2.7736690044403076, - "learning_rate": 2.691222337739492e-06, - "loss": 0.0686, - "step": 61290 - }, - { - "epoch": 4.552205554730432, - "grad_norm": 1.5270766019821167, - "learning_rate": 2.6867666716174067e-06, - "loss": 0.0425, - "step": 61300 - }, - { - "epoch": 4.55294816575078, - "grad_norm": 1.2185275554656982, - "learning_rate": 2.6823110054953213e-06, - "loss": 0.0443, - "step": 61310 - }, - { - "epoch": 4.553690776771127, - "grad_norm": 1.2609001398086548, - "learning_rate": 2.6778553393732363e-06, - "loss": 0.0793, - "step": 61320 - }, - { - "epoch": 4.554433387791475, - "grad_norm": 2.8986945152282715, - "learning_rate": 2.673399673251151e-06, - "loss": 0.0704, - "step": 61330 - }, - { - "epoch": 4.555175998811823, - "grad_norm": 1.5010956525802612, - "learning_rate": 2.668944007129066e-06, - "loss": 0.0629, - "step": 61340 - }, - { - "epoch": 4.55591860983217, - "grad_norm": 0.4704782962799072, - "learning_rate": 2.664488341006981e-06, - "loss": 0.0613, - "step": 61350 - }, - { - "epoch": 4.556661220852518, - "grad_norm": 0.9099952578544617, - "learning_rate": 2.6600326748848954e-06, - "loss": 0.0481, - "step": 61360 - }, - { - "epoch": 4.557403831872865, - "grad_norm": 3.606095790863037, - "learning_rate": 2.6555770087628104e-06, - "loss": 0.0792, - "step": 61370 - }, - { - "epoch": 4.558146442893213, - "grad_norm": 1.9132657051086426, - "learning_rate": 2.651121342640725e-06, - "loss": 0.0379, - "step": 61380 - }, - { - "epoch": 4.5588890539135605, - "grad_norm": 1.5124993324279785, - "learning_rate": 2.6466656765186396e-06, - "loss": 0.0654, - "step": 61390 - }, - { - "epoch": 4.5596316649339075, - "grad_norm": 0.876136839389801, - "learning_rate": 2.6422100103965545e-06, - "loss": 0.0769, - "step": 61400 - }, - { - "epoch": 4.560374275954255, - "grad_norm": 2.7477357387542725, - "learning_rate": 2.637754344274469e-06, - "loss": 0.0912, - "step": 61410 - }, - { - "epoch": 4.561116886974602, - "grad_norm": 0.6993147730827332, - "learning_rate": 2.6332986781523837e-06, - "loss": 0.0432, - "step": 61420 - }, - { - "epoch": 4.56185949799495, - "grad_norm": 2.7787704467773438, - "learning_rate": 2.6288430120302987e-06, - "loss": 0.0649, - "step": 61430 - }, - { - "epoch": 4.562602109015298, - "grad_norm": 1.603108286857605, - "learning_rate": 2.6243873459082133e-06, - "loss": 0.0468, - "step": 61440 - }, - { - "epoch": 4.563344720035645, - "grad_norm": 2.7773287296295166, - "learning_rate": 2.619931679786128e-06, - "loss": 0.0923, - "step": 61450 - }, - { - "epoch": 4.564087331055993, - "grad_norm": 0.5554677844047546, - "learning_rate": 2.615476013664043e-06, - "loss": 0.1035, - "step": 61460 - }, - { - "epoch": 4.56482994207634, - "grad_norm": 0.5234212279319763, - "learning_rate": 2.611020347541958e-06, - "loss": 0.0597, - "step": 61470 - }, - { - "epoch": 4.565572553096688, - "grad_norm": 0.9597153067588806, - "learning_rate": 2.6065646814198724e-06, - "loss": 0.0679, - "step": 61480 - }, - { - "epoch": 4.566315164117036, - "grad_norm": 3.1372969150543213, - "learning_rate": 2.6021090152977874e-06, - "loss": 0.0755, - "step": 61490 - }, - { - "epoch": 4.567057775137383, - "grad_norm": 1.275407314300537, - "learning_rate": 2.597653349175702e-06, - "loss": 0.0494, - "step": 61500 - }, - { - "epoch": 4.567800386157731, - "grad_norm": 1.3001893758773804, - "learning_rate": 2.5931976830536165e-06, - "loss": 0.0581, - "step": 61510 - }, - { - "epoch": 4.568542997178078, - "grad_norm": 1.2099862098693848, - "learning_rate": 2.5887420169315315e-06, - "loss": 0.1197, - "step": 61520 - }, - { - "epoch": 4.569285608198426, - "grad_norm": 1.1752756834030151, - "learning_rate": 2.584286350809446e-06, - "loss": 0.055, - "step": 61530 - }, - { - "epoch": 4.570028219218774, - "grad_norm": 1.800934076309204, - "learning_rate": 2.5798306846873607e-06, - "loss": 0.0495, - "step": 61540 - }, - { - "epoch": 4.570770830239121, - "grad_norm": 1.4622337818145752, - "learning_rate": 2.5753750185652756e-06, - "loss": 0.0713, - "step": 61550 - }, - { - "epoch": 4.5715134412594685, - "grad_norm": 3.0324299335479736, - "learning_rate": 2.5709193524431902e-06, - "loss": 0.0626, - "step": 61560 - }, - { - "epoch": 4.5722560522798155, - "grad_norm": 3.204300880432129, - "learning_rate": 2.5664636863211048e-06, - "loss": 0.05, - "step": 61570 - }, - { - "epoch": 4.572998663300163, - "grad_norm": 0.8802090883255005, - "learning_rate": 2.5620080201990198e-06, - "loss": 0.0416, - "step": 61580 - }, - { - "epoch": 4.573741274320511, - "grad_norm": 1.4025449752807617, - "learning_rate": 2.5575523540769344e-06, - "loss": 0.0891, - "step": 61590 - }, - { - "epoch": 4.574483885340858, - "grad_norm": 2.5383615493774414, - "learning_rate": 2.5530966879548493e-06, - "loss": 0.0421, - "step": 61600 - }, - { - "epoch": 4.575226496361206, - "grad_norm": 0.5844364166259766, - "learning_rate": 2.5486410218327643e-06, - "loss": 0.0615, - "step": 61610 - }, - { - "epoch": 4.575969107381553, - "grad_norm": 1.9807744026184082, - "learning_rate": 2.544185355710679e-06, - "loss": 0.0521, - "step": 61620 - }, - { - "epoch": 4.576711718401901, - "grad_norm": 2.7843782901763916, - "learning_rate": 2.5397296895885935e-06, - "loss": 0.0369, - "step": 61630 - }, - { - "epoch": 4.577454329422249, - "grad_norm": 2.3830394744873047, - "learning_rate": 2.5352740234665085e-06, - "loss": 0.031, - "step": 61640 - }, - { - "epoch": 4.578196940442596, - "grad_norm": 3.3283472061157227, - "learning_rate": 2.530818357344423e-06, - "loss": 0.0679, - "step": 61650 - }, - { - "epoch": 4.578939551462944, - "grad_norm": 0.3360592722892761, - "learning_rate": 2.5263626912223376e-06, - "loss": 0.0799, - "step": 61660 - }, - { - "epoch": 4.579682162483291, - "grad_norm": 1.3740484714508057, - "learning_rate": 2.5219070251002526e-06, - "loss": 0.0593, - "step": 61670 - }, - { - "epoch": 4.580424773503639, - "grad_norm": 1.121627926826477, - "learning_rate": 2.517451358978167e-06, - "loss": 0.0485, - "step": 61680 - }, - { - "epoch": 4.581167384523987, - "grad_norm": 1.13548743724823, - "learning_rate": 2.512995692856082e-06, - "loss": 0.0602, - "step": 61690 - }, - { - "epoch": 4.581909995544334, - "grad_norm": 1.0693154335021973, - "learning_rate": 2.5085400267339967e-06, - "loss": 0.0607, - "step": 61700 - }, - { - "epoch": 4.5826526065646815, - "grad_norm": 0.1895827353000641, - "learning_rate": 2.5040843606119113e-06, - "loss": 0.052, - "step": 61710 - }, - { - "epoch": 4.5833952175850285, - "grad_norm": 1.7912211418151855, - "learning_rate": 2.4996286944898263e-06, - "loss": 0.0385, - "step": 61720 - }, - { - "epoch": 4.584137828605376, - "grad_norm": 1.460799217224121, - "learning_rate": 2.495173028367741e-06, - "loss": 0.0519, - "step": 61730 - }, - { - "epoch": 4.584880439625724, - "grad_norm": 0.9446871280670166, - "learning_rate": 2.490717362245656e-06, - "loss": 0.0334, - "step": 61740 - }, - { - "epoch": 4.585623050646071, - "grad_norm": 2.315859317779541, - "learning_rate": 2.486261696123571e-06, - "loss": 0.0522, - "step": 61750 - }, - { - "epoch": 4.586365661666419, - "grad_norm": 1.3986417055130005, - "learning_rate": 2.4818060300014854e-06, - "loss": 0.0468, - "step": 61760 - }, - { - "epoch": 4.587108272686766, - "grad_norm": 0.41333481669425964, - "learning_rate": 2.4773503638794e-06, - "loss": 0.0342, - "step": 61770 - }, - { - "epoch": 4.587850883707114, - "grad_norm": 1.8333367109298706, - "learning_rate": 2.472894697757315e-06, - "loss": 0.085, - "step": 61780 - }, - { - "epoch": 4.588593494727462, - "grad_norm": 2.2030510902404785, - "learning_rate": 2.4684390316352296e-06, - "loss": 0.0629, - "step": 61790 - }, - { - "epoch": 4.589336105747809, - "grad_norm": 1.6054326295852661, - "learning_rate": 2.463983365513144e-06, - "loss": 0.0628, - "step": 61800 - }, - { - "epoch": 4.590078716768157, - "grad_norm": 0.6966524720191956, - "learning_rate": 2.459527699391059e-06, - "loss": 0.0585, - "step": 61810 - }, - { - "epoch": 4.590821327788504, - "grad_norm": 0.6496719717979431, - "learning_rate": 2.4550720332689737e-06, - "loss": 0.0488, - "step": 61820 - }, - { - "epoch": 4.591563938808852, - "grad_norm": 1.0793461799621582, - "learning_rate": 2.4506163671468883e-06, - "loss": 0.0609, - "step": 61830 - }, - { - "epoch": 4.5923065498292, - "grad_norm": 2.4242899417877197, - "learning_rate": 2.4461607010248033e-06, - "loss": 0.0906, - "step": 61840 - }, - { - "epoch": 4.593049160849547, - "grad_norm": 1.3996238708496094, - "learning_rate": 2.441705034902718e-06, - "loss": 0.07, - "step": 61850 - }, - { - "epoch": 4.593791771869895, - "grad_norm": 1.6079001426696777, - "learning_rate": 2.4372493687806324e-06, - "loss": 0.0418, - "step": 61860 - }, - { - "epoch": 4.594534382890242, - "grad_norm": 0.46042919158935547, - "learning_rate": 2.432793702658548e-06, - "loss": 0.0495, - "step": 61870 - }, - { - "epoch": 4.5952769939105895, - "grad_norm": 0.7299936413764954, - "learning_rate": 2.4283380365364624e-06, - "loss": 0.0731, - "step": 61880 - }, - { - "epoch": 4.596019604930937, - "grad_norm": 0.81382817029953, - "learning_rate": 2.423882370414377e-06, - "loss": 0.0321, - "step": 61890 - }, - { - "epoch": 4.596762215951284, - "grad_norm": 1.3235479593276978, - "learning_rate": 2.419426704292292e-06, - "loss": 0.0521, - "step": 61900 - }, - { - "epoch": 4.597504826971632, - "grad_norm": 2.247610330581665, - "learning_rate": 2.4149710381702065e-06, - "loss": 0.0735, - "step": 61910 - }, - { - "epoch": 4.598247437991979, - "grad_norm": 0.9639174938201904, - "learning_rate": 2.410515372048121e-06, - "loss": 0.1068, - "step": 61920 - }, - { - "epoch": 4.598990049012327, - "grad_norm": 2.1627914905548096, - "learning_rate": 2.406059705926036e-06, - "loss": 0.0592, - "step": 61930 - }, - { - "epoch": 4.599732660032675, - "grad_norm": 0.5631018877029419, - "learning_rate": 2.4016040398039507e-06, - "loss": 0.0608, - "step": 61940 - }, - { - "epoch": 4.600475271053022, - "grad_norm": 1.5326188802719116, - "learning_rate": 2.3971483736818653e-06, - "loss": 0.0571, - "step": 61950 - }, - { - "epoch": 4.60121788207337, - "grad_norm": 1.1006548404693604, - "learning_rate": 2.3926927075597802e-06, - "loss": 0.0602, - "step": 61960 - }, - { - "epoch": 4.601960493093717, - "grad_norm": 2.1133296489715576, - "learning_rate": 2.388237041437695e-06, - "loss": 0.0895, - "step": 61970 - }, - { - "epoch": 4.602703104114065, - "grad_norm": 0.42553678154945374, - "learning_rate": 2.3837813753156094e-06, - "loss": 0.049, - "step": 61980 - }, - { - "epoch": 4.603445715134413, - "grad_norm": 1.3893156051635742, - "learning_rate": 2.3793257091935244e-06, - "loss": 0.0459, - "step": 61990 - }, - { - "epoch": 4.60418832615476, - "grad_norm": 0.9664128422737122, - "learning_rate": 2.3748700430714394e-06, - "loss": 0.0587, - "step": 62000 - }, - { - "epoch": 4.604930937175108, - "grad_norm": 0.9554176330566406, - "learning_rate": 2.370414376949354e-06, - "loss": 0.0503, - "step": 62010 - }, - { - "epoch": 4.605673548195456, - "grad_norm": 1.3135801553726196, - "learning_rate": 2.365958710827269e-06, - "loss": 0.07, - "step": 62020 - }, - { - "epoch": 4.606416159215803, - "grad_norm": 4.117845058441162, - "learning_rate": 2.3615030447051835e-06, - "loss": 0.0766, - "step": 62030 - }, - { - "epoch": 4.6071587702361505, - "grad_norm": 1.4412838220596313, - "learning_rate": 2.3570473785830985e-06, - "loss": 0.0376, - "step": 62040 - }, - { - "epoch": 4.6079013812564975, - "grad_norm": 2.425473690032959, - "learning_rate": 2.352591712461013e-06, - "loss": 0.0399, - "step": 62050 - }, - { - "epoch": 4.608643992276845, - "grad_norm": 0.8025254011154175, - "learning_rate": 2.3481360463389276e-06, - "loss": 0.0716, - "step": 62060 - }, - { - "epoch": 4.609386603297193, - "grad_norm": 1.2991340160369873, - "learning_rate": 2.3436803802168426e-06, - "loss": 0.0495, - "step": 62070 - }, - { - "epoch": 4.61012921431754, - "grad_norm": 0.7377578020095825, - "learning_rate": 2.339224714094757e-06, - "loss": 0.0436, - "step": 62080 - }, - { - "epoch": 4.610871825337888, - "grad_norm": 0.37221047282218933, - "learning_rate": 2.3347690479726718e-06, - "loss": 0.0418, - "step": 62090 - }, - { - "epoch": 4.611614436358235, - "grad_norm": 1.457234501838684, - "learning_rate": 2.3303133818505868e-06, - "loss": 0.0741, - "step": 62100 - }, - { - "epoch": 4.612357047378583, - "grad_norm": 1.0528523921966553, - "learning_rate": 2.3258577157285013e-06, - "loss": 0.0518, - "step": 62110 - }, - { - "epoch": 4.613099658398931, - "grad_norm": 1.0995347499847412, - "learning_rate": 2.321402049606416e-06, - "loss": 0.0827, - "step": 62120 - }, - { - "epoch": 4.613842269419278, - "grad_norm": 2.883272409439087, - "learning_rate": 2.3169463834843313e-06, - "loss": 0.0476, - "step": 62130 - }, - { - "epoch": 4.614584880439626, - "grad_norm": 1.5344513654708862, - "learning_rate": 2.312490717362246e-06, - "loss": 0.0426, - "step": 62140 - }, - { - "epoch": 4.615327491459973, - "grad_norm": 2.136598587036133, - "learning_rate": 2.3080350512401605e-06, - "loss": 0.0948, - "step": 62150 - }, - { - "epoch": 4.616070102480321, - "grad_norm": 1.1575771570205688, - "learning_rate": 2.3035793851180755e-06, - "loss": 0.0708, - "step": 62160 - }, - { - "epoch": 4.616812713500669, - "grad_norm": 0.935723066329956, - "learning_rate": 2.29912371899599e-06, - "loss": 0.0624, - "step": 62170 - }, - { - "epoch": 4.617555324521016, - "grad_norm": 2.6540639400482178, - "learning_rate": 2.2946680528739046e-06, - "loss": 0.0716, - "step": 62180 - }, - { - "epoch": 4.618297935541364, - "grad_norm": 0.7408058047294617, - "learning_rate": 2.2902123867518196e-06, - "loss": 0.0612, - "step": 62190 - }, - { - "epoch": 4.619040546561711, - "grad_norm": 1.5161961317062378, - "learning_rate": 2.285756720629734e-06, - "loss": 0.0565, - "step": 62200 - }, - { - "epoch": 4.6197831575820585, - "grad_norm": 1.0326826572418213, - "learning_rate": 2.2813010545076487e-06, - "loss": 0.0633, - "step": 62210 - }, - { - "epoch": 4.620525768602406, - "grad_norm": 1.720488429069519, - "learning_rate": 2.2768453883855637e-06, - "loss": 0.0651, - "step": 62220 - }, - { - "epoch": 4.621268379622753, - "grad_norm": 0.7917996048927307, - "learning_rate": 2.2723897222634783e-06, - "loss": 0.0629, - "step": 62230 - }, - { - "epoch": 4.622010990643101, - "grad_norm": 1.6765140295028687, - "learning_rate": 2.267934056141393e-06, - "loss": 0.0662, - "step": 62240 - }, - { - "epoch": 4.622753601663449, - "grad_norm": 0.8106739521026611, - "learning_rate": 2.263478390019308e-06, - "loss": 0.0489, - "step": 62250 - }, - { - "epoch": 4.623496212683796, - "grad_norm": 0.6915829181671143, - "learning_rate": 2.259022723897223e-06, - "loss": 0.0594, - "step": 62260 - }, - { - "epoch": 4.624238823704144, - "grad_norm": 0.5459581613540649, - "learning_rate": 2.2545670577751374e-06, - "loss": 0.0536, - "step": 62270 - }, - { - "epoch": 4.624981434724491, - "grad_norm": 1.8974658250808716, - "learning_rate": 2.2501113916530524e-06, - "loss": 0.0486, - "step": 62280 - }, - { - "epoch": 4.625724045744839, - "grad_norm": 2.0770936012268066, - "learning_rate": 2.245655725530967e-06, - "loss": 0.072, - "step": 62290 - }, - { - "epoch": 4.626466656765187, - "grad_norm": 1.3229902982711792, - "learning_rate": 2.2412000594088816e-06, - "loss": 0.0599, - "step": 62300 - }, - { - "epoch": 4.627209267785534, - "grad_norm": 0.6747040748596191, - "learning_rate": 2.2367443932867966e-06, - "loss": 0.0734, - "step": 62310 - }, - { - "epoch": 4.627951878805882, - "grad_norm": 2.171114921569824, - "learning_rate": 2.232288727164711e-06, - "loss": 0.0809, - "step": 62320 - }, - { - "epoch": 4.628694489826229, - "grad_norm": 1.1108207702636719, - "learning_rate": 2.2278330610426257e-06, - "loss": 0.0447, - "step": 62330 - }, - { - "epoch": 4.629437100846577, - "grad_norm": 1.5883184671401978, - "learning_rate": 2.2233773949205407e-06, - "loss": 0.0599, - "step": 62340 - }, - { - "epoch": 4.630179711866925, - "grad_norm": 1.2950087785720825, - "learning_rate": 2.2189217287984553e-06, - "loss": 0.0346, - "step": 62350 - }, - { - "epoch": 4.630922322887272, - "grad_norm": 2.8710503578186035, - "learning_rate": 2.21446606267637e-06, - "loss": 0.0433, - "step": 62360 - }, - { - "epoch": 4.6316649339076195, - "grad_norm": 0.5784642696380615, - "learning_rate": 2.210010396554285e-06, - "loss": 0.0717, - "step": 62370 - }, - { - "epoch": 4.6324075449279665, - "grad_norm": 1.163934350013733, - "learning_rate": 2.2055547304321994e-06, - "loss": 0.0449, - "step": 62380 - }, - { - "epoch": 4.633150155948314, - "grad_norm": 2.270219564437866, - "learning_rate": 2.201099064310115e-06, - "loss": 0.0608, - "step": 62390 - }, - { - "epoch": 4.633892766968662, - "grad_norm": 2.067028045654297, - "learning_rate": 2.1966433981880294e-06, - "loss": 0.0698, - "step": 62400 - }, - { - "epoch": 4.634635377989009, - "grad_norm": 1.985038161277771, - "learning_rate": 2.192187732065944e-06, - "loss": 0.0819, - "step": 62410 - }, - { - "epoch": 4.635377989009357, - "grad_norm": 3.1525087356567383, - "learning_rate": 2.187732065943859e-06, - "loss": 0.0757, - "step": 62420 - }, - { - "epoch": 4.636120600029704, - "grad_norm": 2.7132487297058105, - "learning_rate": 2.1832763998217735e-06, - "loss": 0.046, - "step": 62430 - }, - { - "epoch": 4.636863211050052, - "grad_norm": 1.3874857425689697, - "learning_rate": 2.178820733699688e-06, - "loss": 0.0706, - "step": 62440 - }, - { - "epoch": 4.6376058220704, - "grad_norm": 1.2080367803573608, - "learning_rate": 2.174365067577603e-06, - "loss": 0.0525, - "step": 62450 - }, - { - "epoch": 4.638348433090747, - "grad_norm": 1.1241955757141113, - "learning_rate": 2.1699094014555177e-06, - "loss": 0.0514, - "step": 62460 - }, - { - "epoch": 4.639091044111095, - "grad_norm": 1.7691078186035156, - "learning_rate": 2.1654537353334322e-06, - "loss": 0.0997, - "step": 62470 - }, - { - "epoch": 4.639833655131442, - "grad_norm": 1.5121815204620361, - "learning_rate": 2.1609980692113472e-06, - "loss": 0.0648, - "step": 62480 - }, - { - "epoch": 4.64057626615179, - "grad_norm": 0.8992467522621155, - "learning_rate": 2.156542403089262e-06, - "loss": 0.0425, - "step": 62490 - }, - { - "epoch": 4.641318877172138, - "grad_norm": 0.8572467565536499, - "learning_rate": 2.1520867369671764e-06, - "loss": 0.064, - "step": 62500 - }, - { - "epoch": 4.642061488192485, - "grad_norm": 0.3243890106678009, - "learning_rate": 2.1476310708450914e-06, - "loss": 0.0469, - "step": 62510 - }, - { - "epoch": 4.6428040992128325, - "grad_norm": 0.47468477487564087, - "learning_rate": 2.143175404723006e-06, - "loss": 0.0805, - "step": 62520 - }, - { - "epoch": 4.6435467102331796, - "grad_norm": 0.49177148938179016, - "learning_rate": 2.138719738600921e-06, - "loss": 0.0608, - "step": 62530 - }, - { - "epoch": 4.6442893212535274, - "grad_norm": 0.7259443402290344, - "learning_rate": 2.134264072478836e-06, - "loss": 0.0658, - "step": 62540 - }, - { - "epoch": 4.645031932273875, - "grad_norm": 1.287712812423706, - "learning_rate": 2.1298084063567505e-06, - "loss": 0.0708, - "step": 62550 - }, - { - "epoch": 4.645774543294222, - "grad_norm": 0.7995294332504272, - "learning_rate": 2.125352740234665e-06, - "loss": 0.0502, - "step": 62560 - }, - { - "epoch": 4.64651715431457, - "grad_norm": 0.879058837890625, - "learning_rate": 2.12089707411258e-06, - "loss": 0.0922, - "step": 62570 - }, - { - "epoch": 4.647259765334917, - "grad_norm": 1.0613620281219482, - "learning_rate": 2.1164414079904946e-06, - "loss": 0.0492, - "step": 62580 - }, - { - "epoch": 4.648002376355265, - "grad_norm": 0.7447329759597778, - "learning_rate": 2.111985741868409e-06, - "loss": 0.0548, - "step": 62590 - }, - { - "epoch": 4.648744987375613, - "grad_norm": 1.1669635772705078, - "learning_rate": 2.107530075746324e-06, - "loss": 0.062, - "step": 62600 - }, - { - "epoch": 4.64948759839596, - "grad_norm": 0.3157268464565277, - "learning_rate": 2.1030744096242388e-06, - "loss": 0.0661, - "step": 62610 - }, - { - "epoch": 4.650230209416308, - "grad_norm": 0.468450665473938, - "learning_rate": 2.0986187435021533e-06, - "loss": 0.0502, - "step": 62620 - }, - { - "epoch": 4.650972820436655, - "grad_norm": 0.6913983821868896, - "learning_rate": 2.0941630773800683e-06, - "loss": 0.0732, - "step": 62630 - }, - { - "epoch": 4.651715431457003, - "grad_norm": 0.5963155031204224, - "learning_rate": 2.089707411257983e-06, - "loss": 0.0588, - "step": 62640 - }, - { - "epoch": 4.652458042477351, - "grad_norm": 0.6475550532341003, - "learning_rate": 2.0852517451358975e-06, - "loss": 0.0436, - "step": 62650 - }, - { - "epoch": 4.653200653497698, - "grad_norm": 0.8035712242126465, - "learning_rate": 2.080796079013813e-06, - "loss": 0.0555, - "step": 62660 - }, - { - "epoch": 4.653943264518046, - "grad_norm": 0.43951982259750366, - "learning_rate": 2.0763404128917275e-06, - "loss": 0.0567, - "step": 62670 - }, - { - "epoch": 4.654685875538393, - "grad_norm": 2.8251731395721436, - "learning_rate": 2.071884746769642e-06, - "loss": 0.0697, - "step": 62680 - }, - { - "epoch": 4.6554284865587405, - "grad_norm": 2.3607585430145264, - "learning_rate": 2.067429080647557e-06, - "loss": 0.0586, - "step": 62690 - }, - { - "epoch": 4.656171097579088, - "grad_norm": 1.1953966617584229, - "learning_rate": 2.0629734145254716e-06, - "loss": 0.0645, - "step": 62700 - }, - { - "epoch": 4.656913708599435, - "grad_norm": 2.366037130355835, - "learning_rate": 2.058517748403386e-06, - "loss": 0.0635, - "step": 62710 - }, - { - "epoch": 4.657656319619783, - "grad_norm": 0.9777756333351135, - "learning_rate": 2.054062082281301e-06, - "loss": 0.0642, - "step": 62720 - }, - { - "epoch": 4.65839893064013, - "grad_norm": 2.669673442840576, - "learning_rate": 2.0496064161592157e-06, - "loss": 0.0608, - "step": 62730 - }, - { - "epoch": 4.659141541660478, - "grad_norm": 2.5014798641204834, - "learning_rate": 2.0451507500371307e-06, - "loss": 0.0718, - "step": 62740 - }, - { - "epoch": 4.659884152680826, - "grad_norm": 2.0952281951904297, - "learning_rate": 2.0406950839150453e-06, - "loss": 0.0731, - "step": 62750 - }, - { - "epoch": 4.660626763701173, - "grad_norm": 1.361879587173462, - "learning_rate": 2.03623941779296e-06, - "loss": 0.0482, - "step": 62760 - }, - { - "epoch": 4.661369374721521, - "grad_norm": 0.5971656441688538, - "learning_rate": 2.031783751670875e-06, - "loss": 0.0543, - "step": 62770 - }, - { - "epoch": 4.662111985741868, - "grad_norm": 2.2407009601593018, - "learning_rate": 2.0273280855487894e-06, - "loss": 0.0915, - "step": 62780 - }, - { - "epoch": 4.662854596762216, - "grad_norm": 0.24493630230426788, - "learning_rate": 2.0228724194267044e-06, - "loss": 0.046, - "step": 62790 - }, - { - "epoch": 4.663597207782564, - "grad_norm": 1.5833337306976318, - "learning_rate": 2.0184167533046194e-06, - "loss": 0.0539, - "step": 62800 - }, - { - "epoch": 4.664339818802911, - "grad_norm": 0.7038244605064392, - "learning_rate": 2.013961087182534e-06, - "loss": 0.0618, - "step": 62810 - }, - { - "epoch": 4.665082429823259, - "grad_norm": 0.6795600652694702, - "learning_rate": 2.0095054210604486e-06, - "loss": 0.0821, - "step": 62820 - }, - { - "epoch": 4.665825040843606, - "grad_norm": 1.3816779851913452, - "learning_rate": 2.0050497549383636e-06, - "loss": 0.047, - "step": 62830 - }, - { - "epoch": 4.666567651863954, - "grad_norm": 3.097158193588257, - "learning_rate": 2.000594088816278e-06, - "loss": 0.0707, - "step": 62840 - }, - { - "epoch": 4.6673102628843015, - "grad_norm": 3.206883192062378, - "learning_rate": 1.9961384226941927e-06, - "loss": 0.0426, - "step": 62850 - }, - { - "epoch": 4.6680528739046485, - "grad_norm": 3.3937978744506836, - "learning_rate": 1.9916827565721077e-06, - "loss": 0.0737, - "step": 62860 - }, - { - "epoch": 4.668795484924996, - "grad_norm": 0.8779681921005249, - "learning_rate": 1.9872270904500223e-06, - "loss": 0.039, - "step": 62870 - }, - { - "epoch": 4.669538095945343, - "grad_norm": 2.1197397708892822, - "learning_rate": 1.982771424327937e-06, - "loss": 0.0465, - "step": 62880 - }, - { - "epoch": 4.670280706965691, - "grad_norm": 3.1833298206329346, - "learning_rate": 1.978315758205852e-06, - "loss": 0.0657, - "step": 62890 - }, - { - "epoch": 4.671023317986039, - "grad_norm": 0.6295514702796936, - "learning_rate": 1.9738600920837664e-06, - "loss": 0.0515, - "step": 62900 - }, - { - "epoch": 4.671765929006386, - "grad_norm": 0.9463853240013123, - "learning_rate": 1.969404425961681e-06, - "loss": 0.0732, - "step": 62910 - }, - { - "epoch": 4.672508540026734, - "grad_norm": 2.7689971923828125, - "learning_rate": 1.9649487598395964e-06, - "loss": 0.0551, - "step": 62920 - }, - { - "epoch": 4.673251151047081, - "grad_norm": 1.0542500019073486, - "learning_rate": 1.960493093717511e-06, - "loss": 0.078, - "step": 62930 - }, - { - "epoch": 4.673993762067429, - "grad_norm": 6.436036586761475, - "learning_rate": 1.9560374275954255e-06, - "loss": 0.0641, - "step": 62940 - }, - { - "epoch": 4.674736373087777, - "grad_norm": 0.5600406527519226, - "learning_rate": 1.9515817614733405e-06, - "loss": 0.0637, - "step": 62950 - }, - { - "epoch": 4.675478984108124, - "grad_norm": 1.567610263824463, - "learning_rate": 1.947126095351255e-06, - "loss": 0.0449, - "step": 62960 - }, - { - "epoch": 4.676221595128472, - "grad_norm": 2.6825263500213623, - "learning_rate": 1.9426704292291697e-06, - "loss": 0.0558, - "step": 62970 - }, - { - "epoch": 4.676964206148819, - "grad_norm": 0.2970806658267975, - "learning_rate": 1.9382147631070847e-06, - "loss": 0.0522, - "step": 62980 - }, - { - "epoch": 4.677706817169167, - "grad_norm": 1.4133620262145996, - "learning_rate": 1.9337590969849992e-06, - "loss": 0.0462, - "step": 62990 - }, - { - "epoch": 4.678449428189515, - "grad_norm": 1.4297685623168945, - "learning_rate": 1.929303430862914e-06, - "loss": 0.0552, - "step": 63000 - }, - { - "epoch": 4.679192039209862, - "grad_norm": 2.5894458293914795, - "learning_rate": 1.924847764740829e-06, - "loss": 0.0763, - "step": 63010 - }, - { - "epoch": 4.6799346502302095, - "grad_norm": 2.202799081802368, - "learning_rate": 1.9203920986187434e-06, - "loss": 0.0616, - "step": 63020 - }, - { - "epoch": 4.6806772612505565, - "grad_norm": 1.195757269859314, - "learning_rate": 1.915936432496658e-06, - "loss": 0.0802, - "step": 63030 - }, - { - "epoch": 4.681419872270904, - "grad_norm": 0.8509438633918762, - "learning_rate": 1.911480766374573e-06, - "loss": 0.0413, - "step": 63040 - }, - { - "epoch": 4.682162483291252, - "grad_norm": 2.7623627185821533, - "learning_rate": 1.907025100252488e-06, - "loss": 0.0487, - "step": 63050 - }, - { - "epoch": 4.682905094311599, - "grad_norm": 1.9826782941818237, - "learning_rate": 1.9025694341304027e-06, - "loss": 0.0715, - "step": 63060 - }, - { - "epoch": 4.683647705331947, - "grad_norm": 0.5094510316848755, - "learning_rate": 1.8981137680083175e-06, - "loss": 0.0596, - "step": 63070 - }, - { - "epoch": 4.684390316352294, - "grad_norm": 1.9838165044784546, - "learning_rate": 1.893658101886232e-06, - "loss": 0.0584, - "step": 63080 - }, - { - "epoch": 4.685132927372642, - "grad_norm": 0.8991755247116089, - "learning_rate": 1.8892024357641469e-06, - "loss": 0.0556, - "step": 63090 - }, - { - "epoch": 4.68587553839299, - "grad_norm": 1.3264687061309814, - "learning_rate": 1.8847467696420616e-06, - "loss": 0.0679, - "step": 63100 - }, - { - "epoch": 4.686618149413337, - "grad_norm": 1.1737868785858154, - "learning_rate": 1.8802911035199762e-06, - "loss": 0.079, - "step": 63110 - }, - { - "epoch": 4.687360760433685, - "grad_norm": 1.7225794792175293, - "learning_rate": 1.875835437397891e-06, - "loss": 0.0473, - "step": 63120 - }, - { - "epoch": 4.688103371454032, - "grad_norm": 0.6222664713859558, - "learning_rate": 1.8713797712758058e-06, - "loss": 0.0525, - "step": 63130 - }, - { - "epoch": 4.68884598247438, - "grad_norm": 2.4344029426574707, - "learning_rate": 1.8669241051537206e-06, - "loss": 0.0497, - "step": 63140 - }, - { - "epoch": 4.689588593494728, - "grad_norm": 0.5827934741973877, - "learning_rate": 1.8624684390316353e-06, - "loss": 0.0458, - "step": 63150 - }, - { - "epoch": 4.690331204515075, - "grad_norm": 4.944514274597168, - "learning_rate": 1.8580127729095501e-06, - "loss": 0.0326, - "step": 63160 - }, - { - "epoch": 4.691073815535423, - "grad_norm": 1.2443019151687622, - "learning_rate": 1.853557106787465e-06, - "loss": 0.0435, - "step": 63170 - }, - { - "epoch": 4.6918164265557705, - "grad_norm": 0.31377652287483215, - "learning_rate": 1.8491014406653795e-06, - "loss": 0.0562, - "step": 63180 - }, - { - "epoch": 4.6925590375761175, - "grad_norm": 1.3072824478149414, - "learning_rate": 1.8446457745432943e-06, - "loss": 0.0721, - "step": 63190 - }, - { - "epoch": 4.693301648596465, - "grad_norm": 1.9341398477554321, - "learning_rate": 1.840190108421209e-06, - "loss": 0.0605, - "step": 63200 - }, - { - "epoch": 4.694044259616812, - "grad_norm": 0.47264960408210754, - "learning_rate": 1.8357344422991236e-06, - "loss": 0.0632, - "step": 63210 - }, - { - "epoch": 4.69478687063716, - "grad_norm": 2.389784574508667, - "learning_rate": 1.8312787761770386e-06, - "loss": 0.0605, - "step": 63220 - }, - { - "epoch": 4.695529481657508, - "grad_norm": 2.27815842628479, - "learning_rate": 1.8268231100549534e-06, - "loss": 0.0653, - "step": 63230 - }, - { - "epoch": 4.696272092677855, - "grad_norm": 1.2528233528137207, - "learning_rate": 1.822367443932868e-06, - "loss": 0.0568, - "step": 63240 - }, - { - "epoch": 4.697014703698203, - "grad_norm": 2.5911924839019775, - "learning_rate": 1.8179117778107827e-06, - "loss": 0.0576, - "step": 63250 - }, - { - "epoch": 4.69775731471855, - "grad_norm": 1.9090697765350342, - "learning_rate": 1.8134561116886975e-06, - "loss": 0.0368, - "step": 63260 - }, - { - "epoch": 4.698499925738898, - "grad_norm": 0.29426848888397217, - "learning_rate": 1.809000445566612e-06, - "loss": 0.0387, - "step": 63270 - }, - { - "epoch": 4.699242536759246, - "grad_norm": 0.4976974129676819, - "learning_rate": 1.804544779444527e-06, - "loss": 0.0605, - "step": 63280 - }, - { - "epoch": 4.699985147779593, - "grad_norm": 2.4460465908050537, - "learning_rate": 1.8000891133224419e-06, - "loss": 0.0946, - "step": 63290 - }, - { - "epoch": 4.700727758799941, - "grad_norm": 1.7145735025405884, - "learning_rate": 1.7956334472003564e-06, - "loss": 0.1029, - "step": 63300 - }, - { - "epoch": 4.701470369820288, - "grad_norm": 3.410822868347168, - "learning_rate": 1.7911777810782712e-06, - "loss": 0.0448, - "step": 63310 - }, - { - "epoch": 4.702212980840636, - "grad_norm": 2.253828525543213, - "learning_rate": 1.786722114956186e-06, - "loss": 0.0709, - "step": 63320 - }, - { - "epoch": 4.7029555918609836, - "grad_norm": 0.8834261894226074, - "learning_rate": 1.7822664488341008e-06, - "loss": 0.0709, - "step": 63330 - }, - { - "epoch": 4.703698202881331, - "grad_norm": 0.5737817883491516, - "learning_rate": 1.7778107827120154e-06, - "loss": 0.0317, - "step": 63340 - }, - { - "epoch": 4.7044408139016785, - "grad_norm": 2.2672598361968994, - "learning_rate": 1.7733551165899303e-06, - "loss": 0.0572, - "step": 63350 - }, - { - "epoch": 4.7051834249220255, - "grad_norm": 0.7184361219406128, - "learning_rate": 1.7688994504678451e-06, - "loss": 0.0242, - "step": 63360 - }, - { - "epoch": 4.705926035942373, - "grad_norm": 2.6737210750579834, - "learning_rate": 1.7644437843457597e-06, - "loss": 0.063, - "step": 63370 - }, - { - "epoch": 4.706668646962721, - "grad_norm": 1.496285080909729, - "learning_rate": 1.7599881182236745e-06, - "loss": 0.0321, - "step": 63380 - }, - { - "epoch": 4.707411257983068, - "grad_norm": 1.187997579574585, - "learning_rate": 1.7555324521015893e-06, - "loss": 0.0588, - "step": 63390 - }, - { - "epoch": 4.708153869003416, - "grad_norm": 0.6710416078567505, - "learning_rate": 1.7510767859795038e-06, - "loss": 0.04, - "step": 63400 - }, - { - "epoch": 4.708896480023764, - "grad_norm": 1.344570279121399, - "learning_rate": 1.7466211198574186e-06, - "loss": 0.0616, - "step": 63410 - }, - { - "epoch": 4.709639091044111, - "grad_norm": 1.7704460620880127, - "learning_rate": 1.7421654537353336e-06, - "loss": 0.0577, - "step": 63420 - }, - { - "epoch": 4.710381702064459, - "grad_norm": 1.4288161993026733, - "learning_rate": 1.7377097876132482e-06, - "loss": 0.0438, - "step": 63430 - }, - { - "epoch": 4.711124313084806, - "grad_norm": 0.8680292367935181, - "learning_rate": 1.733254121491163e-06, - "loss": 0.0569, - "step": 63440 - }, - { - "epoch": 4.711866924105154, - "grad_norm": 0.42400112748146057, - "learning_rate": 1.7287984553690777e-06, - "loss": 0.0361, - "step": 63450 - }, - { - "epoch": 4.712609535125502, - "grad_norm": 0.3633495271205902, - "learning_rate": 1.7243427892469923e-06, - "loss": 0.0364, - "step": 63460 - }, - { - "epoch": 4.713352146145849, - "grad_norm": 0.9491689205169678, - "learning_rate": 1.719887123124907e-06, - "loss": 0.0824, - "step": 63470 - }, - { - "epoch": 4.714094757166197, - "grad_norm": 2.717560052871704, - "learning_rate": 1.715431457002822e-06, - "loss": 0.0632, - "step": 63480 - }, - { - "epoch": 4.714837368186544, - "grad_norm": 2.0803704261779785, - "learning_rate": 1.7109757908807369e-06, - "loss": 0.05, - "step": 63490 - }, - { - "epoch": 4.7155799792068915, - "grad_norm": 1.6950163841247559, - "learning_rate": 1.7065201247586514e-06, - "loss": 0.0636, - "step": 63500 - }, - { - "epoch": 4.716322590227239, - "grad_norm": 0.8007744550704956, - "learning_rate": 1.7020644586365662e-06, - "loss": 0.0632, - "step": 63510 - }, - { - "epoch": 4.717065201247586, - "grad_norm": 0.830875039100647, - "learning_rate": 1.697608792514481e-06, - "loss": 0.0272, - "step": 63520 - }, - { - "epoch": 4.717807812267934, - "grad_norm": 0.48793575167655945, - "learning_rate": 1.6931531263923956e-06, - "loss": 0.043, - "step": 63530 - }, - { - "epoch": 4.718550423288281, - "grad_norm": 1.9636634588241577, - "learning_rate": 1.6886974602703104e-06, - "loss": 0.0448, - "step": 63540 - }, - { - "epoch": 4.719293034308629, - "grad_norm": 1.2597278356552124, - "learning_rate": 1.6842417941482254e-06, - "loss": 0.0393, - "step": 63550 - }, - { - "epoch": 4.720035645328977, - "grad_norm": 1.140265941619873, - "learning_rate": 1.67978612802614e-06, - "loss": 0.0668, - "step": 63560 - }, - { - "epoch": 4.720778256349324, - "grad_norm": 1.5204119682312012, - "learning_rate": 1.6753304619040547e-06, - "loss": 0.0372, - "step": 63570 - }, - { - "epoch": 4.721520867369672, - "grad_norm": 1.4788594245910645, - "learning_rate": 1.6708747957819695e-06, - "loss": 0.0902, - "step": 63580 - }, - { - "epoch": 4.722263478390019, - "grad_norm": 1.6881049871444702, - "learning_rate": 1.666419129659884e-06, - "loss": 0.0413, - "step": 63590 - }, - { - "epoch": 4.723006089410367, - "grad_norm": 2.068535327911377, - "learning_rate": 1.6619634635377988e-06, - "loss": 0.0732, - "step": 63600 - }, - { - "epoch": 4.723748700430715, - "grad_norm": 2.630728006362915, - "learning_rate": 1.6575077974157138e-06, - "loss": 0.0431, - "step": 63610 - }, - { - "epoch": 4.724491311451062, - "grad_norm": 0.42184069752693176, - "learning_rate": 1.6530521312936284e-06, - "loss": 0.0364, - "step": 63620 - }, - { - "epoch": 4.72523392247141, - "grad_norm": 2.413302421569824, - "learning_rate": 1.6485964651715432e-06, - "loss": 0.0655, - "step": 63630 - }, - { - "epoch": 4.725976533491757, - "grad_norm": 0.9263471364974976, - "learning_rate": 1.644140799049458e-06, - "loss": 0.0456, - "step": 63640 - }, - { - "epoch": 4.726719144512105, - "grad_norm": 1.1987792253494263, - "learning_rate": 1.6396851329273726e-06, - "loss": 0.0405, - "step": 63650 - }, - { - "epoch": 4.7274617555324525, - "grad_norm": 3.2439706325531006, - "learning_rate": 1.6352294668052873e-06, - "loss": 0.0617, - "step": 63660 - }, - { - "epoch": 4.7282043665527995, - "grad_norm": 0.45291805267333984, - "learning_rate": 1.6307738006832021e-06, - "loss": 0.0573, - "step": 63670 - }, - { - "epoch": 4.728946977573147, - "grad_norm": 0.45011815428733826, - "learning_rate": 1.6263181345611171e-06, - "loss": 0.0522, - "step": 63680 - }, - { - "epoch": 4.729689588593494, - "grad_norm": 0.6326998472213745, - "learning_rate": 1.6218624684390317e-06, - "loss": 0.0621, - "step": 63690 - }, - { - "epoch": 4.730432199613842, - "grad_norm": 0.5624150633811951, - "learning_rate": 1.6174068023169465e-06, - "loss": 0.0394, - "step": 63700 - }, - { - "epoch": 4.73117481063419, - "grad_norm": 0.9579383134841919, - "learning_rate": 1.6129511361948612e-06, - "loss": 0.0856, - "step": 63710 - }, - { - "epoch": 4.731917421654537, - "grad_norm": 1.3412039279937744, - "learning_rate": 1.6084954700727758e-06, - "loss": 0.0584, - "step": 63720 - }, - { - "epoch": 4.732660032674885, - "grad_norm": 2.0105788707733154, - "learning_rate": 1.6040398039506906e-06, - "loss": 0.0573, - "step": 63730 - }, - { - "epoch": 4.733402643695232, - "grad_norm": 1.5913349390029907, - "learning_rate": 1.5995841378286056e-06, - "loss": 0.0634, - "step": 63740 - }, - { - "epoch": 4.73414525471558, - "grad_norm": 0.57387375831604, - "learning_rate": 1.5951284717065202e-06, - "loss": 0.037, - "step": 63750 - }, - { - "epoch": 4.734887865735928, - "grad_norm": 0.6416048407554626, - "learning_rate": 1.590672805584435e-06, - "loss": 0.0565, - "step": 63760 - }, - { - "epoch": 4.735630476756275, - "grad_norm": 0.8845180869102478, - "learning_rate": 1.5862171394623497e-06, - "loss": 0.0461, - "step": 63770 - }, - { - "epoch": 4.736373087776623, - "grad_norm": 2.478346347808838, - "learning_rate": 1.5817614733402643e-06, - "loss": 0.0446, - "step": 63780 - }, - { - "epoch": 4.73711569879697, - "grad_norm": 0.9896259307861328, - "learning_rate": 1.577305807218179e-06, - "loss": 0.085, - "step": 63790 - }, - { - "epoch": 4.737858309817318, - "grad_norm": 1.2818790674209595, - "learning_rate": 1.5728501410960939e-06, - "loss": 0.0503, - "step": 63800 - }, - { - "epoch": 4.738600920837666, - "grad_norm": 1.4775575399398804, - "learning_rate": 1.5683944749740086e-06, - "loss": 0.0388, - "step": 63810 - }, - { - "epoch": 4.739343531858013, - "grad_norm": 0.6616837978363037, - "learning_rate": 1.5639388088519234e-06, - "loss": 0.0482, - "step": 63820 - }, - { - "epoch": 4.7400861428783605, - "grad_norm": 0.6766378879547119, - "learning_rate": 1.5594831427298382e-06, - "loss": 0.0487, - "step": 63830 - }, - { - "epoch": 4.7408287538987075, - "grad_norm": 4.399589538574219, - "learning_rate": 1.555027476607753e-06, - "loss": 0.0676, - "step": 63840 - }, - { - "epoch": 4.741571364919055, - "grad_norm": 0.8377204537391663, - "learning_rate": 1.5505718104856676e-06, - "loss": 0.0473, - "step": 63850 - }, - { - "epoch": 4.742313975939403, - "grad_norm": 0.36241453886032104, - "learning_rate": 1.5461161443635823e-06, - "loss": 0.0365, - "step": 63860 - }, - { - "epoch": 4.74305658695975, - "grad_norm": 3.0188138484954834, - "learning_rate": 1.5416604782414971e-06, - "loss": 0.0621, - "step": 63870 - }, - { - "epoch": 4.743799197980098, - "grad_norm": 1.3264600038528442, - "learning_rate": 1.537204812119412e-06, - "loss": 0.0571, - "step": 63880 - }, - { - "epoch": 4.744541809000445, - "grad_norm": 2.425934076309204, - "learning_rate": 1.5327491459973267e-06, - "loss": 0.0562, - "step": 63890 - }, - { - "epoch": 4.745284420020793, - "grad_norm": 1.697583794593811, - "learning_rate": 1.5282934798752415e-06, - "loss": 0.0656, - "step": 63900 - }, - { - "epoch": 4.746027031041141, - "grad_norm": 1.6972935199737549, - "learning_rate": 1.523837813753156e-06, - "loss": 0.0846, - "step": 63910 - }, - { - "epoch": 4.746769642061488, - "grad_norm": 0.5245199203491211, - "learning_rate": 1.5193821476310708e-06, - "loss": 0.048, - "step": 63920 - }, - { - "epoch": 4.747512253081836, - "grad_norm": 1.2065707445144653, - "learning_rate": 1.5149264815089856e-06, - "loss": 0.0582, - "step": 63930 - }, - { - "epoch": 4.748254864102183, - "grad_norm": 2.5891146659851074, - "learning_rate": 1.5104708153869004e-06, - "loss": 0.0675, - "step": 63940 - }, - { - "epoch": 4.748997475122531, - "grad_norm": 1.8636987209320068, - "learning_rate": 1.5060151492648152e-06, - "loss": 0.0364, - "step": 63950 - }, - { - "epoch": 4.749740086142879, - "grad_norm": 0.759530246257782, - "learning_rate": 1.50155948314273e-06, - "loss": 0.0409, - "step": 63960 - }, - { - "epoch": 4.750482697163226, - "grad_norm": 1.0763570070266724, - "learning_rate": 1.4971038170206445e-06, - "loss": 0.0513, - "step": 63970 - }, - { - "epoch": 4.751225308183574, - "grad_norm": 0.6795147657394409, - "learning_rate": 1.4926481508985593e-06, - "loss": 0.0513, - "step": 63980 - }, - { - "epoch": 4.751967919203921, - "grad_norm": 1.966013789176941, - "learning_rate": 1.488192484776474e-06, - "loss": 0.078, - "step": 63990 - }, - { - "epoch": 4.7527105302242685, - "grad_norm": 1.7441941499710083, - "learning_rate": 1.4837368186543889e-06, - "loss": 0.0454, - "step": 64000 - }, - { - "epoch": 4.753453141244616, - "grad_norm": 0.6811540722846985, - "learning_rate": 1.4792811525323037e-06, - "loss": 0.0395, - "step": 64010 - }, - { - "epoch": 4.754195752264963, - "grad_norm": 0.5677528381347656, - "learning_rate": 1.4748254864102184e-06, - "loss": 0.0369, - "step": 64020 - }, - { - "epoch": 4.754938363285311, - "grad_norm": 0.3127375841140747, - "learning_rate": 1.4703698202881332e-06, - "loss": 0.0519, - "step": 64030 - }, - { - "epoch": 4.755680974305658, - "grad_norm": 1.2747459411621094, - "learning_rate": 1.4659141541660478e-06, - "loss": 0.0507, - "step": 64040 - }, - { - "epoch": 4.756423585326006, - "grad_norm": 1.9824333190917969, - "learning_rate": 1.4614584880439626e-06, - "loss": 0.0485, - "step": 64050 - }, - { - "epoch": 4.757166196346354, - "grad_norm": 0.8089881539344788, - "learning_rate": 1.4570028219218774e-06, - "loss": 0.0495, - "step": 64060 - }, - { - "epoch": 4.757908807366701, - "grad_norm": 0.6729184985160828, - "learning_rate": 1.4525471557997921e-06, - "loss": 0.0291, - "step": 64070 - }, - { - "epoch": 4.758651418387049, - "grad_norm": 1.3998851776123047, - "learning_rate": 1.448091489677707e-06, - "loss": 0.0749, - "step": 64080 - }, - { - "epoch": 4.759394029407396, - "grad_norm": 1.4854328632354736, - "learning_rate": 1.4436358235556217e-06, - "loss": 0.0579, - "step": 64090 - }, - { - "epoch": 4.760136640427744, - "grad_norm": 0.500187873840332, - "learning_rate": 1.4391801574335363e-06, - "loss": 0.0491, - "step": 64100 - }, - { - "epoch": 4.760879251448092, - "grad_norm": 2.0688111782073975, - "learning_rate": 1.434724491311451e-06, - "loss": 0.0442, - "step": 64110 - }, - { - "epoch": 4.761621862468439, - "grad_norm": 0.8344265222549438, - "learning_rate": 1.4302688251893658e-06, - "loss": 0.0608, - "step": 64120 - }, - { - "epoch": 4.762364473488787, - "grad_norm": 0.7157622575759888, - "learning_rate": 1.4258131590672804e-06, - "loss": 0.0717, - "step": 64130 - }, - { - "epoch": 4.763107084509134, - "grad_norm": 2.2929582595825195, - "learning_rate": 1.4213574929451954e-06, - "loss": 0.0681, - "step": 64140 - }, - { - "epoch": 4.763849695529482, - "grad_norm": 1.3758022785186768, - "learning_rate": 1.4169018268231102e-06, - "loss": 0.0542, - "step": 64150 - }, - { - "epoch": 4.7645923065498295, - "grad_norm": 1.4729808568954468, - "learning_rate": 1.4124461607010248e-06, - "loss": 0.0578, - "step": 64160 - }, - { - "epoch": 4.7653349175701765, - "grad_norm": 3.680039882659912, - "learning_rate": 1.4079904945789395e-06, - "loss": 0.0774, - "step": 64170 - }, - { - "epoch": 4.766077528590524, - "grad_norm": 0.58192378282547, - "learning_rate": 1.4035348284568543e-06, - "loss": 0.0501, - "step": 64180 - }, - { - "epoch": 4.766820139610871, - "grad_norm": 0.3413379192352295, - "learning_rate": 1.3990791623347691e-06, - "loss": 0.0271, - "step": 64190 - }, - { - "epoch": 4.767562750631219, - "grad_norm": 1.4650648832321167, - "learning_rate": 1.3946234962126837e-06, - "loss": 0.0451, - "step": 64200 - }, - { - "epoch": 4.768305361651567, - "grad_norm": 0.2223232537508011, - "learning_rate": 1.3901678300905987e-06, - "loss": 0.0705, - "step": 64210 - }, - { - "epoch": 4.769047972671914, - "grad_norm": 3.3965070247650146, - "learning_rate": 1.3857121639685135e-06, - "loss": 0.0701, - "step": 64220 - }, - { - "epoch": 4.769790583692262, - "grad_norm": 1.7762930393218994, - "learning_rate": 1.381256497846428e-06, - "loss": 0.0888, - "step": 64230 - }, - { - "epoch": 4.770533194712609, - "grad_norm": 0.9293942451477051, - "learning_rate": 1.3768008317243428e-06, - "loss": 0.0653, - "step": 64240 - }, - { - "epoch": 4.771275805732957, - "grad_norm": 1.2319601774215698, - "learning_rate": 1.3723451656022576e-06, - "loss": 0.0603, - "step": 64250 - }, - { - "epoch": 4.772018416753305, - "grad_norm": 0.7350060343742371, - "learning_rate": 1.3678894994801722e-06, - "loss": 0.0535, - "step": 64260 - }, - { - "epoch": 4.772761027773652, - "grad_norm": 0.3854759633541107, - "learning_rate": 1.3634338333580872e-06, - "loss": 0.041, - "step": 64270 - }, - { - "epoch": 4.773503638794, - "grad_norm": 1.7290388345718384, - "learning_rate": 1.358978167236002e-06, - "loss": 0.0524, - "step": 64280 - }, - { - "epoch": 4.774246249814347, - "grad_norm": 1.4731237888336182, - "learning_rate": 1.3545225011139165e-06, - "loss": 0.0818, - "step": 64290 - }, - { - "epoch": 4.774988860834695, - "grad_norm": 1.8884638547897339, - "learning_rate": 1.3500668349918313e-06, - "loss": 0.0443, - "step": 64300 - }, - { - "epoch": 4.7757314718550425, - "grad_norm": 1.970607042312622, - "learning_rate": 1.345611168869746e-06, - "loss": 0.0766, - "step": 64310 - }, - { - "epoch": 4.7764740828753895, - "grad_norm": 0.458402544260025, - "learning_rate": 1.3411555027476606e-06, - "loss": 0.067, - "step": 64320 - }, - { - "epoch": 4.777216693895737, - "grad_norm": 1.3864773511886597, - "learning_rate": 1.3366998366255754e-06, - "loss": 0.0479, - "step": 64330 - }, - { - "epoch": 4.777959304916085, - "grad_norm": 3.7125790119171143, - "learning_rate": 1.3322441705034904e-06, - "loss": 0.088, - "step": 64340 - }, - { - "epoch": 4.778701915936432, - "grad_norm": 0.38081493973731995, - "learning_rate": 1.3277885043814052e-06, - "loss": 0.0677, - "step": 64350 - }, - { - "epoch": 4.77944452695678, - "grad_norm": 1.2474788427352905, - "learning_rate": 1.3233328382593198e-06, - "loss": 0.0681, - "step": 64360 - }, - { - "epoch": 4.780187137977127, - "grad_norm": 0.9167222380638123, - "learning_rate": 1.3188771721372346e-06, - "loss": 0.0884, - "step": 64370 - }, - { - "epoch": 4.780929748997475, - "grad_norm": 1.0856194496154785, - "learning_rate": 1.3144215060151493e-06, - "loss": 0.0562, - "step": 64380 - }, - { - "epoch": 4.781672360017823, - "grad_norm": 1.6708109378814697, - "learning_rate": 1.309965839893064e-06, - "loss": 0.1026, - "step": 64390 - }, - { - "epoch": 4.78241497103817, - "grad_norm": 1.4255729913711548, - "learning_rate": 1.305510173770979e-06, - "loss": 0.064, - "step": 64400 - }, - { - "epoch": 4.783157582058518, - "grad_norm": 0.8202357292175293, - "learning_rate": 1.3010545076488937e-06, - "loss": 0.0501, - "step": 64410 - }, - { - "epoch": 4.783900193078865, - "grad_norm": 1.9395742416381836, - "learning_rate": 1.2965988415268083e-06, - "loss": 0.0641, - "step": 64420 - }, - { - "epoch": 4.784642804099213, - "grad_norm": 0.4814720153808594, - "learning_rate": 1.292143175404723e-06, - "loss": 0.0567, - "step": 64430 - }, - { - "epoch": 4.785385415119561, - "grad_norm": 1.5214014053344727, - "learning_rate": 1.2876875092826378e-06, - "loss": 0.0261, - "step": 64440 - }, - { - "epoch": 4.786128026139908, - "grad_norm": 1.4035849571228027, - "learning_rate": 1.2832318431605524e-06, - "loss": 0.0424, - "step": 64450 - }, - { - "epoch": 4.786870637160256, - "grad_norm": 1.3969433307647705, - "learning_rate": 1.2787761770384672e-06, - "loss": 0.0411, - "step": 64460 - }, - { - "epoch": 4.787613248180603, - "grad_norm": 0.7165231704711914, - "learning_rate": 1.2743205109163822e-06, - "loss": 0.0555, - "step": 64470 - }, - { - "epoch": 4.7883558592009505, - "grad_norm": 2.941373825073242, - "learning_rate": 1.2698648447942967e-06, - "loss": 0.0755, - "step": 64480 - }, - { - "epoch": 4.789098470221298, - "grad_norm": 0.6511389017105103, - "learning_rate": 1.2654091786722115e-06, - "loss": 0.0216, - "step": 64490 - }, - { - "epoch": 4.789841081241645, - "grad_norm": 0.8992854952812195, - "learning_rate": 1.2609535125501263e-06, - "loss": 0.0711, - "step": 64500 - }, - { - "epoch": 4.790583692261993, - "grad_norm": 1.678450107574463, - "learning_rate": 1.256497846428041e-06, - "loss": 0.0658, - "step": 64510 - }, - { - "epoch": 4.79132630328234, - "grad_norm": 1.1826390027999878, - "learning_rate": 1.2520421803059557e-06, - "loss": 0.041, - "step": 64520 - }, - { - "epoch": 4.792068914302688, - "grad_norm": 1.712506651878357, - "learning_rate": 1.2475865141838704e-06, - "loss": 0.0582, - "step": 64530 - }, - { - "epoch": 4.792811525323036, - "grad_norm": 1.0903819799423218, - "learning_rate": 1.2431308480617854e-06, - "loss": 0.0657, - "step": 64540 - }, - { - "epoch": 4.793554136343383, - "grad_norm": 1.190956950187683, - "learning_rate": 1.2386751819397e-06, - "loss": 0.0733, - "step": 64550 - }, - { - "epoch": 4.794296747363731, - "grad_norm": 3.9811456203460693, - "learning_rate": 1.2342195158176148e-06, - "loss": 0.0726, - "step": 64560 - }, - { - "epoch": 4.795039358384079, - "grad_norm": 0.5482442378997803, - "learning_rate": 1.2297638496955296e-06, - "loss": 0.0801, - "step": 64570 - }, - { - "epoch": 4.795781969404426, - "grad_norm": 0.27074581384658813, - "learning_rate": 1.2253081835734441e-06, - "loss": 0.0592, - "step": 64580 - }, - { - "epoch": 4.796524580424774, - "grad_norm": 1.4098689556121826, - "learning_rate": 1.220852517451359e-06, - "loss": 0.0702, - "step": 64590 - }, - { - "epoch": 4.797267191445121, - "grad_norm": 1.838658332824707, - "learning_rate": 1.216396851329274e-06, - "loss": 0.0521, - "step": 64600 - }, - { - "epoch": 4.798009802465469, - "grad_norm": 1.1644221544265747, - "learning_rate": 1.2119411852071885e-06, - "loss": 0.0518, - "step": 64610 - }, - { - "epoch": 4.798752413485817, - "grad_norm": 1.8020392656326294, - "learning_rate": 1.2074855190851033e-06, - "loss": 0.0675, - "step": 64620 - }, - { - "epoch": 4.799495024506164, - "grad_norm": 1.1796648502349854, - "learning_rate": 1.203029852963018e-06, - "loss": 0.0479, - "step": 64630 - }, - { - "epoch": 4.8002376355265115, - "grad_norm": 2.24526047706604, - "learning_rate": 1.1985741868409326e-06, - "loss": 0.0374, - "step": 64640 - }, - { - "epoch": 4.8009802465468585, - "grad_norm": 0.3404271900653839, - "learning_rate": 1.1941185207188474e-06, - "loss": 0.0318, - "step": 64650 - }, - { - "epoch": 4.801722857567206, - "grad_norm": 0.40079450607299805, - "learning_rate": 1.1896628545967622e-06, - "loss": 0.0397, - "step": 64660 - }, - { - "epoch": 4.802465468587554, - "grad_norm": 1.3549119234085083, - "learning_rate": 1.185207188474677e-06, - "loss": 0.0597, - "step": 64670 - }, - { - "epoch": 4.803208079607901, - "grad_norm": 0.6947437524795532, - "learning_rate": 1.1807515223525918e-06, - "loss": 0.0653, - "step": 64680 - }, - { - "epoch": 4.803950690628249, - "grad_norm": 0.9724387526512146, - "learning_rate": 1.1762958562305065e-06, - "loss": 0.0544, - "step": 64690 - }, - { - "epoch": 4.804693301648596, - "grad_norm": 0.9956182241439819, - "learning_rate": 1.1718401901084213e-06, - "loss": 0.0393, - "step": 64700 - }, - { - "epoch": 4.805435912668944, - "grad_norm": 0.9624569416046143, - "learning_rate": 1.1673845239863359e-06, - "loss": 0.0534, - "step": 64710 - }, - { - "epoch": 4.806178523689292, - "grad_norm": 2.9609172344207764, - "learning_rate": 1.1629288578642507e-06, - "loss": 0.054, - "step": 64720 - }, - { - "epoch": 4.806921134709639, - "grad_norm": 1.744933009147644, - "learning_rate": 1.1584731917421657e-06, - "loss": 0.0621, - "step": 64730 - }, - { - "epoch": 4.807663745729987, - "grad_norm": 0.723945677280426, - "learning_rate": 1.1540175256200802e-06, - "loss": 0.042, - "step": 64740 - }, - { - "epoch": 4.808406356750334, - "grad_norm": 1.1456420421600342, - "learning_rate": 1.149561859497995e-06, - "loss": 0.0395, - "step": 64750 - }, - { - "epoch": 4.809148967770682, - "grad_norm": 2.3802542686462402, - "learning_rate": 1.1451061933759098e-06, - "loss": 0.0922, - "step": 64760 - }, - { - "epoch": 4.80989157879103, - "grad_norm": 0.3124806880950928, - "learning_rate": 1.1406505272538244e-06, - "loss": 0.0576, - "step": 64770 - }, - { - "epoch": 4.810634189811377, - "grad_norm": 0.9946483373641968, - "learning_rate": 1.1361948611317392e-06, - "loss": 0.0601, - "step": 64780 - }, - { - "epoch": 4.811376800831725, - "grad_norm": 2.7371938228607178, - "learning_rate": 1.131739195009654e-06, - "loss": 0.0527, - "step": 64790 - }, - { - "epoch": 4.812119411852072, - "grad_norm": 1.2650768756866455, - "learning_rate": 1.1272835288875687e-06, - "loss": 0.0535, - "step": 64800 - }, - { - "epoch": 4.8128620228724195, - "grad_norm": 1.1405729055404663, - "learning_rate": 1.1228278627654835e-06, - "loss": 0.0361, - "step": 64810 - }, - { - "epoch": 4.813604633892767, - "grad_norm": 1.991651177406311, - "learning_rate": 1.1183721966433983e-06, - "loss": 0.0721, - "step": 64820 - }, - { - "epoch": 4.814347244913114, - "grad_norm": 2.725224018096924, - "learning_rate": 1.1139165305213129e-06, - "loss": 0.0446, - "step": 64830 - }, - { - "epoch": 4.815089855933462, - "grad_norm": 0.7116602063179016, - "learning_rate": 1.1094608643992276e-06, - "loss": 0.0459, - "step": 64840 - }, - { - "epoch": 4.815832466953809, - "grad_norm": 1.70388925075531, - "learning_rate": 1.1050051982771424e-06, - "loss": 0.07, - "step": 64850 - }, - { - "epoch": 4.816575077974157, - "grad_norm": 0.45246338844299316, - "learning_rate": 1.1005495321550574e-06, - "loss": 0.0477, - "step": 64860 - }, - { - "epoch": 4.817317688994505, - "grad_norm": 2.529362678527832, - "learning_rate": 1.096093866032972e-06, - "loss": 0.0773, - "step": 64870 - }, - { - "epoch": 4.818060300014852, - "grad_norm": 2.60267972946167, - "learning_rate": 1.0916381999108868e-06, - "loss": 0.0683, - "step": 64880 - }, - { - "epoch": 4.8188029110352, - "grad_norm": 2.9916200637817383, - "learning_rate": 1.0871825337888016e-06, - "loss": 0.0413, - "step": 64890 - }, - { - "epoch": 4.819545522055547, - "grad_norm": 1.2470331192016602, - "learning_rate": 1.0827268676667161e-06, - "loss": 0.0669, - "step": 64900 - }, - { - "epoch": 4.820288133075895, - "grad_norm": 1.2445175647735596, - "learning_rate": 1.078271201544631e-06, - "loss": 0.0672, - "step": 64910 - }, - { - "epoch": 4.821030744096243, - "grad_norm": 0.964076042175293, - "learning_rate": 1.0738155354225457e-06, - "loss": 0.0642, - "step": 64920 - }, - { - "epoch": 4.82177335511659, - "grad_norm": 0.8829526901245117, - "learning_rate": 1.0693598693004605e-06, - "loss": 0.0695, - "step": 64930 - }, - { - "epoch": 4.822515966136938, - "grad_norm": 1.5874435901641846, - "learning_rate": 1.0649042031783753e-06, - "loss": 0.052, - "step": 64940 - }, - { - "epoch": 4.823258577157285, - "grad_norm": 1.9532781839370728, - "learning_rate": 1.06044853705629e-06, - "loss": 0.0823, - "step": 64950 - }, - { - "epoch": 4.824001188177633, - "grad_norm": 1.2501765489578247, - "learning_rate": 1.0559928709342046e-06, - "loss": 0.076, - "step": 64960 - }, - { - "epoch": 4.8247437991979805, - "grad_norm": 1.186736822128296, - "learning_rate": 1.0515372048121194e-06, - "loss": 0.0524, - "step": 64970 - }, - { - "epoch": 4.8254864102183275, - "grad_norm": 1.532196044921875, - "learning_rate": 1.0470815386900342e-06, - "loss": 0.0546, - "step": 64980 - }, - { - "epoch": 4.826229021238675, - "grad_norm": 0.5501788854598999, - "learning_rate": 1.0426258725679487e-06, - "loss": 0.0407, - "step": 64990 - }, - { - "epoch": 4.826971632259022, - "grad_norm": 0.5295414328575134, - "learning_rate": 1.0381702064458637e-06, - "loss": 0.0698, - "step": 65000 - }, - { - "epoch": 4.82771424327937, - "grad_norm": 2.7445316314697266, - "learning_rate": 1.0337145403237785e-06, - "loss": 0.0864, - "step": 65010 - }, - { - "epoch": 4.828456854299718, - "grad_norm": 2.2121715545654297, - "learning_rate": 1.029258874201693e-06, - "loss": 0.0606, - "step": 65020 - }, - { - "epoch": 4.829199465320065, - "grad_norm": 1.5435631275177002, - "learning_rate": 1.0248032080796079e-06, - "loss": 0.0572, - "step": 65030 - }, - { - "epoch": 4.829942076340413, - "grad_norm": 1.4504753351211548, - "learning_rate": 1.0203475419575227e-06, - "loss": 0.0387, - "step": 65040 - }, - { - "epoch": 4.83068468736076, - "grad_norm": 2.8633036613464355, - "learning_rate": 1.0158918758354374e-06, - "loss": 0.0621, - "step": 65050 - }, - { - "epoch": 4.831427298381108, - "grad_norm": 2.3440101146698, - "learning_rate": 1.0114362097133522e-06, - "loss": 0.0797, - "step": 65060 - }, - { - "epoch": 4.832169909401456, - "grad_norm": 0.962746262550354, - "learning_rate": 1.006980543591267e-06, - "loss": 0.0636, - "step": 65070 - }, - { - "epoch": 4.832912520421803, - "grad_norm": 0.8248947262763977, - "learning_rate": 1.0025248774691818e-06, - "loss": 0.0721, - "step": 65080 - }, - { - "epoch": 4.833655131442151, - "grad_norm": 1.0095185041427612, - "learning_rate": 9.980692113470964e-07, - "loss": 0.036, - "step": 65090 - }, - { - "epoch": 4.834397742462498, - "grad_norm": 1.019490122795105, - "learning_rate": 9.936135452250111e-07, - "loss": 0.0576, - "step": 65100 - }, - { - "epoch": 4.835140353482846, - "grad_norm": 0.30174383521080017, - "learning_rate": 9.89157879102926e-07, - "loss": 0.0483, - "step": 65110 - }, - { - "epoch": 4.8358829645031935, - "grad_norm": 1.3941235542297363, - "learning_rate": 9.847022129808405e-07, - "loss": 0.0886, - "step": 65120 - }, - { - "epoch": 4.8366255755235406, - "grad_norm": 2.1527700424194336, - "learning_rate": 9.802465468587555e-07, - "loss": 0.039, - "step": 65130 - }, - { - "epoch": 4.8373681865438884, - "grad_norm": 1.5553241968154907, - "learning_rate": 9.757908807366703e-07, - "loss": 0.0417, - "step": 65140 - }, - { - "epoch": 4.8381107975642355, - "grad_norm": 1.5481144189834595, - "learning_rate": 9.713352146145848e-07, - "loss": 0.0638, - "step": 65150 - }, - { - "epoch": 4.838853408584583, - "grad_norm": 2.373396873474121, - "learning_rate": 9.668795484924996e-07, - "loss": 0.0551, - "step": 65160 - }, - { - "epoch": 4.839596019604931, - "grad_norm": 1.2677528858184814, - "learning_rate": 9.624238823704144e-07, - "loss": 0.0624, - "step": 65170 - }, - { - "epoch": 4.840338630625278, - "grad_norm": 3.198784589767456, - "learning_rate": 9.57968216248329e-07, - "loss": 0.0483, - "step": 65180 - }, - { - "epoch": 4.841081241645626, - "grad_norm": 0.36906376481056213, - "learning_rate": 9.53512550126244e-07, - "loss": 0.0587, - "step": 65190 - }, - { - "epoch": 4.841823852665973, - "grad_norm": 1.4597629308700562, - "learning_rate": 9.490568840041587e-07, - "loss": 0.0588, - "step": 65200 - }, - { - "epoch": 4.842566463686321, - "grad_norm": 1.0740894079208374, - "learning_rate": 9.446012178820734e-07, - "loss": 0.0312, - "step": 65210 - }, - { - "epoch": 4.843309074706669, - "grad_norm": 2.1893422603607178, - "learning_rate": 9.401455517599881e-07, - "loss": 0.088, - "step": 65220 - }, - { - "epoch": 4.844051685727016, - "grad_norm": 2.3881919384002686, - "learning_rate": 9.356898856379029e-07, - "loss": 0.0389, - "step": 65230 - }, - { - "epoch": 4.844794296747364, - "grad_norm": 0.9090191721916199, - "learning_rate": 9.312342195158177e-07, - "loss": 0.0574, - "step": 65240 - }, - { - "epoch": 4.845536907767711, - "grad_norm": 2.5130159854888916, - "learning_rate": 9.267785533937324e-07, - "loss": 0.0519, - "step": 65250 - }, - { - "epoch": 4.846279518788059, - "grad_norm": 0.5649994611740112, - "learning_rate": 9.223228872716471e-07, - "loss": 0.0314, - "step": 65260 - }, - { - "epoch": 4.847022129808407, - "grad_norm": 1.371864914894104, - "learning_rate": 9.178672211495618e-07, - "loss": 0.0652, - "step": 65270 - }, - { - "epoch": 4.847764740828754, - "grad_norm": 0.3888418972492218, - "learning_rate": 9.134115550274767e-07, - "loss": 0.0693, - "step": 65280 - }, - { - "epoch": 4.8485073518491015, - "grad_norm": 2.402367115020752, - "learning_rate": 9.089558889053914e-07, - "loss": 0.0578, - "step": 65290 - }, - { - "epoch": 4.8492499628694485, - "grad_norm": 3.145559310913086, - "learning_rate": 9.04500222783306e-07, - "loss": 0.0581, - "step": 65300 - }, - { - "epoch": 4.849992573889796, - "grad_norm": 0.6334661245346069, - "learning_rate": 9.000445566612209e-07, - "loss": 0.07, - "step": 65310 - }, - { - "epoch": 4.850735184910144, - "grad_norm": 1.6239862442016602, - "learning_rate": 8.955888905391356e-07, - "loss": 0.0437, - "step": 65320 - }, - { - "epoch": 4.851477795930491, - "grad_norm": 2.169163942337036, - "learning_rate": 8.911332244170504e-07, - "loss": 0.0579, - "step": 65330 - }, - { - "epoch": 4.852220406950839, - "grad_norm": 0.9393569827079773, - "learning_rate": 8.866775582949652e-07, - "loss": 0.0424, - "step": 65340 - }, - { - "epoch": 4.852963017971186, - "grad_norm": 0.3016482889652252, - "learning_rate": 8.822218921728799e-07, - "loss": 0.0443, - "step": 65350 - }, - { - "epoch": 4.853705628991534, - "grad_norm": 0.676237165927887, - "learning_rate": 8.777662260507946e-07, - "loss": 0.0511, - "step": 65360 - }, - { - "epoch": 4.854448240011882, - "grad_norm": 0.3696301579475403, - "learning_rate": 8.733105599287093e-07, - "loss": 0.0598, - "step": 65370 - }, - { - "epoch": 4.855190851032229, - "grad_norm": 4.8189921379089355, - "learning_rate": 8.688548938066241e-07, - "loss": 0.0632, - "step": 65380 - }, - { - "epoch": 4.855933462052577, - "grad_norm": 0.5955801010131836, - "learning_rate": 8.643992276845389e-07, - "loss": 0.029, - "step": 65390 - }, - { - "epoch": 4.856676073072924, - "grad_norm": 1.868445634841919, - "learning_rate": 8.599435615624536e-07, - "loss": 0.0496, - "step": 65400 - }, - { - "epoch": 4.857418684093272, - "grad_norm": 0.6771336197853088, - "learning_rate": 8.554878954403684e-07, - "loss": 0.0484, - "step": 65410 - }, - { - "epoch": 4.85816129511362, - "grad_norm": 1.803080677986145, - "learning_rate": 8.510322293182831e-07, - "loss": 0.0599, - "step": 65420 - }, - { - "epoch": 4.858903906133967, - "grad_norm": 2.175628900527954, - "learning_rate": 8.465765631961978e-07, - "loss": 0.0448, - "step": 65430 - }, - { - "epoch": 4.859646517154315, - "grad_norm": 1.489719033241272, - "learning_rate": 8.421208970741127e-07, - "loss": 0.0435, - "step": 65440 - }, - { - "epoch": 4.860389128174662, - "grad_norm": 0.7035216689109802, - "learning_rate": 8.376652309520274e-07, - "loss": 0.0551, - "step": 65450 - }, - { - "epoch": 4.8611317391950095, - "grad_norm": 1.251814603805542, - "learning_rate": 8.33209564829942e-07, - "loss": 0.0537, - "step": 65460 - }, - { - "epoch": 4.861874350215357, - "grad_norm": 0.5528387427330017, - "learning_rate": 8.287538987078569e-07, - "loss": 0.0571, - "step": 65470 - }, - { - "epoch": 4.862616961235704, - "grad_norm": 2.852384328842163, - "learning_rate": 8.242982325857716e-07, - "loss": 0.0791, - "step": 65480 - }, - { - "epoch": 4.863359572256052, - "grad_norm": 1.3060245513916016, - "learning_rate": 8.198425664636863e-07, - "loss": 0.0547, - "step": 65490 - }, - { - "epoch": 4.8641021832764, - "grad_norm": 2.287452220916748, - "learning_rate": 8.153869003416011e-07, - "loss": 0.056, - "step": 65500 - }, - { - "epoch": 4.864844794296747, - "grad_norm": 1.873404860496521, - "learning_rate": 8.109312342195158e-07, - "loss": 0.0859, - "step": 65510 - }, - { - "epoch": 4.865587405317095, - "grad_norm": 1.0122098922729492, - "learning_rate": 8.064755680974306e-07, - "loss": 0.0531, - "step": 65520 - }, - { - "epoch": 4.866330016337442, - "grad_norm": 1.163620948791504, - "learning_rate": 8.020199019753453e-07, - "loss": 0.0633, - "step": 65530 - }, - { - "epoch": 4.86707262735779, - "grad_norm": 2.736693859100342, - "learning_rate": 7.975642358532601e-07, - "loss": 0.0622, - "step": 65540 - }, - { - "epoch": 4.867815238378138, - "grad_norm": 0.43964171409606934, - "learning_rate": 7.931085697311749e-07, - "loss": 0.027, - "step": 65550 - }, - { - "epoch": 4.868557849398485, - "grad_norm": 0.9539849162101746, - "learning_rate": 7.886529036090895e-07, - "loss": 0.043, - "step": 65560 - }, - { - "epoch": 4.869300460418833, - "grad_norm": 0.3715505003929138, - "learning_rate": 7.841972374870043e-07, - "loss": 0.0631, - "step": 65570 - }, - { - "epoch": 4.87004307143918, - "grad_norm": 1.210249423980713, - "learning_rate": 7.797415713649191e-07, - "loss": 0.0706, - "step": 65580 - }, - { - "epoch": 4.870785682459528, - "grad_norm": 2.480217456817627, - "learning_rate": 7.752859052428338e-07, - "loss": 0.0576, - "step": 65590 - }, - { - "epoch": 4.871528293479876, - "grad_norm": 2.4984142780303955, - "learning_rate": 7.708302391207486e-07, - "loss": 0.0893, - "step": 65600 - }, - { - "epoch": 4.872270904500223, - "grad_norm": 0.5741029381752014, - "learning_rate": 7.663745729986633e-07, - "loss": 0.057, - "step": 65610 - }, - { - "epoch": 4.8730135155205705, - "grad_norm": 2.359872341156006, - "learning_rate": 7.61918906876578e-07, - "loss": 0.0451, - "step": 65620 - }, - { - "epoch": 4.8737561265409175, - "grad_norm": 1.7714812755584717, - "learning_rate": 7.574632407544928e-07, - "loss": 0.083, - "step": 65630 - }, - { - "epoch": 4.874498737561265, - "grad_norm": 3.7563037872314453, - "learning_rate": 7.530075746324076e-07, - "loss": 0.0763, - "step": 65640 - }, - { - "epoch": 4.875241348581613, - "grad_norm": 0.9975684285163879, - "learning_rate": 7.485519085103223e-07, - "loss": 0.0359, - "step": 65650 - }, - { - "epoch": 4.87598395960196, - "grad_norm": 1.6247881650924683, - "learning_rate": 7.44096242388237e-07, - "loss": 0.0349, - "step": 65660 - }, - { - "epoch": 4.876726570622308, - "grad_norm": 2.389453887939453, - "learning_rate": 7.396405762661518e-07, - "loss": 0.0537, - "step": 65670 - }, - { - "epoch": 4.877469181642655, - "grad_norm": 2.3572044372558594, - "learning_rate": 7.351849101440666e-07, - "loss": 0.0356, - "step": 65680 - }, - { - "epoch": 4.878211792663003, - "grad_norm": 2.2939834594726562, - "learning_rate": 7.307292440219813e-07, - "loss": 0.0741, - "step": 65690 - }, - { - "epoch": 4.878954403683351, - "grad_norm": 1.0709495544433594, - "learning_rate": 7.262735778998961e-07, - "loss": 0.0741, - "step": 65700 - }, - { - "epoch": 4.879697014703698, - "grad_norm": 1.768277645111084, - "learning_rate": 7.218179117778109e-07, - "loss": 0.0616, - "step": 65710 - }, - { - "epoch": 4.880439625724046, - "grad_norm": 1.0159682035446167, - "learning_rate": 7.173622456557255e-07, - "loss": 0.0614, - "step": 65720 - }, - { - "epoch": 4.881182236744394, - "grad_norm": 0.6093847751617432, - "learning_rate": 7.129065795336402e-07, - "loss": 0.0293, - "step": 65730 - }, - { - "epoch": 4.881924847764741, - "grad_norm": 0.5181077718734741, - "learning_rate": 7.084509134115551e-07, - "loss": 0.0546, - "step": 65740 - }, - { - "epoch": 4.882667458785089, - "grad_norm": 0.47307220101356506, - "learning_rate": 7.039952472894698e-07, - "loss": 0.0434, - "step": 65750 - }, - { - "epoch": 4.883410069805436, - "grad_norm": 1.3281551599502563, - "learning_rate": 6.995395811673846e-07, - "loss": 0.0622, - "step": 65760 - }, - { - "epoch": 4.884152680825784, - "grad_norm": 1.8027498722076416, - "learning_rate": 6.950839150452993e-07, - "loss": 0.057, - "step": 65770 - }, - { - "epoch": 4.8848952918461315, - "grad_norm": 1.0578224658966064, - "learning_rate": 6.90628248923214e-07, - "loss": 0.0633, - "step": 65780 - }, - { - "epoch": 4.8856379028664785, - "grad_norm": 1.2955671548843384, - "learning_rate": 6.861725828011288e-07, - "loss": 0.0574, - "step": 65790 - }, - { - "epoch": 4.886380513886826, - "grad_norm": 2.2496156692504883, - "learning_rate": 6.817169166790436e-07, - "loss": 0.0629, - "step": 65800 - }, - { - "epoch": 4.887123124907173, - "grad_norm": 1.6367658376693726, - "learning_rate": 6.772612505569583e-07, - "loss": 0.0392, - "step": 65810 - }, - { - "epoch": 4.887865735927521, - "grad_norm": 0.9300660490989685, - "learning_rate": 6.72805584434873e-07, - "loss": 0.0769, - "step": 65820 - }, - { - "epoch": 4.888608346947869, - "grad_norm": 0.5818589329719543, - "learning_rate": 6.683499183127877e-07, - "loss": 0.0558, - "step": 65830 - }, - { - "epoch": 4.889350957968216, - "grad_norm": 2.5883166790008545, - "learning_rate": 6.638942521907026e-07, - "loss": 0.0754, - "step": 65840 - }, - { - "epoch": 4.890093568988564, - "grad_norm": 2.193958282470703, - "learning_rate": 6.594385860686173e-07, - "loss": 0.0583, - "step": 65850 - }, - { - "epoch": 4.890836180008911, - "grad_norm": 1.7549993991851807, - "learning_rate": 6.54982919946532e-07, - "loss": 0.0608, - "step": 65860 - }, - { - "epoch": 4.891578791029259, - "grad_norm": 1.5535106658935547, - "learning_rate": 6.505272538244468e-07, - "loss": 0.0469, - "step": 65870 - }, - { - "epoch": 4.892321402049607, - "grad_norm": 4.540996074676514, - "learning_rate": 6.460715877023615e-07, - "loss": 0.0707, - "step": 65880 - }, - { - "epoch": 4.893064013069954, - "grad_norm": 0.570190966129303, - "learning_rate": 6.416159215802762e-07, - "loss": 0.0562, - "step": 65890 - }, - { - "epoch": 4.893806624090302, - "grad_norm": 2.189643621444702, - "learning_rate": 6.371602554581911e-07, - "loss": 0.0557, - "step": 65900 - }, - { - "epoch": 4.894549235110649, - "grad_norm": 1.114089846611023, - "learning_rate": 6.327045893361058e-07, - "loss": 0.0915, - "step": 65910 - }, - { - "epoch": 4.895291846130997, - "grad_norm": 1.200714349746704, - "learning_rate": 6.282489232140205e-07, - "loss": 0.037, - "step": 65920 - }, - { - "epoch": 4.8960344571513446, - "grad_norm": 1.7422734498977661, - "learning_rate": 6.237932570919352e-07, - "loss": 0.0929, - "step": 65930 - }, - { - "epoch": 4.896777068171692, - "grad_norm": 1.0818594694137573, - "learning_rate": 6.1933759096985e-07, - "loss": 0.0399, - "step": 65940 - }, - { - "epoch": 4.8975196791920395, - "grad_norm": 0.6319842338562012, - "learning_rate": 6.148819248477648e-07, - "loss": 0.0436, - "step": 65950 - }, - { - "epoch": 4.8982622902123865, - "grad_norm": 1.0906407833099365, - "learning_rate": 6.104262587256795e-07, - "loss": 0.0474, - "step": 65960 - }, - { - "epoch": 4.899004901232734, - "grad_norm": 0.6139366626739502, - "learning_rate": 6.059705926035942e-07, - "loss": 0.052, - "step": 65970 - }, - { - "epoch": 4.899747512253082, - "grad_norm": 1.4248707294464111, - "learning_rate": 6.01514926481509e-07, - "loss": 0.0459, - "step": 65980 - }, - { - "epoch": 4.900490123273429, - "grad_norm": 0.5179479122161865, - "learning_rate": 5.970592603594237e-07, - "loss": 0.0582, - "step": 65990 - }, - { - "epoch": 4.901232734293777, - "grad_norm": 4.057821273803711, - "learning_rate": 5.926035942373385e-07, - "loss": 0.0822, - "step": 66000 - }, - { - "epoch": 4.901975345314124, - "grad_norm": 0.7307072877883911, - "learning_rate": 5.881479281152533e-07, - "loss": 0.0771, - "step": 66010 - }, - { - "epoch": 4.902717956334472, - "grad_norm": 1.7318345308303833, - "learning_rate": 5.836922619931679e-07, - "loss": 0.0519, - "step": 66020 - }, - { - "epoch": 4.90346056735482, - "grad_norm": 0.3162935972213745, - "learning_rate": 5.792365958710828e-07, - "loss": 0.0516, - "step": 66030 - }, - { - "epoch": 4.904203178375167, - "grad_norm": 1.487998127937317, - "learning_rate": 5.747809297489975e-07, - "loss": 0.0676, - "step": 66040 - }, - { - "epoch": 4.904945789395515, - "grad_norm": 1.0163052082061768, - "learning_rate": 5.703252636269122e-07, - "loss": 0.0582, - "step": 66050 - }, - { - "epoch": 4.905688400415862, - "grad_norm": 2.9083852767944336, - "learning_rate": 5.65869597504827e-07, - "loss": 0.0487, - "step": 66060 - }, - { - "epoch": 4.90643101143621, - "grad_norm": 1.4686026573181152, - "learning_rate": 5.614139313827418e-07, - "loss": 0.0548, - "step": 66070 - }, - { - "epoch": 4.907173622456558, - "grad_norm": 2.3733696937561035, - "learning_rate": 5.569582652606564e-07, - "loss": 0.0789, - "step": 66080 - }, - { - "epoch": 4.907916233476905, - "grad_norm": 0.551871120929718, - "learning_rate": 5.525025991385712e-07, - "loss": 0.044, - "step": 66090 - }, - { - "epoch": 4.9086588444972525, - "grad_norm": 0.9300947189331055, - "learning_rate": 5.48046933016486e-07, - "loss": 0.0611, - "step": 66100 - }, - { - "epoch": 4.9094014555175995, - "grad_norm": 0.5513383746147156, - "learning_rate": 5.435912668944008e-07, - "loss": 0.0764, - "step": 66110 - }, - { - "epoch": 4.910144066537947, - "grad_norm": 1.1287389993667603, - "learning_rate": 5.391356007723155e-07, - "loss": 0.0416, - "step": 66120 - }, - { - "epoch": 4.910886677558295, - "grad_norm": 0.9969607591629028, - "learning_rate": 5.346799346502302e-07, - "loss": 0.0528, - "step": 66130 - }, - { - "epoch": 4.911629288578642, - "grad_norm": 1.272377371788025, - "learning_rate": 5.30224268528145e-07, - "loss": 0.0528, - "step": 66140 - }, - { - "epoch": 4.91237189959899, - "grad_norm": 0.5094819068908691, - "learning_rate": 5.257686024060597e-07, - "loss": 0.0744, - "step": 66150 - }, - { - "epoch": 4.913114510619337, - "grad_norm": 1.5360733270645142, - "learning_rate": 5.213129362839744e-07, - "loss": 0.0431, - "step": 66160 - }, - { - "epoch": 4.913857121639685, - "grad_norm": 2.1293842792510986, - "learning_rate": 5.168572701618893e-07, - "loss": 0.0791, - "step": 66170 - }, - { - "epoch": 4.914599732660033, - "grad_norm": 2.370560646057129, - "learning_rate": 5.124016040398039e-07, - "loss": 0.0784, - "step": 66180 - }, - { - "epoch": 4.91534234368038, - "grad_norm": 0.7912160158157349, - "learning_rate": 5.079459379177187e-07, - "loss": 0.0877, - "step": 66190 - }, - { - "epoch": 4.916084954700728, - "grad_norm": 2.006499767303467, - "learning_rate": 5.034902717956335e-07, - "loss": 0.0906, - "step": 66200 - }, - { - "epoch": 4.916827565721075, - "grad_norm": 2.1745078563690186, - "learning_rate": 4.990346056735482e-07, - "loss": 0.0558, - "step": 66210 - }, - { - "epoch": 4.917570176741423, - "grad_norm": 1.09943425655365, - "learning_rate": 4.94578939551463e-07, - "loss": 0.041, - "step": 66220 - }, - { - "epoch": 4.918312787761771, - "grad_norm": 1.0442047119140625, - "learning_rate": 4.901232734293777e-07, - "loss": 0.0659, - "step": 66230 - }, - { - "epoch": 4.919055398782118, - "grad_norm": 4.601158618927002, - "learning_rate": 4.856676073072924e-07, - "loss": 0.0584, - "step": 66240 - }, - { - "epoch": 4.919798009802466, - "grad_norm": 0.6347880363464355, - "learning_rate": 4.812119411852072e-07, - "loss": 0.0551, - "step": 66250 - }, - { - "epoch": 4.920540620822813, - "grad_norm": 1.1013524532318115, - "learning_rate": 4.76756275063122e-07, - "loss": 0.0558, - "step": 66260 - }, - { - "epoch": 4.9212832318431605, - "grad_norm": 1.1002392768859863, - "learning_rate": 4.723006089410367e-07, - "loss": 0.0531, - "step": 66270 - }, - { - "epoch": 4.922025842863508, - "grad_norm": 1.1361026763916016, - "learning_rate": 4.6784494281895144e-07, - "loss": 0.0227, - "step": 66280 - }, - { - "epoch": 4.922768453883855, - "grad_norm": 0.28624916076660156, - "learning_rate": 4.633892766968662e-07, - "loss": 0.0618, - "step": 66290 - }, - { - "epoch": 4.923511064904203, - "grad_norm": 0.3221977651119232, - "learning_rate": 4.589336105747809e-07, - "loss": 0.0617, - "step": 66300 - }, - { - "epoch": 4.92425367592455, - "grad_norm": 2.581843137741089, - "learning_rate": 4.544779444526957e-07, - "loss": 0.046, - "step": 66310 - }, - { - "epoch": 4.924996286944898, - "grad_norm": 1.0967390537261963, - "learning_rate": 4.5002227833061047e-07, - "loss": 0.0466, - "step": 66320 - }, - { - "epoch": 4.925738897965246, - "grad_norm": 2.719881296157837, - "learning_rate": 4.455666122085252e-07, - "loss": 0.0515, - "step": 66330 - }, - { - "epoch": 4.926481508985593, - "grad_norm": 1.5159376859664917, - "learning_rate": 4.411109460864399e-07, - "loss": 0.0527, - "step": 66340 - }, - { - "epoch": 4.927224120005941, - "grad_norm": 1.2129600048065186, - "learning_rate": 4.3665527996435465e-07, - "loss": 0.0918, - "step": 66350 - }, - { - "epoch": 4.927966731026288, - "grad_norm": 3.8259670734405518, - "learning_rate": 4.3219961384226944e-07, - "loss": 0.0785, - "step": 66360 - }, - { - "epoch": 4.928709342046636, - "grad_norm": 0.3441977798938751, - "learning_rate": 4.277439477201842e-07, - "loss": 0.0542, - "step": 66370 - }, - { - "epoch": 4.929451953066984, - "grad_norm": 0.8108426332473755, - "learning_rate": 4.232882815980989e-07, - "loss": 0.0517, - "step": 66380 - }, - { - "epoch": 4.930194564087331, - "grad_norm": 3.1225900650024414, - "learning_rate": 4.188326154760137e-07, - "loss": 0.0525, - "step": 66390 - }, - { - "epoch": 4.930937175107679, - "grad_norm": 2.4837539196014404, - "learning_rate": 4.1437694935392846e-07, - "loss": 0.0445, - "step": 66400 - }, - { - "epoch": 4.931679786128026, - "grad_norm": 0.961150050163269, - "learning_rate": 4.0992128323184314e-07, - "loss": 0.0613, - "step": 66410 - }, - { - "epoch": 4.932422397148374, - "grad_norm": 0.4817768931388855, - "learning_rate": 4.054656171097579e-07, - "loss": 0.0348, - "step": 66420 - }, - { - "epoch": 4.9331650081687215, - "grad_norm": 3.031409502029419, - "learning_rate": 4.0100995098767265e-07, - "loss": 0.0548, - "step": 66430 - }, - { - "epoch": 4.9339076191890685, - "grad_norm": 0.8737612366676331, - "learning_rate": 3.9655428486558743e-07, - "loss": 0.0305, - "step": 66440 - }, - { - "epoch": 4.934650230209416, - "grad_norm": 1.2402093410491943, - "learning_rate": 3.9209861874350216e-07, - "loss": 0.0571, - "step": 66450 - }, - { - "epoch": 4.935392841229763, - "grad_norm": 1.8115334510803223, - "learning_rate": 3.876429526214169e-07, - "loss": 0.037, - "step": 66460 - }, - { - "epoch": 4.936135452250111, - "grad_norm": 0.969933271408081, - "learning_rate": 3.8318728649933167e-07, - "loss": 0.0609, - "step": 66470 - }, - { - "epoch": 4.936878063270459, - "grad_norm": 1.2570370435714722, - "learning_rate": 3.787316203772464e-07, - "loss": 0.0489, - "step": 66480 - }, - { - "epoch": 4.937620674290806, - "grad_norm": 0.9259024262428284, - "learning_rate": 3.7427595425516113e-07, - "loss": 0.0624, - "step": 66490 - }, - { - "epoch": 4.938363285311154, - "grad_norm": 1.2812902927398682, - "learning_rate": 3.698202881330759e-07, - "loss": 0.0582, - "step": 66500 - }, - { - "epoch": 4.939105896331501, - "grad_norm": 1.1236120462417603, - "learning_rate": 3.6536462201099064e-07, - "loss": 0.0588, - "step": 66510 - }, - { - "epoch": 4.939848507351849, - "grad_norm": 2.4268791675567627, - "learning_rate": 3.6090895588890543e-07, - "loss": 0.0463, - "step": 66520 - }, - { - "epoch": 4.940591118372197, - "grad_norm": 1.1556020975112915, - "learning_rate": 3.564532897668201e-07, - "loss": 0.0474, - "step": 66530 - }, - { - "epoch": 4.941333729392544, - "grad_norm": 0.47208043932914734, - "learning_rate": 3.519976236447349e-07, - "loss": 0.054, - "step": 66540 - }, - { - "epoch": 4.942076340412892, - "grad_norm": 2.0012338161468506, - "learning_rate": 3.4754195752264967e-07, - "loss": 0.0446, - "step": 66550 - }, - { - "epoch": 4.942818951433239, - "grad_norm": 6.049137592315674, - "learning_rate": 3.430862914005644e-07, - "loss": 0.0627, - "step": 66560 - }, - { - "epoch": 4.943561562453587, - "grad_norm": 1.165906548500061, - "learning_rate": 3.3863062527847913e-07, - "loss": 0.067, - "step": 66570 - }, - { - "epoch": 4.944304173473935, - "grad_norm": 2.933342218399048, - "learning_rate": 3.3417495915639386e-07, - "loss": 0.0588, - "step": 66580 - }, - { - "epoch": 4.945046784494282, - "grad_norm": 0.5011084675788879, - "learning_rate": 3.2971929303430864e-07, - "loss": 0.0507, - "step": 66590 - }, - { - "epoch": 4.9457893955146295, - "grad_norm": 0.8596967458724976, - "learning_rate": 3.252636269122234e-07, - "loss": 0.0707, - "step": 66600 - }, - { - "epoch": 4.9465320065349765, - "grad_norm": 2.5871288776397705, - "learning_rate": 3.208079607901381e-07, - "loss": 0.0587, - "step": 66610 - }, - { - "epoch": 4.947274617555324, - "grad_norm": 0.5067526698112488, - "learning_rate": 3.163522946680529e-07, - "loss": 0.0519, - "step": 66620 - }, - { - "epoch": 4.948017228575672, - "grad_norm": 0.439694344997406, - "learning_rate": 3.118966285459676e-07, - "loss": 0.0436, - "step": 66630 - }, - { - "epoch": 4.948759839596019, - "grad_norm": 2.3128857612609863, - "learning_rate": 3.074409624238824e-07, - "loss": 0.0436, - "step": 66640 - }, - { - "epoch": 4.949502450616367, - "grad_norm": 1.841361403465271, - "learning_rate": 3.029852963017971e-07, - "loss": 0.0492, - "step": 66650 - }, - { - "epoch": 4.950245061636715, - "grad_norm": 1.778883457183838, - "learning_rate": 2.9852963017971185e-07, - "loss": 0.038, - "step": 66660 - }, - { - "epoch": 4.950987672657062, - "grad_norm": 1.8489772081375122, - "learning_rate": 2.9407396405762663e-07, - "loss": 0.0729, - "step": 66670 - }, - { - "epoch": 4.95173028367741, - "grad_norm": 2.0135104656219482, - "learning_rate": 2.896182979355414e-07, - "loss": 0.0562, - "step": 66680 - }, - { - "epoch": 4.952472894697757, - "grad_norm": 0.24336954951286316, - "learning_rate": 2.851626318134561e-07, - "loss": 0.0569, - "step": 66690 - }, - { - "epoch": 4.953215505718105, - "grad_norm": 0.4141107201576233, - "learning_rate": 2.807069656913709e-07, - "loss": 0.0319, - "step": 66700 - }, - { - "epoch": 4.953958116738453, - "grad_norm": 0.26303309202194214, - "learning_rate": 2.762512995692856e-07, - "loss": 0.0629, - "step": 66710 - }, - { - "epoch": 4.9547007277588, - "grad_norm": 0.37212514877319336, - "learning_rate": 2.717956334472004e-07, - "loss": 0.0382, - "step": 66720 - }, - { - "epoch": 4.955443338779148, - "grad_norm": 2.371757745742798, - "learning_rate": 2.673399673251151e-07, - "loss": 0.093, - "step": 66730 - }, - { - "epoch": 4.956185949799495, - "grad_norm": 2.2808210849761963, - "learning_rate": 2.6288430120302985e-07, - "loss": 0.0723, - "step": 66740 - }, - { - "epoch": 4.956928560819843, - "grad_norm": 2.8832616806030273, - "learning_rate": 2.5842863508094463e-07, - "loss": 0.0392, - "step": 66750 - }, - { - "epoch": 4.9576711718401905, - "grad_norm": 0.5029025077819824, - "learning_rate": 2.5397296895885936e-07, - "loss": 0.0486, - "step": 66760 - }, - { - "epoch": 4.9584137828605375, - "grad_norm": 2.287649154663086, - "learning_rate": 2.495173028367741e-07, - "loss": 0.0458, - "step": 66770 - }, - { - "epoch": 4.959156393880885, - "grad_norm": 2.1999528408050537, - "learning_rate": 2.4506163671468887e-07, - "loss": 0.0567, - "step": 66780 - }, - { - "epoch": 4.959899004901232, - "grad_norm": 3.0611720085144043, - "learning_rate": 2.406059705926036e-07, - "loss": 0.0937, - "step": 66790 - }, - { - "epoch": 4.96064161592158, - "grad_norm": 2.270627737045288, - "learning_rate": 2.3615030447051836e-07, - "loss": 0.0653, - "step": 66800 - }, - { - "epoch": 4.961384226941928, - "grad_norm": 0.9296445250511169, - "learning_rate": 2.316946383484331e-07, - "loss": 0.0645, - "step": 66810 - }, - { - "epoch": 4.962126837962275, - "grad_norm": 3.700183868408203, - "learning_rate": 2.2723897222634784e-07, - "loss": 0.0964, - "step": 66820 - }, - { - "epoch": 4.962869448982623, - "grad_norm": 1.576913595199585, - "learning_rate": 2.227833061042626e-07, - "loss": 0.0419, - "step": 66830 - }, - { - "epoch": 4.96361206000297, - "grad_norm": 3.2986557483673096, - "learning_rate": 2.1832763998217733e-07, - "loss": 0.0588, - "step": 66840 - }, - { - "epoch": 4.964354671023318, - "grad_norm": 1.3685272932052612, - "learning_rate": 2.138719738600921e-07, - "loss": 0.0492, - "step": 66850 - }, - { - "epoch": 4.965097282043666, - "grad_norm": 0.872087836265564, - "learning_rate": 2.0941630773800684e-07, - "loss": 0.0318, - "step": 66860 - }, - { - "epoch": 4.965839893064013, - "grad_norm": 0.4379057288169861, - "learning_rate": 2.0496064161592157e-07, - "loss": 0.0529, - "step": 66870 - }, - { - "epoch": 4.966582504084361, - "grad_norm": 1.2920769453048706, - "learning_rate": 2.0050497549383632e-07, - "loss": 0.0887, - "step": 66880 - }, - { - "epoch": 4.967325115104709, - "grad_norm": 1.0842275619506836, - "learning_rate": 1.9604930937175108e-07, - "loss": 0.0444, - "step": 66890 - }, - { - "epoch": 4.968067726125056, - "grad_norm": 1.154530644416809, - "learning_rate": 1.9159364324966584e-07, - "loss": 0.0532, - "step": 66900 - }, - { - "epoch": 4.9688103371454035, - "grad_norm": 0.3080504834651947, - "learning_rate": 1.8713797712758057e-07, - "loss": 0.0344, - "step": 66910 - }, - { - "epoch": 4.9695529481657505, - "grad_norm": 2.1118993759155273, - "learning_rate": 1.8268231100549532e-07, - "loss": 0.063, - "step": 66920 - }, - { - "epoch": 4.9702955591860984, - "grad_norm": 3.921952486038208, - "learning_rate": 1.7822664488341005e-07, - "loss": 0.0543, - "step": 66930 - }, - { - "epoch": 4.971038170206446, - "grad_norm": 1.0275546312332153, - "learning_rate": 1.7377097876132483e-07, - "loss": 0.0334, - "step": 66940 - }, - { - "epoch": 4.971780781226793, - "grad_norm": 3.7615277767181396, - "learning_rate": 1.6931531263923956e-07, - "loss": 0.0489, - "step": 66950 - }, - { - "epoch": 4.972523392247141, - "grad_norm": 2.601187229156494, - "learning_rate": 1.6485964651715432e-07, - "loss": 0.0696, - "step": 66960 - }, - { - "epoch": 4.973266003267488, - "grad_norm": 2.8414504528045654, - "learning_rate": 1.6040398039506905e-07, - "loss": 0.0683, - "step": 66970 - }, - { - "epoch": 4.974008614287836, - "grad_norm": 0.3260228633880615, - "learning_rate": 1.559483142729838e-07, - "loss": 0.0238, - "step": 66980 - }, - { - "epoch": 4.974751225308184, - "grad_norm": 0.6493045091629028, - "learning_rate": 1.5149264815089856e-07, - "loss": 0.0541, - "step": 66990 - }, - { - "epoch": 4.975493836328531, - "grad_norm": 1.2345525026321411, - "learning_rate": 1.4703698202881332e-07, - "loss": 0.0544, - "step": 67000 - }, - { - "epoch": 4.976236447348879, - "grad_norm": 1.9710088968276978, - "learning_rate": 1.4258131590672805e-07, - "loss": 0.0466, - "step": 67010 - }, - { - "epoch": 4.976979058369226, - "grad_norm": 0.2602188289165497, - "learning_rate": 1.381256497846428e-07, - "loss": 0.0513, - "step": 67020 - }, - { - "epoch": 4.977721669389574, - "grad_norm": 2.1740708351135254, - "learning_rate": 1.3366998366255756e-07, - "loss": 0.0645, - "step": 67030 - }, - { - "epoch": 4.978464280409922, - "grad_norm": 2.4682114124298096, - "learning_rate": 1.2921431754047231e-07, - "loss": 0.0687, - "step": 67040 - }, - { - "epoch": 4.979206891430269, - "grad_norm": 0.23082710802555084, - "learning_rate": 1.2475865141838704e-07, - "loss": 0.0425, - "step": 67050 - }, - { - "epoch": 4.979949502450617, - "grad_norm": 0.2882836163043976, - "learning_rate": 1.203029852963018e-07, - "loss": 0.0423, - "step": 67060 - }, - { - "epoch": 4.980692113470964, - "grad_norm": 1.1012338399887085, - "learning_rate": 1.1584731917421656e-07, - "loss": 0.0695, - "step": 67070 - }, - { - "epoch": 4.9814347244913115, - "grad_norm": 1.7664963006973267, - "learning_rate": 1.113916530521313e-07, - "loss": 0.051, - "step": 67080 - }, - { - "epoch": 4.982177335511659, - "grad_norm": 3.3410887718200684, - "learning_rate": 1.0693598693004605e-07, - "loss": 0.0547, - "step": 67090 - }, - { - "epoch": 4.982919946532006, - "grad_norm": 1.7740800380706787, - "learning_rate": 1.0248032080796078e-07, - "loss": 0.0407, - "step": 67100 - }, - { - "epoch": 4.983662557552354, - "grad_norm": 1.093974232673645, - "learning_rate": 9.802465468587554e-08, - "loss": 0.0456, - "step": 67110 - }, - { - "epoch": 4.984405168572701, - "grad_norm": 1.4569469690322876, - "learning_rate": 9.356898856379028e-08, - "loss": 0.0691, - "step": 67120 - }, - { - "epoch": 4.985147779593049, - "grad_norm": 2.364649534225464, - "learning_rate": 8.911332244170503e-08, - "loss": 0.0769, - "step": 67130 - }, - { - "epoch": 4.985890390613397, - "grad_norm": 0.6497974991798401, - "learning_rate": 8.465765631961978e-08, - "loss": 0.0882, - "step": 67140 - }, - { - "epoch": 4.986633001633744, - "grad_norm": 1.16084623336792, - "learning_rate": 8.020199019753452e-08, - "loss": 0.0376, - "step": 67150 - }, - { - "epoch": 4.987375612654092, - "grad_norm": 2.3974056243896484, - "learning_rate": 7.574632407544928e-08, - "loss": 0.03, - "step": 67160 - }, - { - "epoch": 4.988118223674439, - "grad_norm": 0.8052327036857605, - "learning_rate": 7.129065795336402e-08, - "loss": 0.0542, - "step": 67170 - }, - { - "epoch": 4.988860834694787, - "grad_norm": 0.5777415037155151, - "learning_rate": 6.683499183127878e-08, - "loss": 0.0494, - "step": 67180 - }, - { - "epoch": 4.989603445715135, - "grad_norm": 1.4238885641098022, - "learning_rate": 6.237932570919352e-08, - "loss": 0.0537, - "step": 67190 - }, - { - "epoch": 4.990346056735482, - "grad_norm": 3.352360963821411, - "learning_rate": 5.792365958710828e-08, - "loss": 0.0523, - "step": 67200 - }, - { - "epoch": 4.99108866775583, - "grad_norm": 1.1126995086669922, - "learning_rate": 5.346799346502303e-08, - "loss": 0.05, - "step": 67210 - }, - { - "epoch": 4.991831278776177, - "grad_norm": 1.2887812852859497, - "learning_rate": 4.901232734293777e-08, - "loss": 0.0496, - "step": 67220 - }, - { - "epoch": 4.992573889796525, - "grad_norm": 4.443135738372803, - "learning_rate": 4.455666122085251e-08, - "loss": 0.0526, - "step": 67230 - }, - { - "epoch": 4.9933165008168725, - "grad_norm": 1.4799822568893433, - "learning_rate": 4.010099509876726e-08, - "loss": 0.0601, - "step": 67240 - }, - { - "epoch": 4.9940591118372195, - "grad_norm": 0.5479670166969299, - "learning_rate": 3.564532897668201e-08, - "loss": 0.0607, - "step": 67250 - }, - { - "epoch": 4.994801722857567, - "grad_norm": 2.0314948558807373, - "learning_rate": 3.118966285459676e-08, - "loss": 0.0603, - "step": 67260 - }, - { - "epoch": 4.995544333877914, - "grad_norm": 0.9696687459945679, - "learning_rate": 2.6733996732511514e-08, - "loss": 0.0572, - "step": 67270 - }, - { - "epoch": 4.996286944898262, - "grad_norm": 1.398507833480835, - "learning_rate": 2.2278330610426256e-08, - "loss": 0.0886, - "step": 67280 - }, - { - "epoch": 4.99702955591861, - "grad_norm": 1.3897038698196411, - "learning_rate": 1.7822664488341006e-08, - "loss": 0.0742, - "step": 67290 - }, - { - "epoch": 4.997772166938957, - "grad_norm": 1.0769445896148682, - "learning_rate": 1.3366998366255757e-08, - "loss": 0.0252, - "step": 67300 - }, - { - "epoch": 4.998514777959305, - "grad_norm": 2.8286406993865967, - "learning_rate": 8.911332244170503e-09, - "loss": 0.05, - "step": 67310 - }, - { - "epoch": 4.999257388979652, - "grad_norm": 2.435650110244751, - "learning_rate": 4.4556661220852515e-09, - "loss": 0.0417, - "step": 67320 - }, - { - "epoch": 5.0, - "grad_norm": 6.435393333435059, - "learning_rate": 0.0, - "loss": 0.0683, - "step": 67330 - }, { "epoch": 5.0, - "eval_f1": 0.0, - "eval_loss": 0.05167969688773155, - "eval_runtime": 794.3074, - "eval_samples_per_second": 47.864, - "eval_steps_per_second": 2.993, - "step": 67330 + "eval_f1": 0.33031292965957215, + "eval_loss": 0.041273970156908035, + "eval_runtime": 1003.0923, + "eval_samples_per_second": 37.902, + "eval_steps_per_second": 0.593, + "step": 16835 }, { "epoch": 5.0, - "step": 67330, - "total_flos": 8.34383324330106e+19, - "train_loss": 0.0, - "train_runtime": 0.0747, - "train_samples_per_second": 14413399.944, - "train_steps_per_second": 900900.217 + "step": 16835, + "total_flos": 8.348297673446728e+19, + "train_loss": 0.062463992788064436, + "train_runtime": 18625.883, + "train_samples_per_second": 57.834, + "train_steps_per_second": 0.904 } ], "logging_steps": 10, - "max_steps": 67330, + "max_steps": 16835, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, @@ -47211,8 +11861,8 @@ "attributes": {} } }, - "total_flos": 8.34383324330106e+19, - "train_batch_size": 16, + "total_flos": 8.348297673446728e+19, + "train_batch_size": 64, "trial_name": null, "trial_params": null }