diff --git "a/trainer_state.json" "b/trainer_state.json" --- "a/trainer_state.json" +++ "b/trainer_state.json" @@ -1,7 +1,7 @@ { - "best_metric": 0.05167969688773155, - "best_model_checkpoint": "./microsoft_beit-base-patch16-224-pt22k-ft22k_epoch_5/checkpoint-67330", - "epoch": 5.0, + "best_metric": 0.05480470508337021, + "best_model_checkpoint": "./default_model/checkpoint-67330", + "epoch": 10.0, "eval_steps": 500, "global_step": 67330, "is_hyper_param_search": false, @@ -9,47195 +9,47240 @@ "is_world_process_zero": true, "log_history": [ { - "epoch": 0.0007426110203475419, - "grad_norm": 19.504955291748047, - "learning_rate": 2.9995544333877916e-05, - "loss": 0.4424, + "epoch": 0.0014852220406950838, + "grad_norm": 1.9820189476013184, + "learning_rate": 9.998514777959307e-06, + "loss": 0.3847, "step": 10 }, { - "epoch": 0.0014852220406950838, - "grad_norm": 5.601377964019775, - "learning_rate": 2.999108866775583e-05, - "loss": 0.238, + "epoch": 0.0029704440813901676, + "grad_norm": 1.3927024602890015, + "learning_rate": 9.99702955591861e-06, + "loss": 0.2215, "step": 20 }, { - "epoch": 0.002227833061042626, - "grad_norm": 8.971532821655273, - "learning_rate": 2.9986633001633746e-05, - "loss": 0.2289, + "epoch": 0.004455666122085252, + "grad_norm": 1.5467994213104248, + "learning_rate": 9.995544333877916e-06, + "loss": 0.1743, "step": 30 }, { - "epoch": 0.0029704440813901676, - "grad_norm": 3.3992252349853516, - "learning_rate": 2.998217733551166e-05, - "loss": 0.2005, + "epoch": 0.005940888162780335, + "grad_norm": 2.4779608249664307, + "learning_rate": 9.99405911183722e-06, + "loss": 0.1726, "step": 40 }, { - "epoch": 0.0037130551017377097, - "grad_norm": 13.932947158813477, - "learning_rate": 2.9977721669389572e-05, - "loss": 0.2329, + "epoch": 0.007426110203475419, + "grad_norm": 1.2798831462860107, + "learning_rate": 9.992573889796525e-06, + "loss": 0.1409, "step": 50 }, { - "epoch": 0.004455666122085252, - "grad_norm": 2.9520821571350098, - "learning_rate": 2.997326600326749e-05, - "loss": 0.2114, + "epoch": 0.008911332244170504, + "grad_norm": 1.459090232849121, + "learning_rate": 9.99108866775583e-06, + "loss": 0.1564, "step": 60 }, { - "epoch": 0.005198277142432793, - "grad_norm": 4.807623386383057, - "learning_rate": 2.9968810337145402e-05, - "loss": 0.1468, + "epoch": 0.010396554284865587, + "grad_norm": 1.5025994777679443, + "learning_rate": 9.989603445715135e-06, + "loss": 0.148, "step": 70 }, { - "epoch": 0.005940888162780335, - "grad_norm": 4.0346903800964355, - "learning_rate": 2.9964354671023317e-05, - "loss": 0.2179, + "epoch": 0.01188177632556067, + "grad_norm": 0.8423486948013306, + "learning_rate": 9.98811822367444e-06, + "loss": 0.1185, "step": 80 }, { - "epoch": 0.006683499183127877, - "grad_norm": 4.7647199630737305, - "learning_rate": 2.9959899004901236e-05, - "loss": 0.1647, + "epoch": 0.013366998366255755, + "grad_norm": 2.3561830520629883, + "learning_rate": 9.986633001633746e-06, + "loss": 0.1449, "step": 90 }, { - "epoch": 0.007426110203475419, - "grad_norm": 9.930302619934082, - "learning_rate": 2.9955443338779147e-05, - "loss": 0.1436, + "epoch": 0.014852220406950839, + "grad_norm": 1.7730501890182495, + "learning_rate": 9.98514777959305e-06, + "loss": 0.1161, "step": 100 }, { - "epoch": 0.008168721223822962, - "grad_norm": 2.7145912647247314, - "learning_rate": 2.9950987672657062e-05, - "loss": 0.1408, + "epoch": 0.016337442447645924, + "grad_norm": 4.120682716369629, + "learning_rate": 9.983662557552355e-06, + "loss": 0.1291, "step": 110 }, { - "epoch": 0.008911332244170504, - "grad_norm": 4.7143940925598145, - "learning_rate": 2.994653200653498e-05, - "loss": 0.1411, + "epoch": 0.01782266448834101, + "grad_norm": 1.2402255535125732, + "learning_rate": 9.98217733551166e-06, + "loss": 0.1095, "step": 120 }, { - "epoch": 0.009653943264518046, - "grad_norm": 5.331055641174316, - "learning_rate": 2.9942076340412892e-05, - "loss": 0.1392, + "epoch": 0.019307886529036093, + "grad_norm": 1.8307442665100098, + "learning_rate": 9.980692113470965e-06, + "loss": 0.118, "step": 130 }, { - "epoch": 0.010396554284865587, - "grad_norm": 4.96783447265625, - "learning_rate": 2.9937620674290807e-05, - "loss": 0.1772, + "epoch": 0.020793108569731173, + "grad_norm": 2.220118761062622, + "learning_rate": 9.97920689143027e-06, + "loss": 0.1311, "step": 140 }, { - "epoch": 0.011139165305213129, - "grad_norm": 3.5116193294525146, - "learning_rate": 2.9933165008168722e-05, - "loss": 0.111, + "epoch": 0.022278330610426257, + "grad_norm": 1.129960060119629, + "learning_rate": 9.977721669389576e-06, + "loss": 0.1183, "step": 150 }, { - "epoch": 0.01188177632556067, - "grad_norm": 3.521408796310425, - "learning_rate": 2.9928709342046637e-05, - "loss": 0.1418, + "epoch": 0.02376355265112134, + "grad_norm": 1.3577617406845093, + "learning_rate": 9.976236447348879e-06, + "loss": 0.1153, "step": 160 }, { - "epoch": 0.012624387345908213, - "grad_norm": 2.7895073890686035, - "learning_rate": 2.9924253675924552e-05, - "loss": 0.1338, + "epoch": 0.025248774691816425, + "grad_norm": 2.288347005844116, + "learning_rate": 9.974751225308185e-06, + "loss": 0.1126, "step": 170 }, { - "epoch": 0.013366998366255755, - "grad_norm": 5.244328022003174, - "learning_rate": 2.9919798009802464e-05, - "loss": 0.1252, + "epoch": 0.02673399673251151, + "grad_norm": 1.5640208721160889, + "learning_rate": 9.973266003267489e-06, + "loss": 0.1199, "step": 180 }, { - "epoch": 0.014109609386603297, - "grad_norm": 1.7632620334625244, - "learning_rate": 2.9915342343680382e-05, - "loss": 0.1122, + "epoch": 0.028219218773206593, + "grad_norm": 1.3363808393478394, + "learning_rate": 9.971780781226794e-06, + "loss": 0.109, "step": 190 }, { - "epoch": 0.014852220406950839, - "grad_norm": 3.5791783332824707, - "learning_rate": 2.9910886677558297e-05, - "loss": 0.1217, + "epoch": 0.029704440813901677, + "grad_norm": 1.5698567628860474, + "learning_rate": 9.9702955591861e-06, + "loss": 0.121, "step": 200 }, { - "epoch": 0.01559483142729838, - "grad_norm": 5.1304450035095215, - "learning_rate": 2.990643101143621e-05, - "loss": 0.1361, + "epoch": 0.03118966285459676, + "grad_norm": 1.7361061573028564, + "learning_rate": 9.968810337145404e-06, + "loss": 0.1346, "step": 210 }, { - "epoch": 0.016337442447645924, - "grad_norm": 5.422621250152588, - "learning_rate": 2.9901975345314124e-05, - "loss": 0.1404, + "epoch": 0.03267488489529185, + "grad_norm": 1.4597039222717285, + "learning_rate": 9.967325115104709e-06, + "loss": 0.1177, "step": 220 }, { - "epoch": 0.017080053467993465, - "grad_norm": 7.065879821777344, - "learning_rate": 2.9897519679192042e-05, - "loss": 0.1371, + "epoch": 0.03416010693598693, + "grad_norm": 0.9901100397109985, + "learning_rate": 9.965839893064015e-06, + "loss": 0.1083, "step": 230 }, { - "epoch": 0.01782266448834101, - "grad_norm": 2.3558316230773926, - "learning_rate": 2.9893064013069954e-05, - "loss": 0.1386, + "epoch": 0.03564532897668202, + "grad_norm": 1.2590155601501465, + "learning_rate": 9.964354671023319e-06, + "loss": 0.1275, "step": 240 }, { - "epoch": 0.01856527550868855, - "grad_norm": 1.8473409414291382, - "learning_rate": 2.988860834694787e-05, - "loss": 0.166, + "epoch": 0.0371305510173771, + "grad_norm": 1.403277039527893, + "learning_rate": 9.962869448982624e-06, + "loss": 0.125, "step": 250 }, { - "epoch": 0.019307886529036093, - "grad_norm": 4.502384662628174, - "learning_rate": 2.9884152680825784e-05, - "loss": 0.1241, + "epoch": 0.038615773058072185, + "grad_norm": 0.7854731678962708, + "learning_rate": 9.961384226941928e-06, + "loss": 0.1176, "step": 260 }, { - "epoch": 0.020050497549383633, - "grad_norm": 1.857735514640808, - "learning_rate": 2.98796970147037e-05, - "loss": 0.1369, + "epoch": 0.040100995098767266, + "grad_norm": 1.5352309942245483, + "learning_rate": 9.959899004901234e-06, + "loss": 0.1202, "step": 270 }, { - "epoch": 0.020793108569731173, - "grad_norm": 6.109134674072266, - "learning_rate": 2.9875241348581614e-05, - "loss": 0.1139, + "epoch": 0.041586217139462346, + "grad_norm": 2.2147843837738037, + "learning_rate": 9.958413782860539e-06, + "loss": 0.1113, "step": 280 }, { - "epoch": 0.021535719590078717, - "grad_norm": 3.0833802223205566, - "learning_rate": 2.987078568245953e-05, - "loss": 0.1203, + "epoch": 0.043071439180157434, + "grad_norm": 2.406499147415161, + "learning_rate": 9.956928560819843e-06, + "loss": 0.1182, "step": 290 }, { - "epoch": 0.022278330610426257, - "grad_norm": 2.37730073928833, - "learning_rate": 2.9866330016337444e-05, - "loss": 0.12, + "epoch": 0.044556661220852514, + "grad_norm": 0.9340627193450928, + "learning_rate": 9.955443338779149e-06, + "loss": 0.1318, "step": 300 }, { - "epoch": 0.0230209416307738, - "grad_norm": 2.950272798538208, - "learning_rate": 2.986187435021536e-05, - "loss": 0.1139, + "epoch": 0.0460418832615476, + "grad_norm": 1.3280363082885742, + "learning_rate": 9.953958116738453e-06, + "loss": 0.1009, "step": 310 }, { - "epoch": 0.02376355265112134, - "grad_norm": 4.4759521484375, - "learning_rate": 2.985741868409327e-05, - "loss": 0.146, + "epoch": 0.04752710530224268, + "grad_norm": 2.685227394104004, + "learning_rate": 9.952472894697758e-06, + "loss": 0.1327, "step": 320 }, { - "epoch": 0.024506163671468885, - "grad_norm": 1.08765709400177, - "learning_rate": 2.985296301797119e-05, - "loss": 0.1132, + "epoch": 0.04901232734293777, + "grad_norm": 2.1112072467803955, + "learning_rate": 9.950987672657062e-06, + "loss": 0.1405, "step": 330 }, { - "epoch": 0.025248774691816425, - "grad_norm": 2.5022499561309814, - "learning_rate": 2.9848507351849104e-05, - "loss": 0.0975, + "epoch": 0.05049754938363285, + "grad_norm": 1.7051947116851807, + "learning_rate": 9.949502450616368e-06, + "loss": 0.1002, "step": 340 }, { - "epoch": 0.02599138571216397, - "grad_norm": 6.558846950531006, - "learning_rate": 2.9844051685727015e-05, - "loss": 0.1128, + "epoch": 0.05198277142432794, + "grad_norm": 1.2203819751739502, + "learning_rate": 9.948017228575673e-06, + "loss": 0.1308, "step": 350 }, { - "epoch": 0.02673399673251151, - "grad_norm": 2.4449667930603027, - "learning_rate": 2.9839596019604934e-05, - "loss": 0.146, + "epoch": 0.05346799346502302, + "grad_norm": 1.344948172569275, + "learning_rate": 9.946532006534977e-06, + "loss": 0.1053, "step": 360 }, { - "epoch": 0.027476607752859053, - "grad_norm": 3.488586902618408, - "learning_rate": 2.9835140353482845e-05, - "loss": 0.1066, + "epoch": 0.054953215505718106, + "grad_norm": 1.8618378639221191, + "learning_rate": 9.945046784494282e-06, + "loss": 0.1331, "step": 370 }, { - "epoch": 0.028219218773206593, - "grad_norm": 3.4595577716827393, - "learning_rate": 2.983068468736076e-05, - "loss": 0.1295, + "epoch": 0.05643843754641319, + "grad_norm": 1.7152793407440186, + "learning_rate": 9.943561562453588e-06, + "loss": 0.1065, "step": 380 }, { - "epoch": 0.028961829793554137, - "grad_norm": 5.9359259605407715, - "learning_rate": 2.9826229021238675e-05, - "loss": 0.1566, + "epoch": 0.057923659587108274, + "grad_norm": 1.1750344038009644, + "learning_rate": 9.942076340412892e-06, + "loss": 0.1122, "step": 390 }, { - "epoch": 0.029704440813901677, - "grad_norm": 2.771470546722412, - "learning_rate": 2.982177335511659e-05, - "loss": 0.0896, + "epoch": 0.059408881627803355, + "grad_norm": 1.1157976388931274, + "learning_rate": 9.940591118372197e-06, + "loss": 0.0954, "step": 400 }, { - "epoch": 0.03044705183424922, - "grad_norm": 5.8616862297058105, - "learning_rate": 2.9817317688994505e-05, - "loss": 0.1435, + "epoch": 0.06089410366849844, + "grad_norm": 1.8320587873458862, + "learning_rate": 9.939105896331503e-06, + "loss": 0.1009, "step": 410 }, { - "epoch": 0.03118966285459676, - "grad_norm": 3.5416555404663086, - "learning_rate": 2.981286202287242e-05, - "loss": 0.133, + "epoch": 0.06237932570919352, + "grad_norm": 1.6388369798660278, + "learning_rate": 9.937620674290807e-06, + "loss": 0.1146, "step": 420 }, { - "epoch": 0.0319322738749443, - "grad_norm": 2.3944127559661865, - "learning_rate": 2.9808406356750335e-05, - "loss": 0.1329, + "epoch": 0.0638645477498886, + "grad_norm": 1.4120920896530151, + "learning_rate": 9.936135452250112e-06, + "loss": 0.1177, "step": 430 }, { - "epoch": 0.03267488489529185, - "grad_norm": 2.402383804321289, - "learning_rate": 2.980395069062825e-05, - "loss": 0.1159, + "epoch": 0.0653497697905837, + "grad_norm": 3.5797083377838135, + "learning_rate": 9.934650230209418e-06, + "loss": 0.0851, "step": 440 }, { - "epoch": 0.03341749591563939, - "grad_norm": 8.119304656982422, - "learning_rate": 2.9799495024506165e-05, - "loss": 0.0956, + "epoch": 0.06683499183127878, + "grad_norm": 1.3494925498962402, + "learning_rate": 9.933165008168722e-06, + "loss": 0.1109, "step": 450 }, { - "epoch": 0.03416010693598693, - "grad_norm": 7.150251865386963, - "learning_rate": 2.9795039358384077e-05, - "loss": 0.1152, + "epoch": 0.06832021387197386, + "grad_norm": 2.6755356788635254, + "learning_rate": 9.931679786128027e-06, + "loss": 0.0913, "step": 460 }, { - "epoch": 0.03490271795633447, - "grad_norm": 3.084035634994507, - "learning_rate": 2.9790583692261995e-05, - "loss": 0.0913, + "epoch": 0.06980543591266894, + "grad_norm": 2.111666440963745, + "learning_rate": 9.930194564087333e-06, + "loss": 0.1176, "step": 470 }, { - "epoch": 0.03564532897668202, - "grad_norm": 4.946183681488037, - "learning_rate": 2.9786128026139907e-05, - "loss": 0.1228, + "epoch": 0.07129065795336403, + "grad_norm": 1.5201246738433838, + "learning_rate": 9.928709342046636e-06, + "loss": 0.1142, "step": 480 }, { - "epoch": 0.03638793999702956, - "grad_norm": 1.6884194612503052, - "learning_rate": 2.9781672360017822e-05, - "loss": 0.1226, + "epoch": 0.07277587999405911, + "grad_norm": 1.7397887706756592, + "learning_rate": 9.927224120005942e-06, + "loss": 0.1295, "step": 490 }, { - "epoch": 0.0371305510173771, - "grad_norm": 3.5960988998413086, - "learning_rate": 2.977721669389574e-05, - "loss": 0.1086, + "epoch": 0.0742611020347542, + "grad_norm": 0.8776573538780212, + "learning_rate": 9.925738897965246e-06, + "loss": 0.1038, "step": 500 }, { - "epoch": 0.03787316203772464, - "grad_norm": 1.143905758857727, - "learning_rate": 2.9772761027773652e-05, - "loss": 0.1043, + "epoch": 0.07574632407544928, + "grad_norm": 0.9134131669998169, + "learning_rate": 9.92425367592455e-06, + "loss": 0.1046, "step": 510 }, { - "epoch": 0.038615773058072185, - "grad_norm": 5.153063774108887, - "learning_rate": 2.9768305361651567e-05, - "loss": 0.1263, + "epoch": 0.07723154611614437, + "grad_norm": 0.8088383674621582, + "learning_rate": 9.922768453883857e-06, + "loss": 0.1095, "step": 520 }, { - "epoch": 0.039358384078419725, - "grad_norm": 3.656158924102783, - "learning_rate": 2.9763849695529485e-05, - "loss": 0.1135, + "epoch": 0.07871676815683945, + "grad_norm": 1.253274917602539, + "learning_rate": 9.921283231843161e-06, + "loss": 0.1006, "step": 530 }, { - "epoch": 0.040100995098767266, - "grad_norm": 3.6122283935546875, - "learning_rate": 2.9759394029407397e-05, - "loss": 0.1087, + "epoch": 0.08020199019753453, + "grad_norm": 0.9179076552391052, + "learning_rate": 9.919798009802466e-06, + "loss": 0.0918, "step": 540 }, { - "epoch": 0.040843606119114806, - "grad_norm": 2.1035525798797607, - "learning_rate": 2.9754938363285312e-05, - "loss": 0.1246, + "epoch": 0.08168721223822961, + "grad_norm": 2.49874210357666, + "learning_rate": 9.918312787761772e-06, + "loss": 0.0909, "step": 550 }, { - "epoch": 0.041586217139462346, - "grad_norm": 4.143787384033203, - "learning_rate": 2.9750482697163227e-05, - "loss": 0.1212, + "epoch": 0.08317243427892469, + "grad_norm": 1.1081864833831787, + "learning_rate": 9.916827565721076e-06, + "loss": 0.0909, "step": 560 }, { - "epoch": 0.04232882815980989, - "grad_norm": 3.2385432720184326, - "learning_rate": 2.9746027031041142e-05, - "loss": 0.1181, + "epoch": 0.08465765631961979, + "grad_norm": 0.7937701940536499, + "learning_rate": 9.91534234368038e-06, + "loss": 0.1153, "step": 570 }, { - "epoch": 0.043071439180157434, - "grad_norm": 2.036144733428955, - "learning_rate": 2.9741571364919057e-05, - "loss": 0.1131, + "epoch": 0.08614287836031487, + "grad_norm": 1.2835909128189087, + "learning_rate": 9.913857121639687e-06, + "loss": 0.1001, "step": 580 }, { - "epoch": 0.043814050200504974, - "grad_norm": 8.29155158996582, - "learning_rate": 2.973711569879697e-05, - "loss": 0.1433, + "epoch": 0.08762810040100995, + "grad_norm": 2.2040748596191406, + "learning_rate": 9.912371899598991e-06, + "loss": 0.1065, "step": 590 }, { - "epoch": 0.044556661220852514, - "grad_norm": 4.493346691131592, - "learning_rate": 2.9732660032674887e-05, - "loss": 0.1083, + "epoch": 0.08911332244170503, + "grad_norm": 1.055566430091858, + "learning_rate": 9.910886677558296e-06, + "loss": 0.1175, "step": 600 }, { - "epoch": 0.04529927224120006, - "grad_norm": 2.184577465057373, - "learning_rate": 2.9728204366552802e-05, - "loss": 0.076, + "epoch": 0.09059854448240012, + "grad_norm": 0.8433722853660583, + "learning_rate": 9.909401455517602e-06, + "loss": 0.1111, "step": 610 }, { - "epoch": 0.0460418832615476, - "grad_norm": 5.727611064910889, - "learning_rate": 2.9723748700430713e-05, - "loss": 0.1295, + "epoch": 0.0920837665230952, + "grad_norm": 2.4051921367645264, + "learning_rate": 9.907916233476904e-06, + "loss": 0.1074, "step": 620 }, { - "epoch": 0.04678449428189514, - "grad_norm": 2.224817991256714, - "learning_rate": 2.971929303430863e-05, - "loss": 0.1438, + "epoch": 0.09356898856379028, + "grad_norm": 1.3250362873077393, + "learning_rate": 9.90643101143621e-06, + "loss": 0.1202, "step": 630 }, { - "epoch": 0.04752710530224268, - "grad_norm": 2.196302652359009, - "learning_rate": 2.9714837368186547e-05, - "loss": 0.1176, + "epoch": 0.09505421060448536, + "grad_norm": 1.2202482223510742, + "learning_rate": 9.904945789395515e-06, + "loss": 0.114, "step": 640 }, { - "epoch": 0.04826971632259023, - "grad_norm": 2.8282594680786133, - "learning_rate": 2.971038170206446e-05, - "loss": 0.1177, + "epoch": 0.09653943264518046, + "grad_norm": 1.8007020950317383, + "learning_rate": 9.90346056735482e-06, + "loss": 0.0916, "step": 650 }, { - "epoch": 0.04901232734293777, - "grad_norm": 3.0465710163116455, - "learning_rate": 2.9705926035942373e-05, - "loss": 0.1386, + "epoch": 0.09802465468587554, + "grad_norm": 1.4209966659545898, + "learning_rate": 9.901975345314126e-06, + "loss": 0.0859, "step": 660 }, { - "epoch": 0.04975493836328531, - "grad_norm": 2.1503002643585205, - "learning_rate": 2.9701470369820292e-05, - "loss": 0.1032, + "epoch": 0.09950987672657062, + "grad_norm": 2.0150651931762695, + "learning_rate": 9.90049012327343e-06, + "loss": 0.1268, "step": 670 }, { - "epoch": 0.05049754938363285, - "grad_norm": 3.2530624866485596, - "learning_rate": 2.9697014703698203e-05, - "loss": 0.1137, + "epoch": 0.1009950987672657, + "grad_norm": 1.1411141157150269, + "learning_rate": 9.899004901232734e-06, + "loss": 0.1089, "step": 680 }, { - "epoch": 0.0512401604039804, - "grad_norm": 4.812537670135498, - "learning_rate": 2.969255903757612e-05, - "loss": 0.1326, + "epoch": 0.1024803208079608, + "grad_norm": 2.234036922454834, + "learning_rate": 9.89751967919204e-06, + "loss": 0.1017, "step": 690 }, { - "epoch": 0.05198277142432794, - "grad_norm": 3.6558496952056885, - "learning_rate": 2.9688103371454033e-05, - "loss": 0.1269, + "epoch": 0.10396554284865588, + "grad_norm": 1.515994906425476, + "learning_rate": 9.896034457151345e-06, + "loss": 0.1002, "step": 700 }, { - "epoch": 0.05272538244467548, - "grad_norm": 2.3057334423065186, - "learning_rate": 2.968364770533195e-05, - "loss": 0.1299, + "epoch": 0.10545076488935096, + "grad_norm": 1.4167280197143555, + "learning_rate": 9.89454923511065e-06, + "loss": 0.0884, "step": 710 }, { - "epoch": 0.05346799346502302, - "grad_norm": 3.2395918369293213, - "learning_rate": 2.9679192039209863e-05, - "loss": 0.0952, + "epoch": 0.10693598693004604, + "grad_norm": 1.0119879245758057, + "learning_rate": 9.893064013069956e-06, + "loss": 0.0975, "step": 720 }, { - "epoch": 0.054210604485370566, - "grad_norm": 2.045339584350586, - "learning_rate": 2.9674736373087775e-05, - "loss": 0.1311, + "epoch": 0.10842120897074113, + "grad_norm": 0.941593587398529, + "learning_rate": 9.89157879102926e-06, + "loss": 0.0978, "step": 730 }, { - "epoch": 0.054953215505718106, - "grad_norm": 5.249059677124023, - "learning_rate": 2.9670280706965693e-05, - "loss": 0.1554, + "epoch": 0.10990643101143621, + "grad_norm": 3.35498309135437, + "learning_rate": 9.890093568988564e-06, + "loss": 0.111, "step": 740 }, { - "epoch": 0.055695826526065646, - "grad_norm": 2.609159469604492, - "learning_rate": 2.966582504084361e-05, - "loss": 0.094, + "epoch": 0.11139165305213129, + "grad_norm": 0.6840288043022156, + "learning_rate": 9.88860834694787e-06, + "loss": 0.085, "step": 750 }, { - "epoch": 0.05643843754641319, - "grad_norm": 6.062933444976807, - "learning_rate": 2.966136937472152e-05, - "loss": 0.1746, + "epoch": 0.11287687509282637, + "grad_norm": 1.4142742156982422, + "learning_rate": 9.887123124907175e-06, + "loss": 0.1101, "step": 760 }, { - "epoch": 0.057181048566760734, - "grad_norm": 1.9558234214782715, - "learning_rate": 2.965691370859944e-05, - "loss": 0.1119, + "epoch": 0.11436209713352147, + "grad_norm": 0.8787768483161926, + "learning_rate": 9.88563790286648e-06, + "loss": 0.0939, "step": 770 }, { - "epoch": 0.057923659587108274, - "grad_norm": 3.7589683532714844, - "learning_rate": 2.965245804247735e-05, - "loss": 0.1309, + "epoch": 0.11584731917421655, + "grad_norm": 1.280203104019165, + "learning_rate": 9.884152680825784e-06, + "loss": 0.0813, "step": 780 }, { - "epoch": 0.058666270607455814, - "grad_norm": 5.349353790283203, - "learning_rate": 2.9648002376355265e-05, - "loss": 0.1092, + "epoch": 0.11733254121491163, + "grad_norm": 1.72808837890625, + "learning_rate": 9.882667458785088e-06, + "loss": 0.1089, "step": 790 }, { - "epoch": 0.059408881627803355, - "grad_norm": 2.1258533000946045, - "learning_rate": 2.964354671023318e-05, - "loss": 0.0964, + "epoch": 0.11881776325560671, + "grad_norm": 0.944834291934967, + "learning_rate": 9.881182236744394e-06, + "loss": 0.1104, "step": 800 }, { - "epoch": 0.0601514926481509, - "grad_norm": 2.0149474143981934, - "learning_rate": 2.9639091044111095e-05, - "loss": 0.1101, + "epoch": 0.1203029852963018, + "grad_norm": 1.4327675104141235, + "learning_rate": 9.879697014703699e-06, + "loss": 0.1108, "step": 810 }, { - "epoch": 0.06089410366849844, - "grad_norm": 4.236600875854492, - "learning_rate": 2.963463537798901e-05, - "loss": 0.091, + "epoch": 0.12178820733699688, + "grad_norm": 0.7821674346923828, + "learning_rate": 9.878211792663003e-06, + "loss": 0.1136, "step": 820 }, { - "epoch": 0.06163671468884598, - "grad_norm": 1.9681212902069092, - "learning_rate": 2.9630179711866925e-05, - "loss": 0.1065, + "epoch": 0.12327342937769196, + "grad_norm": 0.8276019096374512, + "learning_rate": 9.87672657062231e-06, + "loss": 0.0694, "step": 830 }, { - "epoch": 0.06237932570919352, - "grad_norm": 3.0421035289764404, - "learning_rate": 2.962572404574484e-05, - "loss": 0.1165, + "epoch": 0.12475865141838705, + "grad_norm": 2.3967127799987793, + "learning_rate": 9.875241348581614e-06, + "loss": 0.0881, "step": 840 }, { - "epoch": 0.06312193672954107, - "grad_norm": 4.69833517074585, - "learning_rate": 2.9621268379622755e-05, - "loss": 0.1158, + "epoch": 0.12624387345908214, + "grad_norm": 1.1571794748306274, + "learning_rate": 9.873756126540918e-06, + "loss": 0.0886, "step": 850 }, { - "epoch": 0.0638645477498886, - "grad_norm": 1.8385733366012573, - "learning_rate": 2.961681271350067e-05, - "loss": 0.1073, + "epoch": 0.1277290954997772, + "grad_norm": 1.8005186319351196, + "learning_rate": 9.872270904500224e-06, + "loss": 0.1016, "step": 860 }, { - "epoch": 0.06460715877023615, - "grad_norm": 4.566705703735352, - "learning_rate": 2.961235704737858e-05, - "loss": 0.0994, + "epoch": 0.1292143175404723, + "grad_norm": 0.8932623863220215, + "learning_rate": 9.870785682459529e-06, + "loss": 0.0908, "step": 870 }, { - "epoch": 0.0653497697905837, - "grad_norm": 5.810744285583496, - "learning_rate": 2.96079013812565e-05, - "loss": 0.101, + "epoch": 0.1306995395811674, + "grad_norm": 1.1731626987457275, + "learning_rate": 9.869300460418833e-06, + "loss": 0.0887, "step": 880 }, { - "epoch": 0.06609238081093123, - "grad_norm": 2.1213254928588867, - "learning_rate": 2.960344571513441e-05, - "loss": 0.1196, + "epoch": 0.13218476162186246, + "grad_norm": 0.9981728196144104, + "learning_rate": 9.867815238378138e-06, + "loss": 0.0945, "step": 890 }, { - "epoch": 0.06683499183127878, - "grad_norm": 2.281233787536621, - "learning_rate": 2.9598990049012327e-05, - "loss": 0.0955, + "epoch": 0.13366998366255756, + "grad_norm": 0.8230689167976379, + "learning_rate": 9.866330016337444e-06, + "loss": 0.1331, "step": 900 }, { - "epoch": 0.06757760285162631, - "grad_norm": 1.4217268228530884, - "learning_rate": 2.9594534382890245e-05, - "loss": 0.1013, + "epoch": 0.13515520570325262, + "grad_norm": 0.873444676399231, + "learning_rate": 9.864844794296748e-06, + "loss": 0.0934, "step": 910 }, { - "epoch": 0.06832021387197386, - "grad_norm": 2.6054506301879883, - "learning_rate": 2.9590078716768157e-05, - "loss": 0.0866, + "epoch": 0.13664042774394772, + "grad_norm": 1.5538525581359863, + "learning_rate": 9.863359572256053e-06, + "loss": 0.0865, "step": 920 }, { - "epoch": 0.0690628248923214, - "grad_norm": 3.899290084838867, - "learning_rate": 2.958562305064607e-05, - "loss": 0.128, + "epoch": 0.1381256497846428, + "grad_norm": 1.2409332990646362, + "learning_rate": 9.861874350215357e-06, + "loss": 0.0838, "step": 930 }, { - "epoch": 0.06980543591266894, - "grad_norm": 1.7163687944412231, - "learning_rate": 2.958116738452399e-05, - "loss": 0.14, + "epoch": 0.13961087182533788, + "grad_norm": 1.0957996845245361, + "learning_rate": 9.860389128174663e-06, + "loss": 0.1009, "step": 940 }, { - "epoch": 0.07054804693301649, - "grad_norm": 2.1250195503234863, - "learning_rate": 2.95767117184019e-05, - "loss": 0.1174, + "epoch": 0.14109609386603297, + "grad_norm": 0.722710371017456, + "learning_rate": 9.858903906133968e-06, + "loss": 0.0902, "step": 950 }, { - "epoch": 0.07129065795336403, - "grad_norm": 3.302210569381714, - "learning_rate": 2.9572256052279816e-05, - "loss": 0.0919, + "epoch": 0.14258131590672807, + "grad_norm": 1.6766164302825928, + "learning_rate": 9.857418684093272e-06, + "loss": 0.1139, "step": 960 }, { - "epoch": 0.07203326897371157, - "grad_norm": 2.169238328933716, - "learning_rate": 2.956780038615773e-05, - "loss": 0.1573, + "epoch": 0.14406653794742313, + "grad_norm": 1.146504521369934, + "learning_rate": 9.855933462052578e-06, + "loss": 0.1004, "step": 970 }, { - "epoch": 0.07277587999405911, - "grad_norm": 5.784306049346924, - "learning_rate": 2.9563344720035646e-05, - "loss": 0.1171, + "epoch": 0.14555175998811823, + "grad_norm": 2.164172410964966, + "learning_rate": 9.854448240011883e-06, + "loss": 0.0968, "step": 980 }, { - "epoch": 0.07351849101440665, - "grad_norm": 1.108161449432373, - "learning_rate": 2.955888905391356e-05, - "loss": 0.1081, + "epoch": 0.1470369820288133, + "grad_norm": 1.636353611946106, + "learning_rate": 9.852963017971187e-06, + "loss": 0.1029, "step": 990 }, { - "epoch": 0.0742611020347542, - "grad_norm": 2.4193813800811768, - "learning_rate": 2.9554433387791473e-05, - "loss": 0.1057, + "epoch": 0.1485222040695084, + "grad_norm": 0.8112787008285522, + "learning_rate": 9.851477795930492e-06, + "loss": 0.1033, "step": 1000 }, { - "epoch": 0.07500371305510174, - "grad_norm": 0.32207611203193665, - "learning_rate": 2.954997772166939e-05, - "loss": 0.1128, + "epoch": 0.15000742611020348, + "grad_norm": 1.385244607925415, + "learning_rate": 9.849992573889798e-06, + "loss": 0.0872, "step": 1010 }, { - "epoch": 0.07574632407544928, - "grad_norm": 2.084397554397583, - "learning_rate": 2.9545522055547306e-05, - "loss": 0.0976, + "epoch": 0.15149264815089855, + "grad_norm": 2.009169816970825, + "learning_rate": 9.848507351849102e-06, + "loss": 0.0953, "step": 1020 }, { - "epoch": 0.07648893509579682, - "grad_norm": 4.024555683135986, - "learning_rate": 2.9541066389425218e-05, - "loss": 0.1344, + "epoch": 0.15297787019159365, + "grad_norm": 0.788632333278656, + "learning_rate": 9.847022129808407e-06, + "loss": 0.092, "step": 1030 }, { - "epoch": 0.07723154611614437, - "grad_norm": 1.9376616477966309, - "learning_rate": 2.9536610723303133e-05, - "loss": 0.0779, + "epoch": 0.15446309223228874, + "grad_norm": 1.4505847692489624, + "learning_rate": 9.845536907767713e-06, + "loss": 0.1187, "step": 1040 }, { - "epoch": 0.0779741571364919, - "grad_norm": 4.285502910614014, - "learning_rate": 2.953215505718105e-05, - "loss": 0.1116, + "epoch": 0.1559483142729838, + "grad_norm": 1.9600352048873901, + "learning_rate": 9.844051685727017e-06, + "loss": 0.0951, "step": 1050 }, { - "epoch": 0.07871676815683945, - "grad_norm": 2.065674066543579, - "learning_rate": 2.9527699391058963e-05, - "loss": 0.1212, + "epoch": 0.1574335363136789, + "grad_norm": 1.3148131370544434, + "learning_rate": 9.842566463686322e-06, + "loss": 0.1247, "step": 1060 }, { - "epoch": 0.07945937917718698, - "grad_norm": 3.7805116176605225, - "learning_rate": 2.9523243724936878e-05, - "loss": 0.1089, + "epoch": 0.15891875835437397, + "grad_norm": 0.83713698387146, + "learning_rate": 9.841081241645628e-06, + "loss": 0.0838, "step": 1070 }, { - "epoch": 0.08020199019753453, - "grad_norm": 2.0384578704833984, - "learning_rate": 2.9518788058814796e-05, - "loss": 0.0872, + "epoch": 0.16040398039506906, + "grad_norm": 0.9304842352867126, + "learning_rate": 9.839596019604932e-06, + "loss": 0.1138, "step": 1080 }, { - "epoch": 0.08094460121788208, - "grad_norm": 1.7026249170303345, - "learning_rate": 2.9514332392692708e-05, - "loss": 0.0844, + "epoch": 0.16188920243576416, + "grad_norm": 1.4497487545013428, + "learning_rate": 9.838110797564237e-06, + "loss": 0.0867, "step": 1090 }, { - "epoch": 0.08168721223822961, - "grad_norm": 2.1008737087249756, - "learning_rate": 2.9509876726570623e-05, - "loss": 0.0968, + "epoch": 0.16337442447645922, + "grad_norm": 1.3044580221176147, + "learning_rate": 9.836625575523541e-06, + "loss": 0.0913, "step": 1100 }, { - "epoch": 0.08242982325857716, - "grad_norm": 2.490581750869751, - "learning_rate": 2.9505421060448538e-05, - "loss": 0.0936, + "epoch": 0.16485964651715432, + "grad_norm": 0.8657674193382263, + "learning_rate": 9.835140353482845e-06, + "loss": 0.0907, "step": 1110 }, { - "epoch": 0.08317243427892469, - "grad_norm": 3.1089231967926025, - "learning_rate": 2.9500965394326453e-05, - "loss": 0.1072, + "epoch": 0.16634486855784938, + "grad_norm": 1.2274751663208008, + "learning_rate": 9.833655131442152e-06, + "loss": 0.1128, "step": 1120 }, { - "epoch": 0.08391504529927224, - "grad_norm": 5.446791172027588, - "learning_rate": 2.9496509728204368e-05, - "loss": 0.1201, + "epoch": 0.16783009059854448, + "grad_norm": 1.1105313301086426, + "learning_rate": 9.832169909401456e-06, + "loss": 0.1022, "step": 1130 }, { - "epoch": 0.08465765631961979, - "grad_norm": 2.201861619949341, - "learning_rate": 2.949205406208228e-05, - "loss": 0.1447, + "epoch": 0.16931531263923957, + "grad_norm": 1.0874226093292236, + "learning_rate": 9.83068468736076e-06, + "loss": 0.1068, "step": 1140 }, { - "epoch": 0.08540026733996732, - "grad_norm": 1.191215991973877, - "learning_rate": 2.9487598395960198e-05, - "loss": 0.1118, + "epoch": 0.17080053467993464, + "grad_norm": 0.47556501626968384, + "learning_rate": 9.829199465320067e-06, + "loss": 0.0902, "step": 1150 }, { - "epoch": 0.08614287836031487, - "grad_norm": 1.263275146484375, - "learning_rate": 2.9483142729838113e-05, - "loss": 0.0975, + "epoch": 0.17228575672062973, + "grad_norm": 1.3352073431015015, + "learning_rate": 9.827714243279371e-06, + "loss": 0.0985, "step": 1160 }, { - "epoch": 0.08688548938066241, - "grad_norm": 4.553534507751465, - "learning_rate": 2.9478687063716025e-05, - "loss": 0.112, + "epoch": 0.17377097876132483, + "grad_norm": 1.3669053316116333, + "learning_rate": 9.826229021238675e-06, + "loss": 0.0974, "step": 1170 }, { - "epoch": 0.08762810040100995, - "grad_norm": 2.4747018814086914, - "learning_rate": 2.9474231397593943e-05, - "loss": 0.1136, + "epoch": 0.1752562008020199, + "grad_norm": 1.1422746181488037, + "learning_rate": 9.824743799197982e-06, + "loss": 0.105, "step": 1180 }, { - "epoch": 0.0883707114213575, - "grad_norm": 1.9060287475585938, - "learning_rate": 2.9469775731471858e-05, - "loss": 0.1184, + "epoch": 0.176741422842715, + "grad_norm": 1.4345518350601196, + "learning_rate": 9.823258577157286e-06, + "loss": 0.0856, "step": 1190 }, { - "epoch": 0.08911332244170503, - "grad_norm": 2.2394371032714844, - "learning_rate": 2.946532006534977e-05, - "loss": 0.1119, + "epoch": 0.17822664488341006, + "grad_norm": 0.863542377948761, + "learning_rate": 9.82177335511659e-06, + "loss": 0.1197, "step": 1200 }, { - "epoch": 0.08985593346205258, - "grad_norm": 4.413462162017822, - "learning_rate": 2.9460864399227685e-05, - "loss": 0.1005, + "epoch": 0.17971186692410515, + "grad_norm": 1.3736563920974731, + "learning_rate": 9.820288133075897e-06, + "loss": 0.0897, "step": 1210 }, { - "epoch": 0.09059854448240012, - "grad_norm": 2.7055234909057617, - "learning_rate": 2.94564087331056e-05, - "loss": 0.1211, + "epoch": 0.18119708896480025, + "grad_norm": 0.5827713012695312, + "learning_rate": 9.8188029110352e-06, + "loss": 0.073, "step": 1220 }, { - "epoch": 0.09134115550274766, - "grad_norm": 4.200916290283203, - "learning_rate": 2.9451953066983515e-05, - "loss": 0.1064, + "epoch": 0.1826823110054953, + "grad_norm": 1.133959412574768, + "learning_rate": 9.817317688994505e-06, + "loss": 0.0877, "step": 1230 }, { - "epoch": 0.0920837665230952, - "grad_norm": 4.629003047943115, - "learning_rate": 2.944749740086143e-05, - "loss": 0.1465, + "epoch": 0.1841675330461904, + "grad_norm": 1.5258187055587769, + "learning_rate": 9.815832466953812e-06, + "loss": 0.0935, "step": 1240 }, { - "epoch": 0.09282637754344275, - "grad_norm": 2.249943494796753, - "learning_rate": 2.9443041734739345e-05, - "loss": 0.1019, + "epoch": 0.1856527550868855, + "grad_norm": 1.1951026916503906, + "learning_rate": 9.814347244913114e-06, + "loss": 0.0975, "step": 1250 }, { - "epoch": 0.09356898856379028, - "grad_norm": 2.5523922443389893, - "learning_rate": 2.943858606861726e-05, - "loss": 0.1458, + "epoch": 0.18713797712758057, + "grad_norm": 0.7857174277305603, + "learning_rate": 9.81286202287242e-06, + "loss": 0.0899, "step": 1260 }, { - "epoch": 0.09431159958413783, - "grad_norm": 1.662513017654419, - "learning_rate": 2.9434130402495175e-05, - "loss": 0.1219, + "epoch": 0.18862319916827566, + "grad_norm": 1.260588526725769, + "learning_rate": 9.811376800831725e-06, + "loss": 0.0998, "step": 1270 }, { - "epoch": 0.09505421060448536, - "grad_norm": 1.973795771598816, - "learning_rate": 2.9429674736373086e-05, - "loss": 0.1208, + "epoch": 0.19010842120897073, + "grad_norm": 0.8027825355529785, + "learning_rate": 9.80989157879103e-06, + "loss": 0.09, "step": 1280 }, { - "epoch": 0.09579682162483291, - "grad_norm": 2.5172524452209473, - "learning_rate": 2.9425219070251005e-05, - "loss": 0.0764, + "epoch": 0.19159364324966582, + "grad_norm": 1.722459316253662, + "learning_rate": 9.808406356750335e-06, + "loss": 0.097, "step": 1290 }, { - "epoch": 0.09653943264518046, - "grad_norm": 2.8811981678009033, - "learning_rate": 2.9420763404128916e-05, - "loss": 0.1142, + "epoch": 0.19307886529036092, + "grad_norm": 0.8458243012428284, + "learning_rate": 9.80692113470964e-06, + "loss": 0.0969, "step": 1300 }, { - "epoch": 0.09728204366552799, - "grad_norm": 2.752640962600708, - "learning_rate": 2.941630773800683e-05, - "loss": 0.0713, + "epoch": 0.19456408733105598, + "grad_norm": 0.9626701474189758, + "learning_rate": 9.805435912668944e-06, + "loss": 0.0942, "step": 1310 }, { - "epoch": 0.09802465468587554, - "grad_norm": 3.1258955001831055, - "learning_rate": 2.941185207188475e-05, - "loss": 0.1004, + "epoch": 0.19604930937175108, + "grad_norm": 0.7567682862281799, + "learning_rate": 9.80395069062825e-06, + "loss": 0.1092, "step": 1320 }, { - "epoch": 0.09876726570622309, - "grad_norm": 2.652444839477539, - "learning_rate": 2.940739640576266e-05, - "loss": 0.1075, + "epoch": 0.19753453141244617, + "grad_norm": 1.9160317182540894, + "learning_rate": 9.802465468587555e-06, + "loss": 0.0882, "step": 1330 }, { - "epoch": 0.09950987672657062, - "grad_norm": 1.6719880104064941, - "learning_rate": 2.9402940739640576e-05, - "loss": 0.1315, + "epoch": 0.19901975345314124, + "grad_norm": 1.1188064813613892, + "learning_rate": 9.80098024654686e-06, + "loss": 0.0983, "step": 1340 }, { - "epoch": 0.10025248774691817, - "grad_norm": 1.6493836641311646, - "learning_rate": 2.9398485073518494e-05, - "loss": 0.1261, + "epoch": 0.20050497549383633, + "grad_norm": 1.1348779201507568, + "learning_rate": 9.799495024506165e-06, + "loss": 0.1138, "step": 1350 }, { - "epoch": 0.1009950987672657, - "grad_norm": 3.3327760696411133, - "learning_rate": 2.9394029407396406e-05, - "loss": 0.1226, + "epoch": 0.2019901975345314, + "grad_norm": 0.7097839117050171, + "learning_rate": 9.79800980246547e-06, + "loss": 0.0989, "step": 1360 }, { - "epoch": 0.10173770978761325, - "grad_norm": 1.6937384605407715, - "learning_rate": 2.938957374127432e-05, - "loss": 0.1263, + "epoch": 0.2034754195752265, + "grad_norm": 1.01082444190979, + "learning_rate": 9.796524580424774e-06, + "loss": 0.0803, "step": 1370 }, { - "epoch": 0.1024803208079608, - "grad_norm": 1.859004259109497, - "learning_rate": 2.9385118075152236e-05, - "loss": 0.0915, + "epoch": 0.2049606416159216, + "grad_norm": 0.8688536286354065, + "learning_rate": 9.79503935838408e-06, + "loss": 0.1079, "step": 1380 }, { - "epoch": 0.10322293182830833, - "grad_norm": 2.384235382080078, - "learning_rate": 2.938066240903015e-05, - "loss": 0.1396, + "epoch": 0.20644586365661666, + "grad_norm": 0.9843276143074036, + "learning_rate": 9.793554136343383e-06, + "loss": 0.0917, "step": 1390 }, { - "epoch": 0.10396554284865588, - "grad_norm": 1.8067560195922852, - "learning_rate": 2.9376206742908066e-05, - "loss": 0.0948, + "epoch": 0.20793108569731175, + "grad_norm": 1.4539355039596558, + "learning_rate": 9.79206891430269e-06, + "loss": 0.1013, "step": 1400 }, { - "epoch": 0.10470815386900341, - "grad_norm": 0.9995975494384766, - "learning_rate": 2.9371751076785978e-05, - "loss": 0.0838, + "epoch": 0.20941630773800682, + "grad_norm": 0.6660147905349731, + "learning_rate": 9.790583692261994e-06, + "loss": 0.0806, "step": 1410 }, { - "epoch": 0.10545076488935096, - "grad_norm": 1.3114655017852783, - "learning_rate": 2.9367295410663896e-05, - "loss": 0.1145, + "epoch": 0.2109015297787019, + "grad_norm": 1.042614459991455, + "learning_rate": 9.789098470221298e-06, + "loss": 0.083, "step": 1420 }, { - "epoch": 0.1061933759096985, - "grad_norm": 2.0225460529327393, - "learning_rate": 2.936283974454181e-05, - "loss": 0.1128, + "epoch": 0.212386751819397, + "grad_norm": 1.052061676979065, + "learning_rate": 9.787613248180604e-06, + "loss": 0.1052, "step": 1430 }, { - "epoch": 0.10693598693004604, - "grad_norm": 2.146571159362793, - "learning_rate": 2.9358384078419723e-05, - "loss": 0.0799, + "epoch": 0.21387197386009207, + "grad_norm": 0.9673222303390503, + "learning_rate": 9.786128026139909e-06, + "loss": 0.0957, "step": 1440 }, { - "epoch": 0.10767859795039358, - "grad_norm": 4.288600921630859, - "learning_rate": 2.9353928412297638e-05, - "loss": 0.1249, + "epoch": 0.21535719590078717, + "grad_norm": 1.9790165424346924, + "learning_rate": 9.784642804099213e-06, + "loss": 0.0825, "step": 1450 }, { - "epoch": 0.10842120897074113, - "grad_norm": 1.9718669652938843, - "learning_rate": 2.9349472746175556e-05, - "loss": 0.1119, + "epoch": 0.21684241794148226, + "grad_norm": 1.3546462059020996, + "learning_rate": 9.78315758205852e-06, + "loss": 0.0858, "step": 1460 }, { - "epoch": 0.10916381999108866, - "grad_norm": 2.543238639831543, - "learning_rate": 2.9345017080053468e-05, - "loss": 0.113, + "epoch": 0.21832763998217733, + "grad_norm": 1.302915096282959, + "learning_rate": 9.781672360017824e-06, + "loss": 0.0983, "step": 1470 }, { - "epoch": 0.10990643101143621, - "grad_norm": 1.8163429498672485, - "learning_rate": 2.9340561413931383e-05, - "loss": 0.0969, + "epoch": 0.21981286202287242, + "grad_norm": 0.818485677242279, + "learning_rate": 9.780187137977128e-06, + "loss": 0.0979, "step": 1480 }, { - "epoch": 0.11064904203178375, - "grad_norm": 1.0760383605957031, - "learning_rate": 2.93361057478093e-05, - "loss": 0.0848, + "epoch": 0.2212980840635675, + "grad_norm": 1.091336965560913, + "learning_rate": 9.778701915936434e-06, + "loss": 0.0885, "step": 1490 }, { - "epoch": 0.11139165305213129, - "grad_norm": 0.9687877297401428, - "learning_rate": 2.9331650081687213e-05, - "loss": 0.0766, + "epoch": 0.22278330610426259, + "grad_norm": 2.4682865142822266, + "learning_rate": 9.777216693895739e-06, + "loss": 0.1194, "step": 1500 }, { - "epoch": 0.11213426407247884, - "grad_norm": 3.9466569423675537, - "learning_rate": 2.9327194415565128e-05, - "loss": 0.1187, + "epoch": 0.22426852814495768, + "grad_norm": 1.5554701089859009, + "learning_rate": 9.775731471855043e-06, + "loss": 0.1125, "step": 1510 }, { - "epoch": 0.11287687509282637, - "grad_norm": 4.158041477203369, - "learning_rate": 2.9322738749443043e-05, - "loss": 0.1327, + "epoch": 0.22575375018565275, + "grad_norm": 1.6344841718673706, + "learning_rate": 9.774246249814348e-06, + "loss": 0.1015, "step": 1520 }, { - "epoch": 0.11361948611317392, - "grad_norm": 4.5801591873168945, - "learning_rate": 2.9318283083320958e-05, - "loss": 0.0884, + "epoch": 0.22723897222634784, + "grad_norm": 1.3307982683181763, + "learning_rate": 9.772761027773654e-06, + "loss": 0.0928, "step": 1530 }, { - "epoch": 0.11436209713352147, - "grad_norm": 4.984243392944336, - "learning_rate": 2.9313827417198873e-05, - "loss": 0.1085, + "epoch": 0.22872419426704294, + "grad_norm": 1.5012156963348389, + "learning_rate": 9.771275805732958e-06, + "loss": 0.0976, "step": 1540 }, { - "epoch": 0.115104708153869, - "grad_norm": 3.010652780532837, - "learning_rate": 2.9309371751076784e-05, - "loss": 0.0696, + "epoch": 0.230209416307738, + "grad_norm": 1.1061965227127075, + "learning_rate": 9.769790583692263e-06, + "loss": 0.0881, "step": 1550 }, { - "epoch": 0.11584731917421655, - "grad_norm": 2.6272575855255127, - "learning_rate": 2.9304916084954703e-05, - "loss": 0.0979, + "epoch": 0.2316946383484331, + "grad_norm": 0.9800447225570679, + "learning_rate": 9.768305361651567e-06, + "loss": 0.094, "step": 1560 }, { - "epoch": 0.11658993019456408, - "grad_norm": 5.034517765045166, - "learning_rate": 2.9300460418832618e-05, - "loss": 0.0946, + "epoch": 0.23317986038912816, + "grad_norm": 1.0607389211654663, + "learning_rate": 9.766820139610873e-06, + "loss": 0.0953, "step": 1570 }, { - "epoch": 0.11733254121491163, - "grad_norm": 2.4360742568969727, - "learning_rate": 2.929600475271053e-05, - "loss": 0.1243, + "epoch": 0.23466508242982326, + "grad_norm": 1.320814847946167, + "learning_rate": 9.765334917570178e-06, + "loss": 0.0763, "step": 1580 }, { - "epoch": 0.11807515223525918, - "grad_norm": 1.762876033782959, - "learning_rate": 2.9291549086588448e-05, - "loss": 0.0878, + "epoch": 0.23615030447051835, + "grad_norm": 0.9268600940704346, + "learning_rate": 9.763849695529482e-06, + "loss": 0.0991, "step": 1590 }, { - "epoch": 0.11881776325560671, - "grad_norm": 4.341997146606445, - "learning_rate": 2.9287093420466363e-05, - "loss": 0.143, + "epoch": 0.23763552651121342, + "grad_norm": 1.741870641708374, + "learning_rate": 9.762364473488788e-06, + "loss": 0.1003, "step": 1600 }, { - "epoch": 0.11956037427595426, - "grad_norm": 2.6205196380615234, - "learning_rate": 2.9282637754344274e-05, - "loss": 0.1234, + "epoch": 0.2391207485519085, + "grad_norm": 0.7823505401611328, + "learning_rate": 9.760879251448093e-06, + "loss": 0.1073, "step": 1610 }, { - "epoch": 0.1203029852963018, - "grad_norm": 1.5635881423950195, - "learning_rate": 2.927818208822219e-05, - "loss": 0.0918, + "epoch": 0.2406059705926036, + "grad_norm": 1.4746675491333008, + "learning_rate": 9.759394029407397e-06, + "loss": 0.0953, "step": 1620 }, { - "epoch": 0.12104559631664934, - "grad_norm": 4.154393672943115, - "learning_rate": 2.9273726422100104e-05, - "loss": 0.1227, + "epoch": 0.24209119263329867, + "grad_norm": 1.5444601774215698, + "learning_rate": 9.757908807366701e-06, + "loss": 0.076, "step": 1630 }, { - "epoch": 0.12178820733699688, - "grad_norm": 3.161184549331665, - "learning_rate": 2.926927075597802e-05, - "loss": 0.1084, + "epoch": 0.24357641467399377, + "grad_norm": 0.9794516563415527, + "learning_rate": 9.756423585326008e-06, + "loss": 0.1012, "step": 1640 }, { - "epoch": 0.12253081835734442, - "grad_norm": 1.4087167978286743, - "learning_rate": 2.9264815089855934e-05, - "loss": 0.0725, + "epoch": 0.24506163671468884, + "grad_norm": 0.9550230503082275, + "learning_rate": 9.754938363285312e-06, + "loss": 0.0837, "step": 1650 }, { - "epoch": 0.12327342937769196, - "grad_norm": 2.6927695274353027, - "learning_rate": 2.926035942373385e-05, - "loss": 0.0761, + "epoch": 0.24654685875538393, + "grad_norm": 0.9263174533843994, + "learning_rate": 9.753453141244616e-06, + "loss": 0.0787, "step": 1660 }, { - "epoch": 0.12401604039803951, - "grad_norm": 2.0278165340423584, - "learning_rate": 2.9255903757611764e-05, - "loss": 0.0915, + "epoch": 0.24803208079607902, + "grad_norm": 1.3556021451950073, + "learning_rate": 9.751967919203923e-06, + "loss": 0.1044, "step": 1670 }, { - "epoch": 0.12475865141838705, - "grad_norm": 4.448098659515381, - "learning_rate": 2.925144809148968e-05, - "loss": 0.1183, + "epoch": 0.2495173028367741, + "grad_norm": 0.7733617424964905, + "learning_rate": 9.750482697163227e-06, + "loss": 0.0835, "step": 1680 }, { - "epoch": 0.12550126243873458, - "grad_norm": 1.2473807334899902, - "learning_rate": 2.9246992425367594e-05, - "loss": 0.0999, + "epoch": 0.25100252487746916, + "grad_norm": 0.9405840635299683, + "learning_rate": 9.748997475122531e-06, + "loss": 0.0957, "step": 1690 }, { - "epoch": 0.12624387345908214, - "grad_norm": 1.8847259283065796, - "learning_rate": 2.924253675924551e-05, - "loss": 0.0896, + "epoch": 0.2524877469181643, + "grad_norm": 0.7323219180107117, + "learning_rate": 9.747512253081838e-06, + "loss": 0.0863, "step": 1700 }, { - "epoch": 0.12698648447942967, - "grad_norm": 2.6261157989501953, - "learning_rate": 2.923808109312342e-05, - "loss": 0.1043, + "epoch": 0.25397296895885935, + "grad_norm": 1.754362940788269, + "learning_rate": 9.74602703104114e-06, + "loss": 0.0904, "step": 1710 }, { - "epoch": 0.1277290954997772, - "grad_norm": 4.396406650543213, - "learning_rate": 2.9233625427001336e-05, - "loss": 0.1166, + "epoch": 0.2554581909995544, + "grad_norm": 0.9422330856323242, + "learning_rate": 9.744541809000446e-06, + "loss": 0.118, "step": 1720 }, { - "epoch": 0.12847170652012477, - "grad_norm": 1.8150869607925415, - "learning_rate": 2.9229169760879254e-05, - "loss": 0.0932, + "epoch": 0.25694341304024954, + "grad_norm": 1.2818480730056763, + "learning_rate": 9.74305658695975e-06, + "loss": 0.0933, "step": 1730 }, { - "epoch": 0.1292143175404723, - "grad_norm": 1.0094959735870361, - "learning_rate": 2.9224714094757166e-05, - "loss": 0.107, + "epoch": 0.2584286350809446, + "grad_norm": 0.8700027465820312, + "learning_rate": 9.741571364919055e-06, + "loss": 0.0863, "step": 1740 }, { - "epoch": 0.12995692856081983, - "grad_norm": 1.1417855024337769, - "learning_rate": 2.922025842863508e-05, - "loss": 0.1165, + "epoch": 0.25991385712163967, + "grad_norm": 1.0074703693389893, + "learning_rate": 9.740086142878361e-06, + "loss": 0.1151, "step": 1750 }, { - "epoch": 0.1306995395811674, - "grad_norm": 2.269012689590454, - "learning_rate": 2.9215802762513e-05, - "loss": 0.124, + "epoch": 0.2613990791623348, + "grad_norm": 1.1355704069137573, + "learning_rate": 9.738600920837666e-06, + "loss": 0.0921, "step": 1760 }, { - "epoch": 0.13144215060151493, - "grad_norm": 2.0236096382141113, - "learning_rate": 2.921134709639091e-05, - "loss": 0.0817, + "epoch": 0.26288430120302986, + "grad_norm": 0.9470556378364563, + "learning_rate": 9.73711569879697e-06, + "loss": 0.0763, "step": 1770 }, { - "epoch": 0.13218476162186246, - "grad_norm": 3.055938482284546, - "learning_rate": 2.9206891430268826e-05, - "loss": 0.1123, + "epoch": 0.2643695232437249, + "grad_norm": 1.5006542205810547, + "learning_rate": 9.735630476756276e-06, + "loss": 0.076, "step": 1780 }, { - "epoch": 0.13292737264221002, - "grad_norm": 3.183199882507324, - "learning_rate": 2.920243576414674e-05, - "loss": 0.1264, + "epoch": 0.26585474528442005, + "grad_norm": 0.9084158539772034, + "learning_rate": 9.73414525471558e-06, + "loss": 0.091, "step": 1790 }, { - "epoch": 0.13366998366255756, - "grad_norm": 1.401501178741455, - "learning_rate": 2.9197980098024656e-05, - "loss": 0.1142, + "epoch": 0.2673399673251151, + "grad_norm": 1.3258038759231567, + "learning_rate": 9.732660032674885e-06, + "loss": 0.1342, "step": 1800 }, { - "epoch": 0.1344125946829051, - "grad_norm": 0.9348109364509583, - "learning_rate": 2.919352443190257e-05, - "loss": 0.0885, + "epoch": 0.2688251893658102, + "grad_norm": 0.9406817555427551, + "learning_rate": 9.731174810634191e-06, + "loss": 0.0979, "step": 1810 }, { - "epoch": 0.13515520570325262, - "grad_norm": 2.85656476020813, - "learning_rate": 2.9189068765780482e-05, - "loss": 0.1249, + "epoch": 0.27031041140650525, + "grad_norm": 0.9855642318725586, + "learning_rate": 9.729689588593496e-06, + "loss": 0.078, "step": 1820 }, { - "epoch": 0.13589781672360018, - "grad_norm": 2.1008095741271973, - "learning_rate": 2.91846130996584e-05, - "loss": 0.092, + "epoch": 0.27179563344720037, + "grad_norm": 0.9892807006835938, + "learning_rate": 9.7282043665528e-06, + "loss": 0.0977, "step": 1830 }, { - "epoch": 0.13664042774394772, - "grad_norm": 3.1172657012939453, - "learning_rate": 2.9180157433536316e-05, - "loss": 0.0867, + "epoch": 0.27328085548789544, + "grad_norm": 0.8949669599533081, + "learning_rate": 9.726719144512106e-06, + "loss": 0.0822, "step": 1840 }, { - "epoch": 0.13738303876429525, - "grad_norm": 1.8529694080352783, - "learning_rate": 2.9175701767414227e-05, - "loss": 0.0946, + "epoch": 0.2747660775285905, + "grad_norm": 1.159778118133545, + "learning_rate": 9.725233922471409e-06, + "loss": 0.1103, "step": 1850 }, { - "epoch": 0.1381256497846428, - "grad_norm": 2.7626330852508545, - "learning_rate": 2.9171246101292142e-05, - "loss": 0.0952, + "epoch": 0.2762512995692856, + "grad_norm": 1.6085240840911865, + "learning_rate": 9.723748700430715e-06, + "loss": 0.0864, "step": 1860 }, { - "epoch": 0.13886826080499035, - "grad_norm": 7.8472089767456055, - "learning_rate": 2.916679043517006e-05, - "loss": 0.1124, + "epoch": 0.2777365216099807, + "grad_norm": 1.4303113222122192, + "learning_rate": 9.72226347839002e-06, + "loss": 0.101, "step": 1870 }, { - "epoch": 0.13961087182533788, - "grad_norm": 2.6485297679901123, - "learning_rate": 2.9162334769047972e-05, - "loss": 0.1109, + "epoch": 0.27922174365067576, + "grad_norm": 1.550205111503601, + "learning_rate": 9.720778256349324e-06, + "loss": 0.0782, "step": 1880 }, { - "epoch": 0.14035348284568544, - "grad_norm": 4.575742721557617, - "learning_rate": 2.9157879102925887e-05, - "loss": 0.0989, + "epoch": 0.2807069656913709, + "grad_norm": 1.5073113441467285, + "learning_rate": 9.71929303430863e-06, + "loss": 0.0986, "step": 1890 }, { - "epoch": 0.14109609386603297, - "grad_norm": 4.842647552490234, - "learning_rate": 2.9153423436803806e-05, - "loss": 0.1074, + "epoch": 0.28219218773206595, + "grad_norm": 1.2778253555297852, + "learning_rate": 9.717807812267935e-06, + "loss": 0.0731, "step": 1900 }, { - "epoch": 0.1418387048863805, - "grad_norm": 3.04119873046875, - "learning_rate": 2.9148967770681717e-05, - "loss": 0.0972, + "epoch": 0.283677409772761, + "grad_norm": 0.669276237487793, + "learning_rate": 9.716322590227239e-06, + "loss": 0.0902, "step": 1910 }, { - "epoch": 0.14258131590672807, - "grad_norm": 2.0396547317504883, - "learning_rate": 2.9144512104559632e-05, - "loss": 0.1181, + "epoch": 0.28516263181345614, + "grad_norm": 1.8269588947296143, + "learning_rate": 9.714837368186545e-06, + "loss": 0.0975, "step": 1920 }, { - "epoch": 0.1433239269270756, - "grad_norm": 2.2266111373901367, - "learning_rate": 2.9140056438437547e-05, - "loss": 0.0901, + "epoch": 0.2866478538541512, + "grad_norm": 1.0207661390304565, + "learning_rate": 9.71335214614585e-06, + "loss": 0.0947, "step": 1930 }, { - "epoch": 0.14406653794742313, - "grad_norm": 3.9434754848480225, - "learning_rate": 2.9135600772315462e-05, - "loss": 0.1078, + "epoch": 0.28813307589484627, + "grad_norm": 1.4014843702316284, + "learning_rate": 9.711866924105154e-06, + "loss": 0.0655, "step": 1940 }, { - "epoch": 0.1448091489677707, - "grad_norm": 1.5583536624908447, - "learning_rate": 2.9131145106193377e-05, - "loss": 0.0767, + "epoch": 0.2896182979355414, + "grad_norm": 1.199450969696045, + "learning_rate": 9.71038170206446e-06, + "loss": 0.0831, "step": 1950 }, { - "epoch": 0.14555175998811823, - "grad_norm": 2.2595632076263428, - "learning_rate": 2.912668944007129e-05, - "loss": 0.0906, + "epoch": 0.29110351997623646, + "grad_norm": 0.9979912638664246, + "learning_rate": 9.708896480023765e-06, + "loss": 0.0745, "step": 1960 }, { - "epoch": 0.14629437100846576, - "grad_norm": 1.268849492073059, - "learning_rate": 2.9122233773949207e-05, - "loss": 0.0847, + "epoch": 0.2925887420169315, + "grad_norm": 0.9386908411979675, + "learning_rate": 9.707411257983069e-06, + "loss": 0.0934, "step": 1970 }, { - "epoch": 0.1470369820288133, - "grad_norm": 2.6412172317504883, - "learning_rate": 2.9117778107827122e-05, - "loss": 0.1297, + "epoch": 0.2940739640576266, + "grad_norm": 0.9256591200828552, + "learning_rate": 9.705926035942375e-06, + "loss": 0.1039, "step": 1980 }, { - "epoch": 0.14777959304916086, - "grad_norm": 3.151843547821045, - "learning_rate": 2.9113322441705034e-05, - "loss": 0.0975, + "epoch": 0.2955591860983217, + "grad_norm": 1.0602842569351196, + "learning_rate": 9.70444081390168e-06, + "loss": 0.1308, "step": 1990 }, { - "epoch": 0.1485222040695084, - "grad_norm": 2.6987667083740234, - "learning_rate": 2.9108866775582952e-05, - "loss": 0.0992, + "epoch": 0.2970444081390168, + "grad_norm": 0.7016430497169495, + "learning_rate": 9.702955591860984e-06, + "loss": 0.0987, "step": 2000 }, { - "epoch": 0.14926481508985592, - "grad_norm": 3.049734115600586, - "learning_rate": 2.9104411109460867e-05, - "loss": 0.1189, + "epoch": 0.29852963017971185, + "grad_norm": 1.3720355033874512, + "learning_rate": 9.70147036982029e-06, + "loss": 0.1047, "step": 2010 }, { - "epoch": 0.15000742611020348, - "grad_norm": 2.4125332832336426, - "learning_rate": 2.909995544333878e-05, - "loss": 0.059, + "epoch": 0.30001485222040697, + "grad_norm": 0.9285506010055542, + "learning_rate": 9.699985147779593e-06, + "loss": 0.0884, "step": 2020 }, { - "epoch": 0.15075003713055102, - "grad_norm": 2.5139408111572266, - "learning_rate": 2.9095499777216694e-05, - "loss": 0.1085, + "epoch": 0.30150007426110204, + "grad_norm": 0.745842456817627, + "learning_rate": 9.698499925738899e-06, + "loss": 0.0887, "step": 2030 }, { - "epoch": 0.15149264815089855, - "grad_norm": 2.7138638496398926, - "learning_rate": 2.909104411109461e-05, - "loss": 0.1312, + "epoch": 0.3029852963017971, + "grad_norm": 1.2370271682739258, + "learning_rate": 9.697014703698203e-06, + "loss": 0.1043, "step": 2040 }, { - "epoch": 0.1522352591712461, - "grad_norm": 2.654601812362671, - "learning_rate": 2.9086588444972524e-05, - "loss": 0.1116, + "epoch": 0.3044705183424922, + "grad_norm": 0.888309121131897, + "learning_rate": 9.695529481657508e-06, + "loss": 0.1045, "step": 2050 }, { - "epoch": 0.15297787019159365, - "grad_norm": 2.8384549617767334, - "learning_rate": 2.908213277885044e-05, - "loss": 0.0842, + "epoch": 0.3059557403831873, + "grad_norm": 0.7352439165115356, + "learning_rate": 9.694044259616814e-06, + "loss": 0.0787, "step": 2060 }, { - "epoch": 0.15372048121194118, - "grad_norm": 2.3352036476135254, - "learning_rate": 2.9077677112728354e-05, - "loss": 0.1261, + "epoch": 0.30744096242388236, + "grad_norm": 0.7865113019943237, + "learning_rate": 9.692559037576118e-06, + "loss": 0.0764, "step": 2070 }, { - "epoch": 0.15446309223228874, - "grad_norm": 4.299140453338623, - "learning_rate": 2.907322144660627e-05, - "loss": 0.1066, + "epoch": 0.3089261844645775, + "grad_norm": 1.0966145992279053, + "learning_rate": 9.691073815535423e-06, + "loss": 0.0982, "step": 2080 }, { - "epoch": 0.15520570325263627, - "grad_norm": 3.3088362216949463, - "learning_rate": 2.9068765780484184e-05, - "loss": 0.0954, + "epoch": 0.31041140650527255, + "grad_norm": 0.7263454794883728, + "learning_rate": 9.689588593494729e-06, + "loss": 0.1118, "step": 2090 }, { - "epoch": 0.1559483142729838, - "grad_norm": 1.6698198318481445, - "learning_rate": 2.90643101143621e-05, - "loss": 0.1086, + "epoch": 0.3118966285459676, + "grad_norm": 1.5336846113204956, + "learning_rate": 9.688103371454033e-06, + "loss": 0.1068, "step": 2100 }, { - "epoch": 0.15669092529333134, - "grad_norm": 2.066899061203003, - "learning_rate": 2.9059854448240014e-05, - "loss": 0.1169, + "epoch": 0.3133818505866627, + "grad_norm": 1.1668576002120972, + "learning_rate": 9.686618149413338e-06, + "loss": 0.1032, "step": 2110 }, { - "epoch": 0.1574335363136789, - "grad_norm": 1.2918481826782227, - "learning_rate": 2.905539878211793e-05, - "loss": 0.1248, + "epoch": 0.3148670726273578, + "grad_norm": 0.7677258849143982, + "learning_rate": 9.685132927372644e-06, + "loss": 0.0741, "step": 2120 }, { - "epoch": 0.15817614733402643, - "grad_norm": 3.492408037185669, - "learning_rate": 2.905094311599584e-05, - "loss": 0.0907, + "epoch": 0.31635229466805287, + "grad_norm": 1.1100083589553833, + "learning_rate": 9.683647705331948e-06, + "loss": 0.0863, "step": 2130 }, { - "epoch": 0.15891875835437397, - "grad_norm": 4.03883695602417, - "learning_rate": 2.904648744987376e-05, - "loss": 0.1044, + "epoch": 0.31783751670874794, + "grad_norm": 1.2412970066070557, + "learning_rate": 9.682162483291253e-06, + "loss": 0.0939, "step": 2140 }, { - "epoch": 0.15966136937472153, - "grad_norm": 2.541898250579834, - "learning_rate": 2.904203178375167e-05, - "loss": 0.1244, + "epoch": 0.31932273874944306, + "grad_norm": 0.913336455821991, + "learning_rate": 9.680677261250557e-06, + "loss": 0.0633, "step": 2150 }, { - "epoch": 0.16040398039506906, - "grad_norm": 1.1779425144195557, - "learning_rate": 2.9037576117629585e-05, - "loss": 0.096, + "epoch": 0.3208079607901381, + "grad_norm": 0.5790470242500305, + "learning_rate": 9.679192039209862e-06, + "loss": 0.1064, "step": 2160 }, { - "epoch": 0.1611465914154166, - "grad_norm": 2.521737575531006, - "learning_rate": 2.9033120451507504e-05, - "loss": 0.0854, + "epoch": 0.3222931828308332, + "grad_norm": 1.0939009189605713, + "learning_rate": 9.677706817169168e-06, + "loss": 0.1065, "step": 2170 }, { - "epoch": 0.16188920243576416, - "grad_norm": 2.2708542346954346, - "learning_rate": 2.9028664785385415e-05, - "loss": 0.1053, + "epoch": 0.3237784048715283, + "grad_norm": 1.2712286710739136, + "learning_rate": 9.676221595128472e-06, + "loss": 0.1024, "step": 2180 }, { - "epoch": 0.1626318134561117, - "grad_norm": 1.611698865890503, - "learning_rate": 2.902420911926333e-05, - "loss": 0.1094, + "epoch": 0.3252636269122234, + "grad_norm": 1.3349504470825195, + "learning_rate": 9.674736373087777e-06, + "loss": 0.089, "step": 2190 }, { - "epoch": 0.16337442447645922, - "grad_norm": 2.962660074234009, - "learning_rate": 2.9019753453141245e-05, - "loss": 0.1022, + "epoch": 0.32674884895291845, + "grad_norm": 2.0255956649780273, + "learning_rate": 9.673251151047083e-06, + "loss": 0.1185, "step": 2200 }, { - "epoch": 0.16411703549680678, - "grad_norm": 1.720831036567688, - "learning_rate": 2.901529778701916e-05, - "loss": 0.1207, + "epoch": 0.32823407099361357, + "grad_norm": 0.5723013281822205, + "learning_rate": 9.671765929006387e-06, + "loss": 0.0564, "step": 2210 }, { - "epoch": 0.16485964651715432, - "grad_norm": 2.2500967979431152, - "learning_rate": 2.9010842120897075e-05, - "loss": 0.1023, + "epoch": 0.32971929303430864, + "grad_norm": 1.5544242858886719, + "learning_rate": 9.670280706965692e-06, + "loss": 0.1056, "step": 2220 }, { - "epoch": 0.16560225753750185, - "grad_norm": 2.8786466121673584, - "learning_rate": 2.9006386454774987e-05, - "loss": 0.133, + "epoch": 0.3312045150750037, + "grad_norm": 1.538115382194519, + "learning_rate": 9.668795484924996e-06, + "loss": 0.0859, "step": 2230 }, { - "epoch": 0.16634486855784938, - "grad_norm": 2.339738607406616, - "learning_rate": 2.9001930788652905e-05, - "loss": 0.1436, + "epoch": 0.33268973711569877, + "grad_norm": 0.458325058221817, + "learning_rate": 9.667310262884302e-06, + "loss": 0.1003, "step": 2240 }, { - "epoch": 0.16708747957819695, - "grad_norm": 2.527097702026367, - "learning_rate": 2.899747512253082e-05, - "loss": 0.0817, + "epoch": 0.3341749591563939, + "grad_norm": 1.3945813179016113, + "learning_rate": 9.665825040843607e-06, + "loss": 0.0995, "step": 2250 }, { - "epoch": 0.16783009059854448, - "grad_norm": 2.750969171524048, - "learning_rate": 2.8993019456408732e-05, - "loss": 0.1492, + "epoch": 0.33566018119708896, + "grad_norm": 1.528809905052185, + "learning_rate": 9.664339818802911e-06, + "loss": 0.0809, "step": 2260 }, { - "epoch": 0.168572701618892, - "grad_norm": 2.195770740509033, - "learning_rate": 2.8988563790286647e-05, - "loss": 0.115, + "epoch": 0.337145403237784, + "grad_norm": 0.4207174479961395, + "learning_rate": 9.662854596762217e-06, + "loss": 0.089, "step": 2270 }, { - "epoch": 0.16931531263923957, - "grad_norm": 3.0774083137512207, - "learning_rate": 2.8984108124164565e-05, - "loss": 0.1062, + "epoch": 0.33863062527847915, + "grad_norm": 0.9507777094841003, + "learning_rate": 9.661369374721522e-06, + "loss": 0.0842, "step": 2280 }, { - "epoch": 0.1700579236595871, - "grad_norm": 2.673882484436035, - "learning_rate": 2.8979652458042477e-05, - "loss": 0.1063, + "epoch": 0.3401158473191742, + "grad_norm": 0.9284889698028564, + "learning_rate": 9.659884152680826e-06, + "loss": 0.0803, "step": 2290 }, { - "epoch": 0.17080053467993464, - "grad_norm": 3.152207612991333, - "learning_rate": 2.8975196791920392e-05, - "loss": 0.1098, + "epoch": 0.3416010693598693, + "grad_norm": 0.8482735753059387, + "learning_rate": 9.658398930640132e-06, + "loss": 0.0951, "step": 2300 }, { - "epoch": 0.1715431457002822, - "grad_norm": 4.860641956329346, - "learning_rate": 2.897074112579831e-05, - "loss": 0.1073, + "epoch": 0.3430862914005644, + "grad_norm": 2.1062092781066895, + "learning_rate": 9.656913708599437e-06, + "loss": 0.095, "step": 2310 }, { - "epoch": 0.17228575672062973, - "grad_norm": 2.261838436126709, - "learning_rate": 2.8966285459676222e-05, - "loss": 0.1035, + "epoch": 0.34457151344125947, + "grad_norm": 1.5955981016159058, + "learning_rate": 9.655428486558741e-06, + "loss": 0.101, "step": 2320 }, { - "epoch": 0.17302836774097727, - "grad_norm": 1.3627759218215942, - "learning_rate": 2.8961829793554137e-05, - "loss": 0.0873, + "epoch": 0.34605673548195454, + "grad_norm": 1.2699885368347168, + "learning_rate": 9.653943264518046e-06, + "loss": 0.0987, "step": 2330 }, { - "epoch": 0.17377097876132483, - "grad_norm": 2.009950637817383, - "learning_rate": 2.8957374127432052e-05, - "loss": 0.1033, + "epoch": 0.34754195752264966, + "grad_norm": 0.6988068222999573, + "learning_rate": 9.65245804247735e-06, + "loss": 0.0866, "step": 2340 }, { - "epoch": 0.17451358978167236, - "grad_norm": 1.0061966180801392, - "learning_rate": 2.8952918461309967e-05, - "loss": 0.0995, + "epoch": 0.3490271795633447, + "grad_norm": 1.0660147666931152, + "learning_rate": 9.650972820436656e-06, + "loss": 0.0965, "step": 2350 }, { - "epoch": 0.1752562008020199, - "grad_norm": 4.665594100952148, - "learning_rate": 2.8948462795187882e-05, - "loss": 0.1148, + "epoch": 0.3505124016040398, + "grad_norm": 0.8632172346115112, + "learning_rate": 9.64948759839596e-06, + "loss": 0.0884, "step": 2360 }, { - "epoch": 0.17599881182236746, - "grad_norm": 2.1051509380340576, - "learning_rate": 2.8944007129065793e-05, - "loss": 0.0832, + "epoch": 0.3519976236447349, + "grad_norm": 0.8742761015892029, + "learning_rate": 9.648002376355265e-06, + "loss": 0.0929, "step": 2370 }, { - "epoch": 0.176741422842715, - "grad_norm": 4.589431285858154, - "learning_rate": 2.8939551462943712e-05, - "loss": 0.072, + "epoch": 0.35348284568543, + "grad_norm": 1.3093167543411255, + "learning_rate": 9.646517154314571e-06, + "loss": 0.1031, "step": 2380 }, { - "epoch": 0.17748403386306252, - "grad_norm": 5.031434059143066, - "learning_rate": 2.8935095796821627e-05, - "loss": 0.1275, + "epoch": 0.35496806772612505, + "grad_norm": 0.9593400359153748, + "learning_rate": 9.645031932273876e-06, + "loss": 0.0925, "step": 2390 }, { - "epoch": 0.17822664488341006, - "grad_norm": 1.6660507917404175, - "learning_rate": 2.893064013069954e-05, - "loss": 0.1189, + "epoch": 0.3564532897668201, + "grad_norm": 1.3659998178482056, + "learning_rate": 9.64354671023318e-06, + "loss": 0.0825, "step": 2400 }, { - "epoch": 0.17896925590375762, - "grad_norm": 2.1114559173583984, - "learning_rate": 2.8926184464577457e-05, - "loss": 0.09, + "epoch": 0.35793851180751524, + "grad_norm": 0.9901537299156189, + "learning_rate": 9.642061488192486e-06, + "loss": 0.088, "step": 2410 }, { - "epoch": 0.17971186692410515, - "grad_norm": 1.1121262311935425, - "learning_rate": 2.8921728798455372e-05, - "loss": 0.0799, + "epoch": 0.3594237338482103, + "grad_norm": 1.5357334613800049, + "learning_rate": 9.64057626615179e-06, + "loss": 0.0786, "step": 2420 }, { - "epoch": 0.18045447794445268, - "grad_norm": 2.8174080848693848, - "learning_rate": 2.8917273132333283e-05, - "loss": 0.0926, + "epoch": 0.36090895588890537, + "grad_norm": 0.3341294825077057, + "learning_rate": 9.639091044111095e-06, + "loss": 0.0797, "step": 2430 }, { - "epoch": 0.18119708896480025, - "grad_norm": 3.2218480110168457, - "learning_rate": 2.89128174662112e-05, - "loss": 0.0758, + "epoch": 0.3623941779296005, + "grad_norm": 1.1990760564804077, + "learning_rate": 9.637605822070401e-06, + "loss": 0.0966, "step": 2440 }, { - "epoch": 0.18193969998514778, - "grad_norm": 1.7610548734664917, - "learning_rate": 2.8908361800089113e-05, - "loss": 0.1052, + "epoch": 0.36387939997029556, + "grad_norm": 0.9783576726913452, + "learning_rate": 9.636120600029704e-06, + "loss": 0.0957, "step": 2450 }, { - "epoch": 0.1826823110054953, - "grad_norm": 2.7015151977539062, - "learning_rate": 2.890390613396703e-05, - "loss": 0.0857, + "epoch": 0.3653646220109906, + "grad_norm": 0.8899098634719849, + "learning_rate": 9.63463537798901e-06, + "loss": 0.0987, "step": 2460 }, { - "epoch": 0.18342492202584287, - "grad_norm": 1.8576743602752686, - "learning_rate": 2.8899450467844943e-05, - "loss": 0.0653, + "epoch": 0.36684984405168575, + "grad_norm": 0.6533779501914978, + "learning_rate": 9.633150155948316e-06, + "loss": 0.0862, "step": 2470 }, { - "epoch": 0.1841675330461904, - "grad_norm": 5.928577423095703, - "learning_rate": 2.889499480172286e-05, - "loss": 0.1243, + "epoch": 0.3683350660923808, + "grad_norm": 0.6127310991287231, + "learning_rate": 9.631664933907619e-06, + "loss": 0.0803, "step": 2480 }, { - "epoch": 0.18491014406653794, - "grad_norm": 2.597346544265747, - "learning_rate": 2.8890539135600773e-05, - "loss": 0.1309, + "epoch": 0.3698202881330759, + "grad_norm": 1.53936767578125, + "learning_rate": 9.630179711866925e-06, + "loss": 0.0991, "step": 2490 }, { - "epoch": 0.1856527550868855, - "grad_norm": 3.324141263961792, - "learning_rate": 2.888608346947869e-05, - "loss": 0.0892, + "epoch": 0.371305510173771, + "grad_norm": 0.5968347191810608, + "learning_rate": 9.62869448982623e-06, + "loss": 0.0865, "step": 2500 }, { - "epoch": 0.18639536610723303, - "grad_norm": 2.4857001304626465, - "learning_rate": 2.8881627803356603e-05, - "loss": 0.0984, + "epoch": 0.37279073221446607, + "grad_norm": 1.7111512422561646, + "learning_rate": 9.627209267785534e-06, + "loss": 0.0959, "step": 2510 }, { - "epoch": 0.18713797712758057, - "grad_norm": 2.5961930751800537, - "learning_rate": 2.887717213723452e-05, - "loss": 0.091, + "epoch": 0.37427595425516114, + "grad_norm": 1.5803933143615723, + "learning_rate": 9.62572404574484e-06, + "loss": 0.1062, "step": 2520 }, { - "epoch": 0.1878805881479281, - "grad_norm": 0.8424578309059143, - "learning_rate": 2.8872716471112433e-05, - "loss": 0.1048, + "epoch": 0.3757611762958562, + "grad_norm": 1.0247458219528198, + "learning_rate": 9.624238823704144e-06, + "loss": 0.0937, "step": 2530 }, { - "epoch": 0.18862319916827566, - "grad_norm": 1.7092845439910889, - "learning_rate": 2.8868260804990345e-05, - "loss": 0.0916, + "epoch": 0.3772463983365513, + "grad_norm": 0.9361761212348938, + "learning_rate": 9.622753601663449e-06, + "loss": 0.075, "step": 2540 }, { - "epoch": 0.1893658101886232, - "grad_norm": 1.8642319440841675, - "learning_rate": 2.8863805138868263e-05, - "loss": 0.0953, + "epoch": 0.3787316203772464, + "grad_norm": 1.0401147603988647, + "learning_rate": 9.621268379622755e-06, + "loss": 0.0856, "step": 2550 }, { - "epoch": 0.19010842120897073, - "grad_norm": 3.4981400966644287, - "learning_rate": 2.8859349472746175e-05, - "loss": 0.088, + "epoch": 0.38021684241794146, + "grad_norm": 0.7037819623947144, + "learning_rate": 9.61978315758206e-06, + "loss": 0.0908, "step": 2560 }, { - "epoch": 0.1908510322293183, - "grad_norm": 4.905360221862793, - "learning_rate": 2.885489380662409e-05, - "loss": 0.102, + "epoch": 0.3817020644586366, + "grad_norm": 1.608916163444519, + "learning_rate": 9.618297935541364e-06, + "loss": 0.0975, "step": 2570 }, { - "epoch": 0.19159364324966582, - "grad_norm": 3.5098886489868164, - "learning_rate": 2.8850438140502008e-05, - "loss": 0.116, + "epoch": 0.38318728649933165, + "grad_norm": 2.890395402908325, + "learning_rate": 9.61681271350067e-06, + "loss": 0.1023, "step": 2580 }, { - "epoch": 0.19233625427001336, - "grad_norm": 2.462068557739258, - "learning_rate": 2.884598247437992e-05, - "loss": 0.0878, + "epoch": 0.3846725085400267, + "grad_norm": 0.8799918293952942, + "learning_rate": 9.615327491459974e-06, + "loss": 0.0717, "step": 2590 }, { - "epoch": 0.19307886529036092, - "grad_norm": 1.3594739437103271, - "learning_rate": 2.8841526808257835e-05, - "loss": 0.0978, + "epoch": 0.38615773058072184, + "grad_norm": 1.2562460899353027, + "learning_rate": 9.613842269419279e-06, + "loss": 0.0885, "step": 2600 }, { - "epoch": 0.19382147631070845, - "grad_norm": 0.8977119326591492, - "learning_rate": 2.883707114213575e-05, - "loss": 0.1115, + "epoch": 0.3876429526214169, + "grad_norm": 0.7879093289375305, + "learning_rate": 9.612357047378585e-06, + "loss": 0.0812, "step": 2610 }, { - "epoch": 0.19456408733105598, - "grad_norm": 4.278836727142334, - "learning_rate": 2.8832615476013665e-05, - "loss": 0.0873, + "epoch": 0.38912817466211197, + "grad_norm": 2.1101765632629395, + "learning_rate": 9.610871825337888e-06, + "loss": 0.0847, "step": 2620 }, { - "epoch": 0.19530669835140355, - "grad_norm": 2.4040420055389404, - "learning_rate": 2.882815980989158e-05, - "loss": 0.1154, + "epoch": 0.3906133967028071, + "grad_norm": 1.0832703113555908, + "learning_rate": 9.609386603297194e-06, + "loss": 0.088, "step": 2630 }, { - "epoch": 0.19604930937175108, - "grad_norm": 3.2387709617614746, - "learning_rate": 2.8823704143769495e-05, - "loss": 0.1202, + "epoch": 0.39209861874350216, + "grad_norm": 0.7375301718711853, + "learning_rate": 9.6079013812565e-06, + "loss": 0.0829, "step": 2640 }, { - "epoch": 0.1967919203920986, - "grad_norm": 1.2771217823028564, - "learning_rate": 2.881924847764741e-05, - "loss": 0.1064, + "epoch": 0.3935838407841972, + "grad_norm": 1.1385936737060547, + "learning_rate": 9.606416159215803e-06, + "loss": 0.0845, "step": 2650 }, { - "epoch": 0.19753453141244617, - "grad_norm": 4.477030277252197, - "learning_rate": 2.8814792811525325e-05, - "loss": 0.0813, + "epoch": 0.39506906282489235, + "grad_norm": 1.1508512496948242, + "learning_rate": 9.604930937175109e-06, + "loss": 0.0804, "step": 2660 }, { - "epoch": 0.1982771424327937, - "grad_norm": 2.6116533279418945, - "learning_rate": 2.8810337145403236e-05, - "loss": 0.1133, + "epoch": 0.3965542848655874, + "grad_norm": 1.269167184829712, + "learning_rate": 9.603445715134413e-06, + "loss": 0.0721, "step": 2670 }, { - "epoch": 0.19901975345314124, - "grad_norm": 2.1124253273010254, - "learning_rate": 2.880588147928115e-05, - "loss": 0.1134, + "epoch": 0.3980395069062825, + "grad_norm": 0.8906748294830322, + "learning_rate": 9.601960493093718e-06, + "loss": 0.0755, "step": 2680 }, { - "epoch": 0.19976236447348877, - "grad_norm": 2.3649754524230957, - "learning_rate": 2.880142581315907e-05, - "loss": 0.1062, + "epoch": 0.39952472894697755, + "grad_norm": 0.6662545204162598, + "learning_rate": 9.600475271053024e-06, + "loss": 0.0669, "step": 2690 }, { - "epoch": 0.20050497549383633, - "grad_norm": 2.6647801399230957, - "learning_rate": 2.879697014703698e-05, - "loss": 0.1092, + "epoch": 0.40100995098767267, + "grad_norm": 1.1794975996017456, + "learning_rate": 9.598990049012328e-06, + "loss": 0.1041, "step": 2700 }, { - "epoch": 0.20124758651418387, - "grad_norm": 3.3392791748046875, - "learning_rate": 2.8792514480914896e-05, - "loss": 0.1107, + "epoch": 0.40249517302836774, + "grad_norm": 0.8003746867179871, + "learning_rate": 9.597504826971633e-06, + "loss": 0.0861, "step": 2710 }, { - "epoch": 0.2019901975345314, - "grad_norm": 2.0530688762664795, - "learning_rate": 2.8788058814792815e-05, - "loss": 0.1148, + "epoch": 0.4039803950690628, + "grad_norm": 0.7886612415313721, + "learning_rate": 9.596019604930939e-06, + "loss": 0.0897, "step": 2720 }, { - "epoch": 0.20273280855487896, - "grad_norm": 2.3883824348449707, - "learning_rate": 2.8783603148670726e-05, - "loss": 0.0924, + "epoch": 0.4054656171097579, + "grad_norm": 1.1297708749771118, + "learning_rate": 9.594534382890243e-06, + "loss": 0.0854, "step": 2730 }, { - "epoch": 0.2034754195752265, - "grad_norm": 1.486218810081482, - "learning_rate": 2.877914748254864e-05, - "loss": 0.0943, + "epoch": 0.406950839150453, + "grad_norm": 0.5781280398368835, + "learning_rate": 9.593049160849548e-06, + "loss": 0.084, "step": 2740 }, { - "epoch": 0.20421803059557403, - "grad_norm": 2.0853097438812256, - "learning_rate": 2.8774691816426556e-05, - "loss": 0.1223, + "epoch": 0.40843606119114806, + "grad_norm": 0.6772047281265259, + "learning_rate": 9.591563938808852e-06, + "loss": 0.0894, "step": 2750 }, { - "epoch": 0.2049606416159216, - "grad_norm": 3.1080849170684814, - "learning_rate": 2.877023615030447e-05, - "loss": 0.0905, + "epoch": 0.4099212832318432, + "grad_norm": 1.1391324996948242, + "learning_rate": 9.590078716768158e-06, + "loss": 0.0632, "step": 2760 }, { - "epoch": 0.20570325263626912, - "grad_norm": 1.9018908739089966, - "learning_rate": 2.8765780484182386e-05, - "loss": 0.0928, + "epoch": 0.41140650527253825, + "grad_norm": 0.5008231997489929, + "learning_rate": 9.588593494727463e-06, + "loss": 0.0701, "step": 2770 }, { - "epoch": 0.20644586365661666, - "grad_norm": 2.179426908493042, - "learning_rate": 2.8761324818060298e-05, - "loss": 0.1011, + "epoch": 0.4128917273132333, + "grad_norm": 1.8930658102035522, + "learning_rate": 9.587108272686767e-06, + "loss": 0.0873, "step": 2780 }, { - "epoch": 0.20718847467696422, - "grad_norm": 1.9516263008117676, - "learning_rate": 2.8756869151938216e-05, - "loss": 0.1033, + "epoch": 0.41437694935392844, + "grad_norm": 2.1659274101257324, + "learning_rate": 9.585623050646072e-06, + "loss": 0.0929, "step": 2790 }, { - "epoch": 0.20793108569731175, - "grad_norm": 2.347296953201294, - "learning_rate": 2.875241348581613e-05, - "loss": 0.104, + "epoch": 0.4158621713946235, + "grad_norm": 0.8841612935066223, + "learning_rate": 9.584137828605378e-06, + "loss": 0.0866, "step": 2800 }, { - "epoch": 0.20867369671765928, - "grad_norm": 2.022731304168701, - "learning_rate": 2.8747957819694043e-05, - "loss": 0.0947, + "epoch": 0.41734739343531857, + "grad_norm": 1.3454309701919556, + "learning_rate": 9.582652606564682e-06, + "loss": 0.083, "step": 2810 }, { - "epoch": 0.20941630773800682, - "grad_norm": 1.8994909524917603, - "learning_rate": 2.874350215357196e-05, - "loss": 0.0827, + "epoch": 0.41883261547601364, + "grad_norm": 1.09120774269104, + "learning_rate": 9.581167384523987e-06, + "loss": 0.0937, "step": 2820 }, { - "epoch": 0.21015891875835438, - "grad_norm": 1.9812676906585693, - "learning_rate": 2.8739046487449876e-05, - "loss": 0.0721, + "epoch": 0.42031783751670876, + "grad_norm": 0.5597397089004517, + "learning_rate": 9.579682162483293e-06, + "loss": 0.0787, "step": 2830 }, { - "epoch": 0.2109015297787019, - "grad_norm": 0.4040673077106476, - "learning_rate": 2.8734590821327788e-05, - "loss": 0.0885, + "epoch": 0.4218030595574038, + "grad_norm": 0.9361597299575806, + "learning_rate": 9.578196940442597e-06, + "loss": 0.0831, "step": 2840 }, { - "epoch": 0.21164414079904945, - "grad_norm": 3.147190570831299, - "learning_rate": 2.8730135155205703e-05, - "loss": 0.1106, + "epoch": 0.4232882815980989, + "grad_norm": 1.678627848625183, + "learning_rate": 9.576711718401902e-06, + "loss": 0.0799, "step": 2850 }, { - "epoch": 0.212386751819397, - "grad_norm": 2.5040011405944824, - "learning_rate": 2.8725679489083618e-05, - "loss": 0.1102, + "epoch": 0.424773503638794, + "grad_norm": 1.2205970287322998, + "learning_rate": 9.575226496361206e-06, + "loss": 0.0781, "step": 2860 }, { - "epoch": 0.21312936283974454, - "grad_norm": 2.1592671871185303, - "learning_rate": 2.8721223822961533e-05, - "loss": 0.0697, + "epoch": 0.4262587256794891, + "grad_norm": 1.2254648208618164, + "learning_rate": 9.573741274320512e-06, + "loss": 0.0769, "step": 2870 }, { - "epoch": 0.21387197386009207, - "grad_norm": 2.1262803077697754, - "learning_rate": 2.8716768156839448e-05, - "loss": 0.1251, + "epoch": 0.42774394772018415, + "grad_norm": 0.9398304224014282, + "learning_rate": 9.572256052279817e-06, + "loss": 0.0796, "step": 2880 }, { - "epoch": 0.21461458488043963, - "grad_norm": 6.860218524932861, - "learning_rate": 2.8712312490717363e-05, - "loss": 0.0852, + "epoch": 0.42922916976087927, + "grad_norm": 0.6901552081108093, + "learning_rate": 9.570770830239121e-06, + "loss": 0.087, "step": 2890 }, { - "epoch": 0.21535719590078717, - "grad_norm": 3.187988758087158, - "learning_rate": 2.8707856824595278e-05, - "loss": 0.112, + "epoch": 0.43071439180157434, + "grad_norm": 1.507688045501709, + "learning_rate": 9.569285608198427e-06, + "loss": 0.0831, "step": 2900 }, { - "epoch": 0.2160998069211347, - "grad_norm": 2.9651613235473633, - "learning_rate": 2.8703401158473193e-05, - "loss": 0.0847, + "epoch": 0.4321996138422694, + "grad_norm": 0.6589367389678955, + "learning_rate": 9.567800386157732e-06, + "loss": 0.0951, "step": 2910 }, { - "epoch": 0.21684241794148226, - "grad_norm": 1.7240506410598755, - "learning_rate": 2.8698945492351108e-05, - "loss": 0.0983, + "epoch": 0.4336848358829645, + "grad_norm": 1.355723261833191, + "learning_rate": 9.566315164117036e-06, + "loss": 0.0674, "step": 2920 }, { - "epoch": 0.2175850289618298, - "grad_norm": 3.074819803237915, - "learning_rate": 2.8694489826229023e-05, - "loss": 0.0944, + "epoch": 0.4351700579236596, + "grad_norm": 1.7150205373764038, + "learning_rate": 9.564829942076342e-06, + "loss": 0.0994, "step": 2930 }, { - "epoch": 0.21832763998217733, - "grad_norm": 4.255871772766113, - "learning_rate": 2.8690034160106938e-05, - "loss": 0.1251, + "epoch": 0.43665527996435466, + "grad_norm": 1.8153376579284668, + "learning_rate": 9.563344720035647e-06, + "loss": 0.1008, "step": 2940 }, { - "epoch": 0.2190702510025249, - "grad_norm": 2.6262733936309814, - "learning_rate": 2.868557849398485e-05, - "loss": 0.0804, + "epoch": 0.4381405020050498, + "grad_norm": 1.1676079034805298, + "learning_rate": 9.561859497994951e-06, + "loss": 0.0993, "step": 2950 }, { - "epoch": 0.21981286202287242, - "grad_norm": 1.9793500900268555, - "learning_rate": 2.8681122827862768e-05, - "loss": 0.1058, + "epoch": 0.43962572404574485, + "grad_norm": 0.7941600680351257, + "learning_rate": 9.560374275954255e-06, + "loss": 0.0837, "step": 2960 }, { - "epoch": 0.22055547304321996, - "grad_norm": 1.1691769361495972, - "learning_rate": 2.867666716174068e-05, - "loss": 0.0838, + "epoch": 0.4411109460864399, + "grad_norm": 0.6376268863677979, + "learning_rate": 9.55888905391356e-06, + "loss": 0.0834, "step": 2970 }, { - "epoch": 0.2212980840635675, - "grad_norm": 2.1811420917510986, - "learning_rate": 2.8672211495618594e-05, - "loss": 0.1095, + "epoch": 0.442596168127135, + "grad_norm": 0.8009418845176697, + "learning_rate": 9.557403831872866e-06, + "loss": 0.076, "step": 2980 }, { - "epoch": 0.22204069508391505, - "grad_norm": 2.311396360397339, - "learning_rate": 2.8667755829496513e-05, - "loss": 0.1164, + "epoch": 0.4440813901678301, + "grad_norm": 0.4576304256916046, + "learning_rate": 9.55591860983217e-06, + "loss": 0.0717, "step": 2990 }, { - "epoch": 0.22278330610426259, - "grad_norm": 5.444539546966553, - "learning_rate": 2.8663300163374424e-05, - "loss": 0.1294, + "epoch": 0.44556661220852517, + "grad_norm": 1.8865007162094116, + "learning_rate": 9.554433387791475e-06, + "loss": 0.0771, "step": 3000 }, { - "epoch": 0.22352591712461012, - "grad_norm": 1.1934783458709717, - "learning_rate": 2.865884449725234e-05, - "loss": 0.1179, + "epoch": 0.44705183424922024, + "grad_norm": 1.5658719539642334, + "learning_rate": 9.552948165750781e-06, + "loss": 0.0944, "step": 3010 }, { - "epoch": 0.22426852814495768, - "grad_norm": 1.7925602197647095, - "learning_rate": 2.8654388831130254e-05, - "loss": 0.144, + "epoch": 0.44853705628991536, + "grad_norm": 0.936982274055481, + "learning_rate": 9.551462943710085e-06, + "loss": 0.0847, "step": 3020 }, { - "epoch": 0.2250111391653052, - "grad_norm": 4.332716941833496, - "learning_rate": 2.864993316500817e-05, - "loss": 0.111, + "epoch": 0.4500222783306104, + "grad_norm": 0.6622723340988159, + "learning_rate": 9.54997772166939e-06, + "loss": 0.0967, "step": 3030 }, { - "epoch": 0.22575375018565275, - "grad_norm": 3.0859615802764893, - "learning_rate": 2.8645477498886084e-05, - "loss": 0.0978, + "epoch": 0.4515075003713055, + "grad_norm": 1.0170260667800903, + "learning_rate": 9.548492499628696e-06, + "loss": 0.0794, "step": 3040 }, { - "epoch": 0.2264963612060003, - "grad_norm": 2.46098256111145, - "learning_rate": 2.8641021832764e-05, - "loss": 0.0836, + "epoch": 0.4529927224120006, + "grad_norm": 0.8602062463760376, + "learning_rate": 9.547007277588e-06, + "loss": 0.0819, "step": 3050 }, { - "epoch": 0.22723897222634784, - "grad_norm": 1.820902705192566, - "learning_rate": 2.8636566166641914e-05, - "loss": 0.0985, + "epoch": 0.4544779444526957, + "grad_norm": 0.7832088470458984, + "learning_rate": 9.545522055547305e-06, + "loss": 0.0935, "step": 3060 }, { - "epoch": 0.22798158324669537, - "grad_norm": 2.86248517036438, - "learning_rate": 2.863211050051983e-05, - "loss": 0.1093, + "epoch": 0.45596316649339075, + "grad_norm": 1.4716272354125977, + "learning_rate": 9.544036833506611e-06, + "loss": 0.0802, "step": 3070 }, { - "epoch": 0.22872419426704294, - "grad_norm": 2.933708429336548, - "learning_rate": 2.862765483439774e-05, - "loss": 0.0901, + "epoch": 0.45744838853408587, + "grad_norm": 0.9476587772369385, + "learning_rate": 9.542551611465914e-06, + "loss": 0.104, "step": 3080 }, { - "epoch": 0.22946680528739047, - "grad_norm": 2.0867459774017334, - "learning_rate": 2.862319916827566e-05, - "loss": 0.0686, + "epoch": 0.45893361057478094, + "grad_norm": 0.751997172832489, + "learning_rate": 9.54106638942522e-06, + "loss": 0.092, "step": 3090 }, { - "epoch": 0.230209416307738, - "grad_norm": 2.3671841621398926, - "learning_rate": 2.8618743502153574e-05, - "loss": 0.118, + "epoch": 0.460418832615476, + "grad_norm": 0.611443817615509, + "learning_rate": 9.539581167384526e-06, + "loss": 0.0791, "step": 3100 }, { - "epoch": 0.23095202732808554, - "grad_norm": 1.118376612663269, - "learning_rate": 2.8614287836031486e-05, - "loss": 0.0853, + "epoch": 0.46190405465617107, + "grad_norm": 1.4013361930847168, + "learning_rate": 9.538095945343829e-06, + "loss": 0.0855, "step": 3110 }, { - "epoch": 0.2316946383484331, - "grad_norm": 3.297832727432251, - "learning_rate": 2.86098321699094e-05, - "loss": 0.0996, + "epoch": 0.4633892766968662, + "grad_norm": 1.0885292291641235, + "learning_rate": 9.536610723303135e-06, + "loss": 0.0968, "step": 3120 }, { - "epoch": 0.23243724936878063, - "grad_norm": 2.1501147747039795, - "learning_rate": 2.860537650378732e-05, - "loss": 0.1381, + "epoch": 0.46487449873756126, + "grad_norm": 0.7723345160484314, + "learning_rate": 9.53512550126244e-06, + "loss": 0.0922, "step": 3130 }, { - "epoch": 0.23317986038912816, - "grad_norm": 0.9489710927009583, - "learning_rate": 2.860092083766523e-05, - "loss": 0.0692, + "epoch": 0.4663597207782563, + "grad_norm": 1.0908517837524414, + "learning_rate": 9.533640279221744e-06, + "loss": 0.0898, "step": 3140 }, { - "epoch": 0.23392247140947572, - "grad_norm": 2.0320940017700195, - "learning_rate": 2.8596465171543146e-05, - "loss": 0.0855, + "epoch": 0.46784494281895145, + "grad_norm": 0.9453380107879639, + "learning_rate": 9.53215505718105e-06, + "loss": 0.0589, "step": 3150 }, { - "epoch": 0.23466508242982326, - "grad_norm": 2.169110059738159, - "learning_rate": 2.859200950542106e-05, - "loss": 0.0809, + "epoch": 0.4693301648596465, + "grad_norm": 1.3652657270431519, + "learning_rate": 9.530669835140354e-06, + "loss": 0.0787, "step": 3160 }, { - "epoch": 0.2354076934501708, - "grad_norm": 3.284989595413208, - "learning_rate": 2.8587553839298976e-05, - "loss": 0.1028, + "epoch": 0.4708153869003416, + "grad_norm": 1.1908072233200073, + "learning_rate": 9.529184613099659e-06, + "loss": 0.0658, "step": 3170 }, { - "epoch": 0.23615030447051835, - "grad_norm": 2.6544220447540283, - "learning_rate": 2.858309817317689e-05, - "loss": 0.115, + "epoch": 0.4723006089410367, + "grad_norm": 0.8739597201347351, + "learning_rate": 9.527699391058965e-06, + "loss": 0.0945, "step": 3180 }, { - "epoch": 0.23689291549086589, - "grad_norm": 1.7478609085083008, - "learning_rate": 2.8578642507054803e-05, - "loss": 0.083, + "epoch": 0.47378583098173177, + "grad_norm": 0.9348416328430176, + "learning_rate": 9.52621416901827e-06, + "loss": 0.1154, "step": 3190 }, { - "epoch": 0.23763552651121342, - "grad_norm": 2.0759472846984863, - "learning_rate": 2.857418684093272e-05, - "loss": 0.1224, + "epoch": 0.47527105302242684, + "grad_norm": 1.416810154914856, + "learning_rate": 9.524728946977574e-06, + "loss": 0.0995, "step": 3200 }, { - "epoch": 0.23837813753156098, - "grad_norm": 2.7815895080566406, - "learning_rate": 2.8569731174810636e-05, - "loss": 0.1166, + "epoch": 0.47675627506312196, + "grad_norm": 0.7512962818145752, + "learning_rate": 9.52324372493688e-06, + "loss": 0.0902, "step": 3210 }, { - "epoch": 0.2391207485519085, - "grad_norm": 3.542616367340088, - "learning_rate": 2.8565275508688548e-05, - "loss": 0.1009, + "epoch": 0.478241497103817, + "grad_norm": 1.0729320049285889, + "learning_rate": 9.521758502896184e-06, + "loss": 0.1031, "step": 3220 }, { - "epoch": 0.23986335957225605, - "grad_norm": 1.8111937046051025, - "learning_rate": 2.8560819842566466e-05, - "loss": 0.099, + "epoch": 0.4797267191445121, + "grad_norm": 0.9648675322532654, + "learning_rate": 9.520273280855489e-06, + "loss": 0.0823, "step": 3230 }, { - "epoch": 0.2406059705926036, - "grad_norm": 1.9494497776031494, - "learning_rate": 2.855636417644438e-05, - "loss": 0.0787, + "epoch": 0.4812119411852072, + "grad_norm": 1.019823670387268, + "learning_rate": 9.518788058814795e-06, + "loss": 0.0827, "step": 3240 }, { - "epoch": 0.24134858161295114, - "grad_norm": 1.57643461227417, - "learning_rate": 2.8551908510322293e-05, - "loss": 0.0731, + "epoch": 0.4826971632259023, + "grad_norm": 0.8790899515151978, + "learning_rate": 9.517302836774098e-06, + "loss": 0.0521, "step": 3250 }, { - "epoch": 0.24209119263329867, - "grad_norm": 1.140007495880127, - "learning_rate": 2.8547452844200208e-05, - "loss": 0.0824, + "epoch": 0.48418238526659735, + "grad_norm": 1.2139712572097778, + "learning_rate": 9.515817614733404e-06, + "loss": 0.0806, "step": 3260 }, { - "epoch": 0.2428338036536462, - "grad_norm": 4.138311386108398, - "learning_rate": 2.8542997178078123e-05, - "loss": 0.1063, + "epoch": 0.4856676073072924, + "grad_norm": 2.080587148666382, + "learning_rate": 9.514332392692708e-06, + "loss": 0.1032, "step": 3270 }, { - "epoch": 0.24357641467399377, - "grad_norm": 3.1349868774414062, - "learning_rate": 2.8538541511956038e-05, - "loss": 0.1078, + "epoch": 0.48715282934798754, + "grad_norm": 1.7146954536437988, + "learning_rate": 9.512847170652013e-06, + "loss": 0.0949, "step": 3280 }, { - "epoch": 0.2443190256943413, - "grad_norm": 1.922900676727295, - "learning_rate": 2.8534085845833953e-05, - "loss": 0.0801, + "epoch": 0.4886380513886826, + "grad_norm": 0.7744117379188538, + "learning_rate": 9.511361948611319e-06, + "loss": 0.0848, "step": 3290 }, { - "epoch": 0.24506163671468884, - "grad_norm": 3.57891583442688, - "learning_rate": 2.8529630179711868e-05, - "loss": 0.0956, + "epoch": 0.49012327342937767, + "grad_norm": 1.0753897428512573, + "learning_rate": 9.509876726570623e-06, + "loss": 0.0549, "step": 3300 }, { - "epoch": 0.2458042477350364, - "grad_norm": 1.5893707275390625, - "learning_rate": 2.8525174513589783e-05, - "loss": 0.0762, + "epoch": 0.4916084954700728, + "grad_norm": 0.5599222779273987, + "learning_rate": 9.508391504529928e-06, + "loss": 0.0696, "step": 3310 }, { - "epoch": 0.24654685875538393, - "grad_norm": 4.745431423187256, - "learning_rate": 2.8520718847467698e-05, - "loss": 0.0812, + "epoch": 0.49309371751076786, + "grad_norm": 1.2357051372528076, + "learning_rate": 9.506906282489234e-06, + "loss": 0.1067, "step": 3320 }, { - "epoch": 0.24728946977573146, - "grad_norm": 1.915309190750122, - "learning_rate": 2.8516263181345613e-05, - "loss": 0.0879, + "epoch": 0.4945789395514629, + "grad_norm": 1.6787092685699463, + "learning_rate": 9.505421060448538e-06, + "loss": 0.08, "step": 3330 }, { - "epoch": 0.24803208079607902, - "grad_norm": 2.0146467685699463, - "learning_rate": 2.8511807515223528e-05, - "loss": 0.1033, + "epoch": 0.49606416159215805, + "grad_norm": 1.1719763278961182, + "learning_rate": 9.503935838407843e-06, + "loss": 0.0862, "step": 3340 }, { - "epoch": 0.24877469181642656, - "grad_norm": 1.623887300491333, - "learning_rate": 2.8507351849101443e-05, - "loss": 0.0813, + "epoch": 0.4975493836328531, + "grad_norm": 0.7013140916824341, + "learning_rate": 9.502450616367149e-06, + "loss": 0.0837, "step": 3350 }, { - "epoch": 0.2495173028367741, - "grad_norm": 0.7771584987640381, - "learning_rate": 2.8502896182979354e-05, - "loss": 0.0881, + "epoch": 0.4990346056735482, + "grad_norm": 1.0084935426712036, + "learning_rate": 9.500965394326453e-06, + "loss": 0.0825, "step": 3360 }, { - "epoch": 0.25025991385712165, - "grad_norm": 2.778308868408203, - "learning_rate": 2.8498440516857272e-05, - "loss": 0.0786, + "epoch": 0.5005198277142433, + "grad_norm": 0.7926396727561951, + "learning_rate": 9.499480172285757e-06, + "loss": 0.081, "step": 3370 }, { - "epoch": 0.25100252487746916, - "grad_norm": 1.1731817722320557, - "learning_rate": 2.8493984850735184e-05, - "loss": 0.1235, + "epoch": 0.5020050497549383, + "grad_norm": 0.9179475903511047, + "learning_rate": 9.497994950245062e-06, + "loss": 0.1042, "step": 3380 }, { - "epoch": 0.2517451358978167, - "grad_norm": 2.099097967147827, - "learning_rate": 2.84895291846131e-05, - "loss": 0.11, + "epoch": 0.5034902717956334, + "grad_norm": 1.2699346542358398, + "learning_rate": 9.496509728204368e-06, + "loss": 0.0753, "step": 3390 }, { - "epoch": 0.2524877469181643, - "grad_norm": 1.712109923362732, - "learning_rate": 2.8485073518491017e-05, - "loss": 0.0873, + "epoch": 0.5049754938363286, + "grad_norm": 1.430041790008545, + "learning_rate": 9.495024506163672e-06, + "loss": 0.0838, "step": 3400 }, { - "epoch": 0.2532303579385118, - "grad_norm": 1.978943943977356, - "learning_rate": 2.848061785236893e-05, - "loss": 0.0906, + "epoch": 0.5064607158770236, + "grad_norm": 0.9805778861045837, + "learning_rate": 9.493539284122977e-06, + "loss": 0.1068, "step": 3410 }, { - "epoch": 0.25397296895885935, - "grad_norm": 2.0468902587890625, - "learning_rate": 2.8476162186246844e-05, - "loss": 0.1062, + "epoch": 0.5079459379177187, + "grad_norm": 1.0353554487228394, + "learning_rate": 9.492054062082281e-06, + "loss": 0.0841, "step": 3420 }, { - "epoch": 0.2547155799792069, - "grad_norm": 1.208884358406067, - "learning_rate": 2.847170652012476e-05, - "loss": 0.1014, + "epoch": 0.5094311599584138, + "grad_norm": 0.7422654032707214, + "learning_rate": 9.490568840041587e-06, + "loss": 0.0889, "step": 3430 }, { - "epoch": 0.2554581909995544, - "grad_norm": 2.65171217918396, - "learning_rate": 2.8467250854002674e-05, - "loss": 0.1386, + "epoch": 0.5109163819991088, + "grad_norm": 1.0225118398666382, + "learning_rate": 9.489083618000892e-06, + "loss": 0.1067, "step": 3440 }, { - "epoch": 0.256200802019902, - "grad_norm": 1.2456876039505005, - "learning_rate": 2.846279518788059e-05, - "loss": 0.0859, + "epoch": 0.512401604039804, + "grad_norm": 1.0774625539779663, + "learning_rate": 9.487598395960196e-06, + "loss": 0.0737, "step": 3450 }, { - "epoch": 0.25694341304024954, - "grad_norm": 1.7401740550994873, - "learning_rate": 2.8458339521758504e-05, - "loss": 0.1033, + "epoch": 0.5138868260804991, + "grad_norm": 1.3356751203536987, + "learning_rate": 9.486113173919502e-06, + "loss": 0.0927, "step": 3460 }, { - "epoch": 0.25768602406059704, - "grad_norm": 5.769093990325928, - "learning_rate": 2.845388385563642e-05, - "loss": 0.1029, + "epoch": 0.5153720481211941, + "grad_norm": 0.96246337890625, + "learning_rate": 9.484627951878807e-06, + "loss": 0.0977, "step": 3470 }, { - "epoch": 0.2584286350809446, - "grad_norm": 2.1862595081329346, - "learning_rate": 2.8449428189514334e-05, - "loss": 0.082, + "epoch": 0.5168572701618892, + "grad_norm": 0.9139418005943298, + "learning_rate": 9.483142729838111e-06, + "loss": 0.0744, "step": 3480 }, { - "epoch": 0.25917124610129216, - "grad_norm": 4.852025985717773, - "learning_rate": 2.8444972523392246e-05, - "loss": 0.0956, + "epoch": 0.5183424922025843, + "grad_norm": 1.0638551712036133, + "learning_rate": 9.481657507797416e-06, + "loss": 0.0709, "step": 3490 }, { - "epoch": 0.25991385712163967, - "grad_norm": 2.4434781074523926, - "learning_rate": 2.8440516857270164e-05, - "loss": 0.108, + "epoch": 0.5198277142432793, + "grad_norm": 1.449756145477295, + "learning_rate": 9.480172285756722e-06, + "loss": 0.0563, "step": 3500 }, { - "epoch": 0.26065646814198723, - "grad_norm": 2.209559679031372, - "learning_rate": 2.843606119114808e-05, - "loss": 0.1083, + "epoch": 0.5213129362839745, + "grad_norm": 1.2065516710281372, + "learning_rate": 9.478687063716026e-06, + "loss": 0.0764, "step": 3510 }, { - "epoch": 0.2613990791623348, - "grad_norm": 3.44124698638916, - "learning_rate": 2.843160552502599e-05, - "loss": 0.0981, + "epoch": 0.5227981583246696, + "grad_norm": 0.9641933441162109, + "learning_rate": 9.47720184167533e-06, + "loss": 0.0422, "step": 3520 }, { - "epoch": 0.2621416901826823, - "grad_norm": 3.689404249191284, - "learning_rate": 2.8427149858903906e-05, - "loss": 0.0863, + "epoch": 0.5242833803653646, + "grad_norm": 0.8296038508415222, + "learning_rate": 9.475716619634637e-06, + "loss": 0.0891, "step": 3530 }, { - "epoch": 0.26288430120302986, - "grad_norm": 1.4514044523239136, - "learning_rate": 2.8422694192781824e-05, - "loss": 0.0854, + "epoch": 0.5257686024060597, + "grad_norm": 1.7687201499938965, + "learning_rate": 9.474231397593941e-06, + "loss": 0.0801, "step": 3540 }, { - "epoch": 0.2636269122233774, - "grad_norm": 1.8752799034118652, - "learning_rate": 2.8418238526659736e-05, - "loss": 0.0775, + "epoch": 0.5272538244467548, + "grad_norm": 1.2099858522415161, + "learning_rate": 9.472746175553246e-06, + "loss": 0.0918, "step": 3550 }, { - "epoch": 0.2643695232437249, - "grad_norm": 2.1504430770874023, - "learning_rate": 2.841378286053765e-05, - "loss": 0.0839, + "epoch": 0.5287390464874498, + "grad_norm": 1.1296342611312866, + "learning_rate": 9.47126095351255e-06, + "loss": 0.0749, "step": 3560 }, { - "epoch": 0.2651121342640725, - "grad_norm": 3.2270238399505615, - "learning_rate": 2.840932719441557e-05, - "loss": 0.1367, + "epoch": 0.530224268528145, + "grad_norm": 0.9852902889251709, + "learning_rate": 9.469775731471856e-06, + "loss": 0.0759, "step": 3570 }, { - "epoch": 0.26585474528442005, - "grad_norm": 2.0077528953552246, - "learning_rate": 2.840487152829348e-05, - "loss": 0.0887, + "epoch": 0.5317094905688401, + "grad_norm": 0.7065750956535339, + "learning_rate": 9.46829050943116e-06, + "loss": 0.0904, "step": 3580 }, { - "epoch": 0.26659735630476755, - "grad_norm": 1.6168723106384277, - "learning_rate": 2.8400415862171396e-05, - "loss": 0.1324, + "epoch": 0.5331947126095351, + "grad_norm": 0.8571629524230957, + "learning_rate": 9.466805287390465e-06, + "loss": 0.0968, "step": 3590 }, { - "epoch": 0.2673399673251151, - "grad_norm": 1.800391674041748, - "learning_rate": 2.8395960196049307e-05, - "loss": 0.1256, + "epoch": 0.5346799346502302, + "grad_norm": 1.2015560865402222, + "learning_rate": 9.46532006534977e-06, + "loss": 0.083, "step": 3600 }, { - "epoch": 0.2680825783454627, - "grad_norm": 1.1540509462356567, - "learning_rate": 2.8391504529927226e-05, - "loss": 0.0888, + "epoch": 0.5361651566909253, + "grad_norm": 0.9257380962371826, + "learning_rate": 9.463834843309076e-06, + "loss": 0.0751, "step": 3610 }, { - "epoch": 0.2688251893658102, - "grad_norm": 1.3013066053390503, - "learning_rate": 2.838704886380514e-05, - "loss": 0.096, + "epoch": 0.5376503787316204, + "grad_norm": 0.8309530019760132, + "learning_rate": 9.46234962126838e-06, + "loss": 0.0798, "step": 3620 }, { - "epoch": 0.26956780038615774, - "grad_norm": 1.9634844064712524, - "learning_rate": 2.8382593197683052e-05, - "loss": 0.0817, + "epoch": 0.5391356007723155, + "grad_norm": 0.9568300247192383, + "learning_rate": 9.460864399227685e-06, + "loss": 0.0806, "step": 3630 }, { - "epoch": 0.27031041140650525, - "grad_norm": 2.515450954437256, - "learning_rate": 2.837813753156097e-05, - "loss": 0.0909, + "epoch": 0.5406208228130105, + "grad_norm": 1.2841296195983887, + "learning_rate": 9.45937917718699e-06, + "loss": 0.0785, "step": 3640 }, { - "epoch": 0.2710530224268528, - "grad_norm": 2.612504482269287, - "learning_rate": 2.8373681865438886e-05, - "loss": 0.1121, + "epoch": 0.5421060448537056, + "grad_norm": 1.0622037649154663, + "learning_rate": 9.457893955146295e-06, + "loss": 0.0866, "step": 3650 }, { - "epoch": 0.27179563344720037, - "grad_norm": 1.4512356519699097, - "learning_rate": 2.8369226199316797e-05, - "loss": 0.0939, + "epoch": 0.5435912668944007, + "grad_norm": 1.3247896432876587, + "learning_rate": 9.4564087331056e-06, + "loss": 0.1107, "step": 3660 }, { - "epoch": 0.2725382444675479, - "grad_norm": 2.2824881076812744, - "learning_rate": 2.8364770533194712e-05, - "loss": 0.1103, + "epoch": 0.5450764889350957, + "grad_norm": 1.0009887218475342, + "learning_rate": 9.454923511064906e-06, + "loss": 0.0863, "step": 3670 }, { - "epoch": 0.27328085548789544, - "grad_norm": 1.5197831392288208, - "learning_rate": 2.8360314867072627e-05, - "loss": 0.1198, + "epoch": 0.5465617109757909, + "grad_norm": 0.9625425338745117, + "learning_rate": 9.45343828902421e-06, + "loss": 0.0836, "step": 3680 }, { - "epoch": 0.274023466508243, - "grad_norm": 1.559735894203186, - "learning_rate": 2.8355859200950542e-05, - "loss": 0.1381, + "epoch": 0.548046933016486, + "grad_norm": 0.9985396862030029, + "learning_rate": 9.451953066983515e-06, + "loss": 0.0939, "step": 3690 }, { - "epoch": 0.2747660775285905, - "grad_norm": 1.8886692523956299, - "learning_rate": 2.8351403534828457e-05, - "loss": 0.0942, + "epoch": 0.549532155057181, + "grad_norm": 0.8877044320106506, + "learning_rate": 9.45046784494282e-06, + "loss": 0.0937, "step": 3700 }, { - "epoch": 0.27550868854893806, - "grad_norm": 1.677405834197998, - "learning_rate": 2.8346947868706372e-05, - "loss": 0.0857, + "epoch": 0.5510173770978761, + "grad_norm": 0.5919630527496338, + "learning_rate": 9.448982622902123e-06, + "loss": 0.0698, "step": 3710 }, { - "epoch": 0.2762512995692856, - "grad_norm": 1.0931998491287231, - "learning_rate": 2.8342492202584287e-05, - "loss": 0.1002, + "epoch": 0.5525025991385712, + "grad_norm": 1.2577953338623047, + "learning_rate": 9.44749740086143e-06, + "loss": 0.0818, "step": 3720 }, { - "epoch": 0.27699391058963313, - "grad_norm": 2.140795946121216, - "learning_rate": 2.8338036536462202e-05, - "loss": 0.1144, + "epoch": 0.5539878211792663, + "grad_norm": 0.9300584197044373, + "learning_rate": 9.446012178820734e-06, + "loss": 0.0927, "step": 3730 }, { - "epoch": 0.2777365216099807, - "grad_norm": 1.8325400352478027, - "learning_rate": 2.8333580870340117e-05, - "loss": 0.0829, + "epoch": 0.5554730432199614, + "grad_norm": 0.8681656122207642, + "learning_rate": 9.444526956780038e-06, + "loss": 0.1092, "step": 3740 }, { - "epoch": 0.27847913263032825, - "grad_norm": 2.1785285472869873, - "learning_rate": 2.8329125204218032e-05, - "loss": 0.0643, + "epoch": 0.5569582652606565, + "grad_norm": 1.0126725435256958, + "learning_rate": 9.443041734739345e-06, + "loss": 0.1075, "step": 3750 }, { - "epoch": 0.27922174365067576, - "grad_norm": 2.3438045978546143, - "learning_rate": 2.8324669538095947e-05, - "loss": 0.0871, + "epoch": 0.5584434873013515, + "grad_norm": 0.7265552282333374, + "learning_rate": 9.441556512698649e-06, + "loss": 0.0793, "step": 3760 }, { - "epoch": 0.2799643546710233, - "grad_norm": 2.866464853286743, - "learning_rate": 2.832021387197386e-05, - "loss": 0.0718, + "epoch": 0.5599287093420466, + "grad_norm": 1.078549861907959, + "learning_rate": 9.440071290657953e-06, + "loss": 0.1038, "step": 3770 }, { - "epoch": 0.2807069656913709, - "grad_norm": 1.4197877645492554, - "learning_rate": 2.8315758205851777e-05, - "loss": 0.1264, + "epoch": 0.5614139313827418, + "grad_norm": 0.6347576379776001, + "learning_rate": 9.43858606861726e-06, + "loss": 0.0883, "step": 3780 }, { - "epoch": 0.2814495767117184, - "grad_norm": 4.769101142883301, - "learning_rate": 2.831130253972969e-05, - "loss": 0.0845, + "epoch": 0.5628991534234368, + "grad_norm": 0.949237048625946, + "learning_rate": 9.437100846576564e-06, + "loss": 0.1049, "step": 3790 }, { - "epoch": 0.28219218773206595, - "grad_norm": 1.278130292892456, - "learning_rate": 2.8306846873607604e-05, - "loss": 0.0915, + "epoch": 0.5643843754641319, + "grad_norm": 0.5857681035995483, + "learning_rate": 9.435615624535868e-06, + "loss": 0.0767, "step": 3800 }, { - "epoch": 0.2829347987524135, - "grad_norm": 2.7825405597686768, - "learning_rate": 2.8302391207485522e-05, - "loss": 0.1035, + "epoch": 0.565869597504827, + "grad_norm": 0.973402202129364, + "learning_rate": 9.434130402495175e-06, + "loss": 0.0967, "step": 3810 }, { - "epoch": 0.283677409772761, - "grad_norm": 3.6590402126312256, - "learning_rate": 2.8297935541363434e-05, - "loss": 0.0821, + "epoch": 0.567354819545522, + "grad_norm": 0.8165373206138611, + "learning_rate": 9.432645180454479e-06, + "loss": 0.0912, "step": 3820 }, { - "epoch": 0.2844200207931086, - "grad_norm": 3.2565736770629883, - "learning_rate": 2.829347987524135e-05, - "loss": 0.097, + "epoch": 0.5688400415862171, + "grad_norm": 1.0625662803649902, + "learning_rate": 9.431159958413783e-06, + "loss": 0.0714, "step": 3830 }, { - "epoch": 0.28516263181345614, - "grad_norm": 1.7409720420837402, - "learning_rate": 2.8289024209119264e-05, - "loss": 0.0929, + "epoch": 0.5703252636269123, + "grad_norm": 0.6603330373764038, + "learning_rate": 9.42967473637309e-06, + "loss": 0.0694, "step": 3840 }, { - "epoch": 0.28590524283380364, - "grad_norm": 2.9615607261657715, - "learning_rate": 2.828456854299718e-05, - "loss": 0.0771, + "epoch": 0.5718104856676073, + "grad_norm": 1.0427000522613525, + "learning_rate": 9.428189514332392e-06, + "loss": 0.0961, "step": 3850 }, { - "epoch": 0.2866478538541512, - "grad_norm": 2.6329636573791504, - "learning_rate": 2.8280112876875094e-05, - "loss": 0.1215, + "epoch": 0.5732957077083024, + "grad_norm": 1.2957814931869507, + "learning_rate": 9.426704292291698e-06, + "loss": 0.0879, "step": 3860 }, { - "epoch": 0.28739046487449876, - "grad_norm": 1.5111801624298096, - "learning_rate": 2.827565721075301e-05, - "loss": 0.0735, + "epoch": 0.5747809297489975, + "grad_norm": 0.7232264280319214, + "learning_rate": 9.425219070251005e-06, + "loss": 0.067, "step": 3870 }, { - "epoch": 0.28813307589484627, - "grad_norm": 2.780776262283325, - "learning_rate": 2.8271201544630924e-05, - "loss": 0.0969, + "epoch": 0.5762661517896925, + "grad_norm": 1.0114610195159912, + "learning_rate": 9.423733848210307e-06, + "loss": 0.1031, "step": 3880 }, { - "epoch": 0.28887568691519383, - "grad_norm": 1.8121346235275269, - "learning_rate": 2.826674587850884e-05, - "loss": 0.1015, + "epoch": 0.5777513738303877, + "grad_norm": 1.2267494201660156, + "learning_rate": 9.422248626169613e-06, + "loss": 0.0863, "step": 3890 }, { - "epoch": 0.2896182979355414, - "grad_norm": 1.4083514213562012, - "learning_rate": 2.826229021238675e-05, - "loss": 0.066, + "epoch": 0.5792365958710828, + "grad_norm": 0.9242755174636841, + "learning_rate": 9.420763404128918e-06, + "loss": 0.0846, "step": 3900 }, { - "epoch": 0.2903609089558889, - "grad_norm": 2.5285115242004395, - "learning_rate": 2.825783454626467e-05, - "loss": 0.0923, + "epoch": 0.5807218179117778, + "grad_norm": 0.7261422872543335, + "learning_rate": 9.419278182088222e-06, + "loss": 0.0708, "step": 3910 }, { - "epoch": 0.29110351997623646, - "grad_norm": 0.7836059927940369, - "learning_rate": 2.8253378880142584e-05, - "loss": 0.0506, + "epoch": 0.5822070399524729, + "grad_norm": 0.7982576489448547, + "learning_rate": 9.417792960047528e-06, + "loss": 0.0981, "step": 3920 }, { - "epoch": 0.29184613099658396, - "grad_norm": 1.5895808935165405, - "learning_rate": 2.8248923214020495e-05, - "loss": 0.0886, + "epoch": 0.5836922619931679, + "grad_norm": 1.1539570093154907, + "learning_rate": 9.416307738006833e-06, + "loss": 0.084, "step": 3930 }, { - "epoch": 0.2925887420169315, - "grad_norm": 1.657165288925171, - "learning_rate": 2.824446754789841e-05, - "loss": 0.0845, + "epoch": 0.585177484033863, + "grad_norm": 0.8333094716072083, + "learning_rate": 9.414822515966137e-06, + "loss": 0.0766, "step": 3940 }, { - "epoch": 0.2933313530372791, - "grad_norm": 1.5813052654266357, - "learning_rate": 2.824001188177633e-05, - "loss": 0.1015, + "epoch": 0.5866627060745582, + "grad_norm": 0.9388213753700256, + "learning_rate": 9.413337293925443e-06, + "loss": 0.0733, "step": 3950 }, { - "epoch": 0.2940739640576266, - "grad_norm": 2.2893810272216797, - "learning_rate": 2.823555621565424e-05, - "loss": 0.1201, + "epoch": 0.5881479281152532, + "grad_norm": 0.8660235404968262, + "learning_rate": 9.411852071884748e-06, + "loss": 0.091, "step": 3960 }, { - "epoch": 0.29481657507797415, - "grad_norm": 3.8998055458068848, - "learning_rate": 2.8231100549532155e-05, - "loss": 0.1317, + "epoch": 0.5896331501559483, + "grad_norm": 1.156136155128479, + "learning_rate": 9.410366849844052e-06, + "loss": 0.0848, "step": 3970 }, { - "epoch": 0.2955591860983217, - "grad_norm": 1.5163902044296265, - "learning_rate": 2.8226644883410074e-05, - "loss": 0.1226, + "epoch": 0.5911183721966434, + "grad_norm": 1.7612046003341675, + "learning_rate": 9.408881627803358e-06, + "loss": 0.0893, "step": 3980 }, { - "epoch": 0.2963017971186692, - "grad_norm": 2.5356316566467285, - "learning_rate": 2.8222189217287985e-05, - "loss": 0.1257, + "epoch": 0.5926035942373384, + "grad_norm": 0.9188507199287415, + "learning_rate": 9.407396405762663e-06, + "loss": 0.0916, "step": 3990 }, { - "epoch": 0.2970444081390168, - "grad_norm": 0.5978565216064453, - "learning_rate": 2.82177335511659e-05, - "loss": 0.0741, + "epoch": 0.5940888162780336, + "grad_norm": 0.682065486907959, + "learning_rate": 9.405911183721967e-06, + "loss": 0.0862, "step": 4000 }, { - "epoch": 0.29778701915936434, - "grad_norm": 3.2044990062713623, - "learning_rate": 2.8213277885043812e-05, - "loss": 0.0927, + "epoch": 0.5955740383187287, + "grad_norm": 0.9579351544380188, + "learning_rate": 9.404425961681272e-06, + "loss": 0.0782, "step": 4010 }, { - "epoch": 0.29852963017971185, - "grad_norm": 2.5349199771881104, - "learning_rate": 2.820882221892173e-05, - "loss": 0.1082, + "epoch": 0.5970592603594237, + "grad_norm": 0.5488564372062683, + "learning_rate": 9.402940739640576e-06, + "loss": 0.0868, "step": 4020 }, { - "epoch": 0.2992722412000594, - "grad_norm": 2.297657012939453, - "learning_rate": 2.8204366552799645e-05, - "loss": 0.1034, + "epoch": 0.5985444824001188, + "grad_norm": 0.4922982156276703, + "learning_rate": 9.401455517599882e-06, + "loss": 0.079, "step": 4030 }, { - "epoch": 0.30001485222040697, - "grad_norm": 2.956207036972046, - "learning_rate": 2.8199910886677557e-05, - "loss": 0.0724, + "epoch": 0.6000297044408139, + "grad_norm": 0.7463983297348022, + "learning_rate": 9.399970295559187e-06, + "loss": 0.0917, "step": 4040 }, { - "epoch": 0.3007574632407545, - "grad_norm": 2.382066488265991, - "learning_rate": 2.8195455220555475e-05, - "loss": 0.0734, + "epoch": 0.601514926481509, + "grad_norm": 0.34033700823783875, + "learning_rate": 9.398485073518491e-06, + "loss": 0.069, "step": 4050 }, { - "epoch": 0.30150007426110204, - "grad_norm": 2.7788658142089844, - "learning_rate": 2.819099955443339e-05, - "loss": 0.1129, + "epoch": 0.6030001485222041, + "grad_norm": 0.8693416118621826, + "learning_rate": 9.396999851477797e-06, + "loss": 0.0917, "step": 4060 }, { - "epoch": 0.3022426852814496, - "grad_norm": 1.6891690492630005, - "learning_rate": 2.8186543888311302e-05, - "loss": 0.1168, + "epoch": 0.6044853705628992, + "grad_norm": 0.6260632276535034, + "learning_rate": 9.395514629437102e-06, + "loss": 0.0601, "step": 4070 }, { - "epoch": 0.3029852963017971, - "grad_norm": 2.3250083923339844, - "learning_rate": 2.8182088222189217e-05, - "loss": 0.0998, + "epoch": 0.6059705926035942, + "grad_norm": 1.0476924180984497, + "learning_rate": 9.394029407396406e-06, + "loss": 0.1001, "step": 4080 }, { - "epoch": 0.30372790732214466, - "grad_norm": 2.700108766555786, - "learning_rate": 2.8177632556067135e-05, - "loss": 0.1129, + "epoch": 0.6074558146442893, + "grad_norm": 1.490868091583252, + "learning_rate": 9.392544185355712e-06, + "loss": 0.0955, "step": 4090 }, { - "epoch": 0.3044705183424922, - "grad_norm": 2.239126443862915, - "learning_rate": 2.8173176889945047e-05, - "loss": 0.1041, + "epoch": 0.6089410366849844, + "grad_norm": 0.8268522620201111, + "learning_rate": 9.391058963315017e-06, + "loss": 0.0735, "step": 4100 }, { - "epoch": 0.30521312936283973, - "grad_norm": 2.046869993209839, - "learning_rate": 2.8168721223822962e-05, - "loss": 0.1011, + "epoch": 0.6104262587256795, + "grad_norm": 0.732266366481781, + "learning_rate": 9.389573741274321e-06, + "loss": 0.0884, "step": 4110 }, { - "epoch": 0.3059557403831873, - "grad_norm": 2.6533050537109375, - "learning_rate": 2.8164265557700877e-05, - "loss": 0.0722, + "epoch": 0.6119114807663746, + "grad_norm": 1.1488885879516602, + "learning_rate": 9.388088519233626e-06, + "loss": 0.0823, "step": 4120 }, { - "epoch": 0.30669835140353485, - "grad_norm": 1.3280346393585205, - "learning_rate": 2.8159809891578792e-05, - "loss": 0.0925, + "epoch": 0.6133967028070697, + "grad_norm": 1.073241114616394, + "learning_rate": 9.386603297192932e-06, + "loss": 0.0813, "step": 4130 }, { - "epoch": 0.30744096242388236, - "grad_norm": 1.894659161567688, - "learning_rate": 2.8155354225456707e-05, - "loss": 0.0688, + "epoch": 0.6148819248477647, + "grad_norm": 0.7972573041915894, + "learning_rate": 9.385118075152236e-06, + "loss": 0.0698, "step": 4140 }, { - "epoch": 0.3081835734442299, - "grad_norm": 1.138370394706726, - "learning_rate": 2.8150898559334622e-05, - "loss": 0.085, + "epoch": 0.6163671468884598, + "grad_norm": 1.5724502801895142, + "learning_rate": 9.38363285311154e-06, + "loss": 0.0873, "step": 4150 }, { - "epoch": 0.3089261844645775, - "grad_norm": 2.353771686553955, - "learning_rate": 2.8146442893212537e-05, - "loss": 0.1051, + "epoch": 0.617852368929155, + "grad_norm": 0.7835360169410706, + "learning_rate": 9.382147631070847e-06, + "loss": 0.0609, "step": 4160 }, { - "epoch": 0.309668795484925, - "grad_norm": 1.1877645254135132, - "learning_rate": 2.8141987227090452e-05, - "loss": 0.1023, + "epoch": 0.61933759096985, + "grad_norm": 1.0752160549163818, + "learning_rate": 9.380662409030151e-06, + "loss": 0.0866, "step": 4170 }, { - "epoch": 0.31041140650527255, - "grad_norm": 1.905053973197937, - "learning_rate": 2.8137531560968363e-05, - "loss": 0.1233, + "epoch": 0.6208228130105451, + "grad_norm": 1.1483020782470703, + "learning_rate": 9.379177186989456e-06, + "loss": 0.0835, "step": 4180 }, { - "epoch": 0.3111540175256201, - "grad_norm": 2.760115385055542, - "learning_rate": 2.813307589484628e-05, - "loss": 0.1237, + "epoch": 0.6223080350512402, + "grad_norm": 0.9119643568992615, + "learning_rate": 9.37769196494876e-06, + "loss": 0.0961, "step": 4190 }, { - "epoch": 0.3118966285459676, - "grad_norm": 2.511549711227417, - "learning_rate": 2.8128620228724193e-05, - "loss": 0.0922, + "epoch": 0.6237932570919352, + "grad_norm": 0.8819010853767395, + "learning_rate": 9.376206742908064e-06, + "loss": 0.0834, "step": 4200 }, { - "epoch": 0.3126392395663152, - "grad_norm": 1.948473572731018, - "learning_rate": 2.8124164562602108e-05, - "loss": 0.0985, + "epoch": 0.6252784791326303, + "grad_norm": 0.7143545746803284, + "learning_rate": 9.37472152086737e-06, + "loss": 0.0901, "step": 4210 }, { - "epoch": 0.3133818505866627, - "grad_norm": 3.190645456314087, - "learning_rate": 2.8119708896480027e-05, - "loss": 0.106, + "epoch": 0.6267637011733254, + "grad_norm": 0.913231372833252, + "learning_rate": 9.373236298826675e-06, + "loss": 0.074, "step": 4220 }, { - "epoch": 0.31412446160701024, - "grad_norm": 2.2379205226898193, - "learning_rate": 2.8115253230357938e-05, - "loss": 0.0896, + "epoch": 0.6282489232140205, + "grad_norm": 2.2083752155303955, + "learning_rate": 9.37175107678598e-06, + "loss": 0.1012, "step": 4230 }, { - "epoch": 0.3148670726273578, - "grad_norm": 1.1914069652557373, - "learning_rate": 2.8110797564235853e-05, - "loss": 0.0659, + "epoch": 0.6297341452547156, + "grad_norm": 1.3217167854309082, + "learning_rate": 9.370265854745286e-06, + "loss": 0.1131, "step": 4240 }, { - "epoch": 0.3156096836477053, - "grad_norm": 1.5502461194992065, - "learning_rate": 2.8106341898113768e-05, - "loss": 0.0995, + "epoch": 0.6312193672954106, + "grad_norm": 0.7895593643188477, + "learning_rate": 9.36878063270459e-06, + "loss": 0.0775, "step": 4250 }, { - "epoch": 0.31635229466805287, - "grad_norm": 2.563169240951538, - "learning_rate": 2.8101886231991683e-05, - "loss": 0.0967, + "epoch": 0.6327045893361057, + "grad_norm": 0.6688397526741028, + "learning_rate": 9.367295410663894e-06, + "loss": 0.0823, "step": 4260 }, { - "epoch": 0.31709490568840043, - "grad_norm": 4.562102317810059, - "learning_rate": 2.8097430565869598e-05, - "loss": 0.107, + "epoch": 0.6341898113768009, + "grad_norm": 0.959603488445282, + "learning_rate": 9.3658101886232e-06, + "loss": 0.0641, "step": 4270 }, { - "epoch": 0.31783751670874794, - "grad_norm": 0.7943652868270874, - "learning_rate": 2.8092974899747513e-05, - "loss": 0.0811, + "epoch": 0.6356750334174959, + "grad_norm": 0.7541965842247009, + "learning_rate": 9.364324966582505e-06, + "loss": 0.0808, "step": 4280 }, { - "epoch": 0.3185801277290955, - "grad_norm": 2.5280022621154785, - "learning_rate": 2.8088519233625428e-05, - "loss": 0.0628, + "epoch": 0.637160255458191, + "grad_norm": 1.5787636041641235, + "learning_rate": 9.36283974454181e-06, + "loss": 0.0824, "step": 4290 }, { - "epoch": 0.31932273874944306, - "grad_norm": 1.1994893550872803, - "learning_rate": 2.8084063567503343e-05, - "loss": 0.0747, + "epoch": 0.6386454774988861, + "grad_norm": 1.1910892724990845, + "learning_rate": 9.361354522501116e-06, + "loss": 0.0828, "step": 4300 }, { - "epoch": 0.32006534976979056, - "grad_norm": 2.5964338779449463, - "learning_rate": 2.8079607901381255e-05, - "loss": 0.0978, + "epoch": 0.6401306995395811, + "grad_norm": 1.3543577194213867, + "learning_rate": 9.359869300460418e-06, + "loss": 0.0843, "step": 4310 }, { - "epoch": 0.3208079607901381, - "grad_norm": 1.0539716482162476, - "learning_rate": 2.8075152235259173e-05, - "loss": 0.1243, + "epoch": 0.6416159215802762, + "grad_norm": 0.8576405048370361, + "learning_rate": 9.358384078419724e-06, + "loss": 0.088, "step": 4320 }, { - "epoch": 0.3215505718104857, - "grad_norm": 3.5578460693359375, - "learning_rate": 2.8070696569137088e-05, - "loss": 0.1073, + "epoch": 0.6431011436209714, + "grad_norm": 0.8443158268928528, + "learning_rate": 9.35689885637903e-06, + "loss": 0.0838, "step": 4330 }, { - "epoch": 0.3222931828308332, - "grad_norm": 3.5634069442749023, - "learning_rate": 2.8066240903015e-05, - "loss": 0.1102, + "epoch": 0.6445863656616664, + "grad_norm": 0.6524272561073303, + "learning_rate": 9.355413634338333e-06, + "loss": 0.11, "step": 4340 }, { - "epoch": 0.32303579385118075, - "grad_norm": 1.1170202493667603, - "learning_rate": 2.8061785236892915e-05, - "loss": 0.0718, + "epoch": 0.6460715877023615, + "grad_norm": 1.3981050252914429, + "learning_rate": 9.35392841229764e-06, + "loss": 0.0648, "step": 4350 }, { - "epoch": 0.3237784048715283, - "grad_norm": 2.6861186027526855, - "learning_rate": 2.8057329570770833e-05, - "loss": 0.0986, + "epoch": 0.6475568097430566, + "grad_norm": 0.7724353671073914, + "learning_rate": 9.352443190256944e-06, + "loss": 0.1112, "step": 4360 }, { - "epoch": 0.3245210158918758, - "grad_norm": 2.0378482341766357, - "learning_rate": 2.8052873904648745e-05, - "loss": 0.1087, + "epoch": 0.6490420317837516, + "grad_norm": 0.9034995436668396, + "learning_rate": 9.350957968216248e-06, + "loss": 0.0696, "step": 4370 }, { - "epoch": 0.3252636269122234, - "grad_norm": 2.456540822982788, - "learning_rate": 2.804841823852666e-05, - "loss": 0.0763, + "epoch": 0.6505272538244468, + "grad_norm": 0.7171207666397095, + "learning_rate": 9.349472746175554e-06, + "loss": 0.0691, "step": 4380 }, { - "epoch": 0.32600623793257094, - "grad_norm": 1.6984671354293823, - "learning_rate": 2.8043962572404578e-05, - "loss": 0.1253, + "epoch": 0.6520124758651419, + "grad_norm": 1.1618750095367432, + "learning_rate": 9.347987524134859e-06, + "loss": 0.1043, "step": 4390 }, { - "epoch": 0.32674884895291845, - "grad_norm": 3.025683641433716, - "learning_rate": 2.803950690628249e-05, - "loss": 0.1072, + "epoch": 0.6534976979058369, + "grad_norm": 1.0285515785217285, + "learning_rate": 9.346502302094163e-06, + "loss": 0.0683, "step": 4400 }, { - "epoch": 0.327491459973266, - "grad_norm": 2.3869524002075195, - "learning_rate": 2.8035051240160405e-05, - "loss": 0.0733, + "epoch": 0.654982919946532, + "grad_norm": 0.7944039702415466, + "learning_rate": 9.34501708005347e-06, + "loss": 0.0727, "step": 4410 }, { - "epoch": 0.32823407099361357, - "grad_norm": 1.5265862941741943, - "learning_rate": 2.8030595574038316e-05, - "loss": 0.0541, + "epoch": 0.6564681419872271, + "grad_norm": 1.1061533689498901, + "learning_rate": 9.343531858012774e-06, + "loss": 0.074, "step": 4420 }, { - "epoch": 0.3289766820139611, - "grad_norm": 2.0215351581573486, - "learning_rate": 2.8026139907916235e-05, - "loss": 0.0865, + "epoch": 0.6579533640279221, + "grad_norm": 0.9832868576049805, + "learning_rate": 9.342046635972078e-06, + "loss": 0.0649, "step": 4430 }, { - "epoch": 0.32971929303430864, - "grad_norm": 1.381551742553711, - "learning_rate": 2.802168424179415e-05, - "loss": 0.1084, + "epoch": 0.6594385860686173, + "grad_norm": 0.5685659050941467, + "learning_rate": 9.340561413931384e-06, + "loss": 0.0808, "step": 4440 }, { - "epoch": 0.3304619040546562, - "grad_norm": 1.7766149044036865, - "learning_rate": 2.801722857567206e-05, - "loss": 0.0987, + "epoch": 0.6609238081093124, + "grad_norm": 1.069838523864746, + "learning_rate": 9.339076191890689e-06, + "loss": 0.0832, "step": 4450 }, { - "epoch": 0.3312045150750037, - "grad_norm": 2.317441701889038, - "learning_rate": 2.801277290954998e-05, - "loss": 0.0809, + "epoch": 0.6624090301500074, + "grad_norm": 1.2676031589508057, + "learning_rate": 9.337590969849993e-06, + "loss": 0.0941, "step": 4460 }, { - "epoch": 0.33194712609535126, - "grad_norm": 2.322162389755249, - "learning_rate": 2.8008317243427895e-05, - "loss": 0.114, + "epoch": 0.6638942521907025, + "grad_norm": 1.0843100547790527, + "learning_rate": 9.3361057478093e-06, + "loss": 0.0984, "step": 4470 }, { - "epoch": 0.33268973711569877, - "grad_norm": 2.353233575820923, - "learning_rate": 2.8003861577305806e-05, - "loss": 0.098, + "epoch": 0.6653794742313975, + "grad_norm": 0.958575963973999, + "learning_rate": 9.334620525768602e-06, + "loss": 0.0962, "step": 4480 }, { - "epoch": 0.33343234813604633, - "grad_norm": 0.9074286818504333, - "learning_rate": 2.7999405911183725e-05, - "loss": 0.1093, + "epoch": 0.6668646962720927, + "grad_norm": 0.9856349229812622, + "learning_rate": 9.333135303727908e-06, + "loss": 0.1015, "step": 4490 }, { - "epoch": 0.3341749591563939, - "grad_norm": 4.220743656158447, - "learning_rate": 2.799495024506164e-05, - "loss": 0.0983, + "epoch": 0.6683499183127878, + "grad_norm": 0.6050782203674316, + "learning_rate": 9.331650081687214e-06, + "loss": 0.0652, "step": 4500 }, { - "epoch": 0.3349175701767414, - "grad_norm": 2.652031898498535, - "learning_rate": 2.799049457893955e-05, - "loss": 0.0873, + "epoch": 0.6698351403534828, + "grad_norm": 0.7362445592880249, + "learning_rate": 9.330164859646517e-06, + "loss": 0.0692, "step": 4510 }, { - "epoch": 0.33566018119708896, - "grad_norm": 1.0324969291687012, - "learning_rate": 2.7986038912817466e-05, - "loss": 0.0755, + "epoch": 0.6713203623941779, + "grad_norm": 0.9347787499427795, + "learning_rate": 9.328679637605823e-06, + "loss": 0.0876, "step": 4520 }, { - "epoch": 0.3364027922174365, - "grad_norm": 0.8681501746177673, - "learning_rate": 2.798158324669538e-05, - "loss": 0.1043, + "epoch": 0.672805584434873, + "grad_norm": 1.99434494972229, + "learning_rate": 9.327194415565128e-06, + "loss": 0.0868, "step": 4530 }, { - "epoch": 0.337145403237784, - "grad_norm": 1.413583755493164, - "learning_rate": 2.7977127580573296e-05, - "loss": 0.0682, + "epoch": 0.674290806475568, + "grad_norm": 0.7971028685569763, + "learning_rate": 9.325709193524432e-06, + "loss": 0.0735, "step": 4540 }, { - "epoch": 0.3378880142581316, - "grad_norm": 1.2596721649169922, - "learning_rate": 2.797267191445121e-05, - "loss": 0.1021, + "epoch": 0.6757760285162632, + "grad_norm": 0.8102898001670837, + "learning_rate": 9.324223971483738e-06, + "loss": 0.0713, "step": 4550 }, { - "epoch": 0.33863062527847915, - "grad_norm": 2.051772117614746, - "learning_rate": 2.7968216248329126e-05, - "loss": 0.0646, + "epoch": 0.6772612505569583, + "grad_norm": 0.6246395707130432, + "learning_rate": 9.322738749443043e-06, + "loss": 0.0761, "step": 4560 }, { - "epoch": 0.33937323629882665, - "grad_norm": 2.39245343208313, - "learning_rate": 2.796376058220704e-05, - "loss": 0.0913, + "epoch": 0.6787464725976533, + "grad_norm": 0.6454740166664124, + "learning_rate": 9.321253527402347e-06, + "loss": 0.0869, "step": 4570 }, { - "epoch": 0.3401158473191742, - "grad_norm": 1.1950043439865112, - "learning_rate": 2.7959304916084956e-05, - "loss": 0.0772, + "epoch": 0.6802316946383484, + "grad_norm": 0.6368558406829834, + "learning_rate": 9.319768305361653e-06, + "loss": 0.082, "step": 4580 }, { - "epoch": 0.3408584583395218, - "grad_norm": 1.7713611125946045, - "learning_rate": 2.7954849249962868e-05, - "loss": 0.1027, + "epoch": 0.6817169166790435, + "grad_norm": 0.5236619710922241, + "learning_rate": 9.318283083320958e-06, + "loss": 0.0726, "step": 4590 }, { - "epoch": 0.3416010693598693, - "grad_norm": 1.3670064210891724, - "learning_rate": 2.7950393583840786e-05, - "loss": 0.0623, + "epoch": 0.6832021387197386, + "grad_norm": 2.013545036315918, + "learning_rate": 9.316797861280262e-06, + "loss": 0.098, "step": 4600 }, { - "epoch": 0.34234368038021684, - "grad_norm": 1.9665565490722656, - "learning_rate": 2.7945937917718698e-05, - "loss": 0.0808, + "epoch": 0.6846873607604337, + "grad_norm": 1.2313984632492065, + "learning_rate": 9.315312639239567e-06, + "loss": 0.0712, "step": 4610 }, { - "epoch": 0.3430862914005644, - "grad_norm": 3.5627613067626953, - "learning_rate": 2.7941482251596613e-05, - "loss": 0.0861, + "epoch": 0.6861725828011288, + "grad_norm": 0.9200993776321411, + "learning_rate": 9.313827417198873e-06, + "loss": 0.0858, "step": 4620 }, { - "epoch": 0.3438289024209119, - "grad_norm": 1.8066272735595703, - "learning_rate": 2.793702658547453e-05, - "loss": 0.0831, + "epoch": 0.6876578048418238, + "grad_norm": 0.725279688835144, + "learning_rate": 9.312342195158177e-06, + "loss": 0.0775, "step": 4630 }, { - "epoch": 0.34457151344125947, - "grad_norm": 2.1542608737945557, - "learning_rate": 2.7932570919352443e-05, - "loss": 0.1111, + "epoch": 0.6891430268825189, + "grad_norm": 1.057055115699768, + "learning_rate": 9.310856973117482e-06, + "loss": 0.0921, "step": 4640 }, { - "epoch": 0.34531412446160703, - "grad_norm": 2.243263006210327, - "learning_rate": 2.7928115253230358e-05, - "loss": 0.0756, + "epoch": 0.6906282489232141, + "grad_norm": 0.567707896232605, + "learning_rate": 9.309371751076786e-06, + "loss": 0.0942, "step": 4650 }, { - "epoch": 0.34605673548195454, - "grad_norm": 1.6739652156829834, - "learning_rate": 2.7923659587108273e-05, - "loss": 0.1328, + "epoch": 0.6921134709639091, + "grad_norm": 1.4313199520111084, + "learning_rate": 9.307886529036092e-06, + "loss": 0.0752, "step": 4660 }, { - "epoch": 0.3467993465023021, - "grad_norm": 2.321486711502075, - "learning_rate": 2.7919203920986188e-05, - "loss": 0.0903, + "epoch": 0.6935986930046042, + "grad_norm": 0.7691679000854492, + "learning_rate": 9.306401306995397e-06, + "loss": 0.0784, "step": 4670 }, { - "epoch": 0.34754195752264966, - "grad_norm": 2.773947238922119, - "learning_rate": 2.7914748254864103e-05, - "loss": 0.0967, + "epoch": 0.6950839150452993, + "grad_norm": 1.1163687705993652, + "learning_rate": 9.304916084954701e-06, + "loss": 0.0698, "step": 4680 }, { - "epoch": 0.34828456854299716, - "grad_norm": 1.9256445169448853, - "learning_rate": 2.7910292588742018e-05, - "loss": 0.1307, + "epoch": 0.6965691370859943, + "grad_norm": 0.7844257950782776, + "learning_rate": 9.303430862914007e-06, + "loss": 0.0697, "step": 4690 }, { - "epoch": 0.3490271795633447, - "grad_norm": 2.0387189388275146, - "learning_rate": 2.7905836922619933e-05, - "loss": 0.0758, + "epoch": 0.6980543591266894, + "grad_norm": 1.8008625507354736, + "learning_rate": 9.301945640873312e-06, + "loss": 0.0866, "step": 4700 }, { - "epoch": 0.3497697905836923, - "grad_norm": 0.6718337535858154, - "learning_rate": 2.7901381256497848e-05, - "loss": 0.0923, + "epoch": 0.6995395811673846, + "grad_norm": 1.1329940557479858, + "learning_rate": 9.300460418832616e-06, + "loss": 0.0722, "step": 4710 }, { - "epoch": 0.3505124016040398, - "grad_norm": 2.4144012928009033, - "learning_rate": 2.789692559037576e-05, - "loss": 0.0815, + "epoch": 0.7010248032080796, + "grad_norm": 1.0240144729614258, + "learning_rate": 9.29897519679192e-06, + "loss": 0.0707, "step": 4720 }, { - "epoch": 0.35125501262438735, - "grad_norm": 0.7492033839225769, - "learning_rate": 2.7892469924253678e-05, - "loss": 0.1039, + "epoch": 0.7025100252487747, + "grad_norm": 1.1364368200302124, + "learning_rate": 9.297489974751227e-06, + "loss": 0.0837, "step": 4730 }, { - "epoch": 0.3519976236447349, - "grad_norm": 3.2652149200439453, - "learning_rate": 2.7888014258131593e-05, - "loss": 0.1027, + "epoch": 0.7039952472894698, + "grad_norm": 0.9570313096046448, + "learning_rate": 9.296004752710531e-06, + "loss": 0.0711, "step": 4740 }, { - "epoch": 0.3527402346650824, - "grad_norm": 1.8765047788619995, - "learning_rate": 2.7883558592009504e-05, - "loss": 0.1082, + "epoch": 0.7054804693301648, + "grad_norm": 1.208341360092163, + "learning_rate": 9.294519530669835e-06, + "loss": 0.0762, "step": 4750 }, { - "epoch": 0.35348284568543, - "grad_norm": 2.7471463680267334, - "learning_rate": 2.787910292588742e-05, - "loss": 0.0829, + "epoch": 0.70696569137086, + "grad_norm": 0.8793076276779175, + "learning_rate": 9.293034308629142e-06, + "loss": 0.0722, "step": 4760 }, { - "epoch": 0.3542254567057775, - "grad_norm": 4.803821563720703, - "learning_rate": 2.7874647259765338e-05, - "loss": 0.0897, + "epoch": 0.708450913411555, + "grad_norm": 1.1761753559112549, + "learning_rate": 9.291549086588446e-06, + "loss": 0.0874, "step": 4770 }, { - "epoch": 0.35496806772612505, - "grad_norm": 1.495339035987854, - "learning_rate": 2.787019159364325e-05, - "loss": 0.0898, + "epoch": 0.7099361354522501, + "grad_norm": 1.384575605392456, + "learning_rate": 9.29006386454775e-06, + "loss": 0.0855, "step": 4780 }, { - "epoch": 0.3557106787464726, - "grad_norm": 2.4038844108581543, - "learning_rate": 2.7865735927521164e-05, - "loss": 0.0976, + "epoch": 0.7114213574929452, + "grad_norm": 1.1813658475875854, + "learning_rate": 9.288578642507056e-06, + "loss": 0.0775, "step": 4790 }, { - "epoch": 0.3564532897668201, - "grad_norm": 1.810927152633667, - "learning_rate": 2.7861280261399083e-05, - "loss": 0.0859, + "epoch": 0.7129065795336402, + "grad_norm": 0.9031196236610413, + "learning_rate": 9.287093420466361e-06, + "loss": 0.0854, "step": 4800 }, { - "epoch": 0.3571959007871677, - "grad_norm": 3.185044527053833, - "learning_rate": 2.7856824595276994e-05, - "loss": 0.0832, + "epoch": 0.7143918015743354, + "grad_norm": 1.7310291528701782, + "learning_rate": 9.285608198425665e-06, + "loss": 0.0893, "step": 4810 }, { - "epoch": 0.35793851180751524, - "grad_norm": 4.21889066696167, - "learning_rate": 2.785236892915491e-05, - "loss": 0.0992, + "epoch": 0.7158770236150305, + "grad_norm": 0.6749886870384216, + "learning_rate": 9.28412297638497e-06, + "loss": 0.0928, "step": 4820 }, { - "epoch": 0.35868112282786274, - "grad_norm": 1.788333773612976, - "learning_rate": 2.784791326303282e-05, - "loss": 0.0538, + "epoch": 0.7173622456557255, + "grad_norm": 1.170947790145874, + "learning_rate": 9.282637754344274e-06, + "loss": 0.0974, "step": 4830 }, { - "epoch": 0.3594237338482103, - "grad_norm": 3.176811933517456, - "learning_rate": 2.784345759691074e-05, - "loss": 0.107, + "epoch": 0.7188474676964206, + "grad_norm": 0.5311076045036316, + "learning_rate": 9.28115253230358e-06, + "loss": 0.0891, "step": 4840 }, { - "epoch": 0.36016634486855786, - "grad_norm": 3.0961802005767822, - "learning_rate": 2.7839001930788654e-05, - "loss": 0.0717, + "epoch": 0.7203326897371157, + "grad_norm": 0.5467868447303772, + "learning_rate": 9.279667310262885e-06, + "loss": 0.0788, "step": 4850 }, { - "epoch": 0.36090895588890537, - "grad_norm": 1.6400991678237915, - "learning_rate": 2.7834546264666566e-05, - "loss": 0.0998, + "epoch": 0.7218179117778107, + "grad_norm": 1.394687294960022, + "learning_rate": 9.27818208822219e-06, + "loss": 0.0939, "step": 4860 }, { - "epoch": 0.36165156690925293, - "grad_norm": 1.0916283130645752, - "learning_rate": 2.7830090598544484e-05, - "loss": 0.0795, + "epoch": 0.7233031338185059, + "grad_norm": 1.7100565433502197, + "learning_rate": 9.276696866181495e-06, + "loss": 0.0985, "step": 4870 }, { - "epoch": 0.3623941779296005, - "grad_norm": 0.8684899210929871, - "learning_rate": 2.78256349324224e-05, - "loss": 0.1083, + "epoch": 0.724788355859201, + "grad_norm": 1.3609228134155273, + "learning_rate": 9.2752116441408e-06, + "loss": 0.0685, "step": 4880 }, { - "epoch": 0.363136788949948, - "grad_norm": 6.465219497680664, - "learning_rate": 2.782117926630031e-05, - "loss": 0.1498, + "epoch": 0.726273577899896, + "grad_norm": 0.7217347621917725, + "learning_rate": 9.273726422100104e-06, + "loss": 0.0702, "step": 4890 }, { - "epoch": 0.36387939997029556, - "grad_norm": 1.2663229703903198, - "learning_rate": 2.781672360017823e-05, - "loss": 0.0654, + "epoch": 0.7277587999405911, + "grad_norm": 1.1523468494415283, + "learning_rate": 9.27224120005941e-06, + "loss": 0.1019, "step": 4900 }, { - "epoch": 0.3646220109906431, - "grad_norm": 3.739539861679077, - "learning_rate": 2.7812267934056144e-05, - "loss": 0.1353, + "epoch": 0.7292440219812862, + "grad_norm": 1.0476207733154297, + "learning_rate": 9.270755978018715e-06, + "loss": 0.0729, "step": 4910 }, { - "epoch": 0.3653646220109906, - "grad_norm": 3.384850025177002, - "learning_rate": 2.7807812267934056e-05, - "loss": 0.111, + "epoch": 0.7307292440219813, + "grad_norm": 1.555390477180481, + "learning_rate": 9.26927075597802e-06, + "loss": 0.0845, "step": 4920 }, { - "epoch": 0.3661072330313382, - "grad_norm": 2.7936530113220215, - "learning_rate": 2.780335660181197e-05, - "loss": 0.1048, + "epoch": 0.7322144660626764, + "grad_norm": 1.0015746355056763, + "learning_rate": 9.267785533937325e-06, + "loss": 0.079, "step": 4930 }, { - "epoch": 0.36684984405168575, - "grad_norm": 1.8607102632522583, - "learning_rate": 2.7798900935689886e-05, - "loss": 0.0717, + "epoch": 0.7336996881033715, + "grad_norm": 0.8952016830444336, + "learning_rate": 9.266300311896628e-06, + "loss": 0.0992, "step": 4940 }, { - "epoch": 0.36759245507203325, - "grad_norm": 2.1067261695861816, - "learning_rate": 2.77944452695678e-05, - "loss": 0.0881, + "epoch": 0.7351849101440665, + "grad_norm": 0.7995119690895081, + "learning_rate": 9.264815089855934e-06, + "loss": 0.0843, "step": 4950 }, { - "epoch": 0.3683350660923808, - "grad_norm": 1.7310969829559326, - "learning_rate": 2.7789989603445716e-05, - "loss": 0.0873, + "epoch": 0.7366701321847616, + "grad_norm": 0.6183965802192688, + "learning_rate": 9.263329867815239e-06, + "loss": 0.0838, "step": 4960 }, { - "epoch": 0.3690776771127284, - "grad_norm": 1.5683966875076294, - "learning_rate": 2.778553393732363e-05, - "loss": 0.1008, + "epoch": 0.7381553542254568, + "grad_norm": 1.539196252822876, + "learning_rate": 9.261844645774543e-06, + "loss": 0.0888, "step": 4970 }, { - "epoch": 0.3698202881330759, - "grad_norm": 3.5258140563964844, - "learning_rate": 2.7781078271201546e-05, - "loss": 0.0738, + "epoch": 0.7396405762661518, + "grad_norm": 1.1313135623931885, + "learning_rate": 9.26035942373385e-06, + "loss": 0.0899, "step": 4980 }, { - "epoch": 0.37056289915342344, - "grad_norm": 1.4318699836730957, - "learning_rate": 2.777662260507946e-05, - "loss": 0.0773, + "epoch": 0.7411257983068469, + "grad_norm": 1.304632306098938, + "learning_rate": 9.258874201693154e-06, + "loss": 0.0799, "step": 4990 }, { - "epoch": 0.371305510173771, - "grad_norm": 2.4203314781188965, - "learning_rate": 2.7772166938957373e-05, - "loss": 0.1097, + "epoch": 0.742611020347542, + "grad_norm": 0.8942739963531494, + "learning_rate": 9.257388979652458e-06, + "loss": 0.0756, "step": 5000 }, { - "epoch": 0.3720481211941185, - "grad_norm": 1.1299662590026855, - "learning_rate": 2.776771127283529e-05, - "loss": 0.0858, + "epoch": 0.744096242388237, + "grad_norm": 1.0307176113128662, + "learning_rate": 9.255903757611764e-06, + "loss": 0.0953, "step": 5010 }, { - "epoch": 0.37279073221446607, - "grad_norm": 4.186913013458252, - "learning_rate": 2.7763255606713206e-05, - "loss": 0.1212, + "epoch": 0.7455814644289321, + "grad_norm": 0.9079128503799438, + "learning_rate": 9.254418535571069e-06, + "loss": 0.0883, "step": 5020 }, { - "epoch": 0.37353334323481363, - "grad_norm": 3.082172393798828, - "learning_rate": 2.7758799940591117e-05, - "loss": 0.1027, + "epoch": 0.7470666864696273, + "grad_norm": 0.6662859916687012, + "learning_rate": 9.252933313530373e-06, + "loss": 0.0635, "step": 5030 }, { - "epoch": 0.37427595425516114, - "grad_norm": 3.5075833797454834, - "learning_rate": 2.7754344274469036e-05, - "loss": 0.0974, + "epoch": 0.7485519085103223, + "grad_norm": 0.5575108528137207, + "learning_rate": 9.25144809148968e-06, + "loss": 0.0763, "step": 5040 }, { - "epoch": 0.3750185652755087, - "grad_norm": 4.949690818786621, - "learning_rate": 2.7749888608346947e-05, - "loss": 0.0912, + "epoch": 0.7500371305510174, + "grad_norm": 1.2261484861373901, + "learning_rate": 9.249962869448984e-06, + "loss": 0.079, "step": 5050 }, { - "epoch": 0.3757611762958562, - "grad_norm": 2.1641194820404053, - "learning_rate": 2.7745432942224862e-05, - "loss": 0.11, + "epoch": 0.7515223525917124, + "grad_norm": 0.442999005317688, + "learning_rate": 9.248477647408288e-06, + "loss": 0.0529, "step": 5060 }, { - "epoch": 0.37650378731620376, - "grad_norm": 2.0834977626800537, - "learning_rate": 2.7740977276102777e-05, - "loss": 0.055, + "epoch": 0.7530075746324075, + "grad_norm": 1.6518497467041016, + "learning_rate": 9.246992425367594e-06, + "loss": 0.0973, "step": 5070 }, { - "epoch": 0.3772463983365513, - "grad_norm": 1.6411371231079102, - "learning_rate": 2.7736521609980692e-05, - "loss": 0.0844, + "epoch": 0.7544927966731027, + "grad_norm": 0.7751675844192505, + "learning_rate": 9.245507203326899e-06, + "loss": 0.0614, "step": 5080 }, { - "epoch": 0.37798900935689883, - "grad_norm": 2.455378770828247, - "learning_rate": 2.7732065943858607e-05, - "loss": 0.086, + "epoch": 0.7559780187137977, + "grad_norm": 0.7261990904808044, + "learning_rate": 9.244021981286203e-06, + "loss": 0.0768, "step": 5090 }, { - "epoch": 0.3787316203772464, - "grad_norm": 0.9530849456787109, - "learning_rate": 2.7727610277736522e-05, - "loss": 0.0879, + "epoch": 0.7574632407544928, + "grad_norm": 0.9539375901222229, + "learning_rate": 9.24253675924551e-06, + "loss": 0.0946, "step": 5100 }, { - "epoch": 0.37947423139759395, - "grad_norm": 0.6833879947662354, - "learning_rate": 2.7723154611614437e-05, - "loss": 0.1207, + "epoch": 0.7589484627951879, + "grad_norm": 0.43877437710762024, + "learning_rate": 9.241051537204812e-06, + "loss": 0.0586, "step": 5110 }, { - "epoch": 0.38021684241794146, - "grad_norm": 2.2070958614349365, - "learning_rate": 2.7718698945492352e-05, - "loss": 0.0749, + "epoch": 0.7604336848358829, + "grad_norm": 1.3418385982513428, + "learning_rate": 9.239566315164118e-06, + "loss": 0.1075, "step": 5120 }, { - "epoch": 0.380959453438289, - "grad_norm": 4.673049449920654, - "learning_rate": 2.7714243279370264e-05, - "loss": 0.0855, + "epoch": 0.761918906876578, + "grad_norm": 1.0130970478057861, + "learning_rate": 9.238081093123422e-06, + "loss": 0.0606, "step": 5130 }, { - "epoch": 0.3817020644586366, - "grad_norm": 2.2408103942871094, - "learning_rate": 2.7709787613248182e-05, - "loss": 0.0863, + "epoch": 0.7634041289172732, + "grad_norm": 1.10467529296875, + "learning_rate": 9.236595871082727e-06, + "loss": 0.0827, "step": 5140 }, { - "epoch": 0.3824446754789841, - "grad_norm": 1.7068830728530884, - "learning_rate": 2.7705331947126097e-05, - "loss": 0.0949, + "epoch": 0.7648893509579682, + "grad_norm": 1.0823795795440674, + "learning_rate": 9.235110649042033e-06, + "loss": 0.0758, "step": 5150 }, { - "epoch": 0.38318728649933165, - "grad_norm": 1.8522627353668213, - "learning_rate": 2.770087628100401e-05, + "epoch": 0.7663745729986633, + "grad_norm": 0.8557460904121399, + "learning_rate": 9.233625427001337e-06, "loss": 0.0875, "step": 5160 }, { - "epoch": 0.3839298975196792, - "grad_norm": 2.645232915878296, - "learning_rate": 2.7696420614881924e-05, - "loss": 0.0823, + "epoch": 0.7678597950393584, + "grad_norm": 0.6835376620292664, + "learning_rate": 9.232140204960642e-06, + "loss": 0.0777, "step": 5170 }, { - "epoch": 0.3846725085400267, - "grad_norm": 3.677633047103882, - "learning_rate": 2.7691964948759842e-05, - "loss": 0.0726, + "epoch": 0.7693450170800534, + "grad_norm": 0.653923749923706, + "learning_rate": 9.230654982919948e-06, + "loss": 0.0615, "step": 5180 }, { - "epoch": 0.3854151195603743, - "grad_norm": 2.5653793811798096, - "learning_rate": 2.7687509282637754e-05, - "loss": 0.087, + "epoch": 0.7708302391207486, + "grad_norm": 0.7122613787651062, + "learning_rate": 9.229169760879252e-06, + "loss": 0.0759, "step": 5190 }, { - "epoch": 0.38615773058072184, - "grad_norm": 3.1218738555908203, - "learning_rate": 2.768305361651567e-05, - "loss": 0.09, + "epoch": 0.7723154611614437, + "grad_norm": 1.2723838090896606, + "learning_rate": 9.227684538838557e-06, + "loss": 0.1033, "step": 5200 }, { - "epoch": 0.38690034160106934, - "grad_norm": 1.5911304950714111, - "learning_rate": 2.7678597950393587e-05, - "loss": 0.0867, + "epoch": 0.7738006832021387, + "grad_norm": 0.43908363580703735, + "learning_rate": 9.226199316797863e-06, + "loss": 0.06, "step": 5210 }, { - "epoch": 0.3876429526214169, - "grad_norm": 1.051086187362671, - "learning_rate": 2.76741422842715e-05, - "loss": 0.1087, + "epoch": 0.7752859052428338, + "grad_norm": 1.225393295288086, + "learning_rate": 9.224714094757167e-06, + "loss": 0.069, "step": 5220 }, { - "epoch": 0.38838556364176446, - "grad_norm": 2.593616247177124, - "learning_rate": 2.7669686618149414e-05, - "loss": 0.0842, + "epoch": 0.7767711272835289, + "grad_norm": 0.8042909502983093, + "learning_rate": 9.223228872716472e-06, + "loss": 0.0789, "step": 5230 }, { - "epoch": 0.38912817466211197, - "grad_norm": 2.5163533687591553, - "learning_rate": 2.7665230952027326e-05, - "loss": 0.1173, + "epoch": 0.7782563493242239, + "grad_norm": 0.4831196665763855, + "learning_rate": 9.221743650675776e-06, + "loss": 0.0792, "step": 5240 }, { - "epoch": 0.38987078568245953, - "grad_norm": 4.386409759521484, - "learning_rate": 2.7660775285905244e-05, - "loss": 0.1035, + "epoch": 0.7797415713649191, + "grad_norm": 1.2344239950180054, + "learning_rate": 9.22025842863508e-06, + "loss": 0.0726, "step": 5250 }, { - "epoch": 0.3906133967028071, - "grad_norm": 2.9560604095458984, - "learning_rate": 2.765631961978316e-05, - "loss": 0.0936, + "epoch": 0.7812267934056142, + "grad_norm": 0.7011733651161194, + "learning_rate": 9.218773206594387e-06, + "loss": 0.0883, "step": 5260 }, { - "epoch": 0.3913560077231546, - "grad_norm": 2.026900291442871, - "learning_rate": 2.765186395366107e-05, - "loss": 0.1084, + "epoch": 0.7827120154463092, + "grad_norm": 0.8087848424911499, + "learning_rate": 9.217287984553691e-06, + "loss": 0.0719, "step": 5270 }, { - "epoch": 0.39209861874350216, - "grad_norm": 2.574880361557007, - "learning_rate": 2.764740828753899e-05, - "loss": 0.0733, + "epoch": 0.7841972374870043, + "grad_norm": 0.5542543530464172, + "learning_rate": 9.215802762512996e-06, + "loss": 0.0736, "step": 5280 }, { - "epoch": 0.3928412297638497, - "grad_norm": 1.350338339805603, - "learning_rate": 2.7642952621416904e-05, - "loss": 0.092, + "epoch": 0.7856824595276994, + "grad_norm": 1.5584278106689453, + "learning_rate": 9.214317540472302e-06, + "loss": 0.0762, "step": 5290 }, { - "epoch": 0.3935838407841972, - "grad_norm": 1.7275868654251099, - "learning_rate": 2.7638496955294816e-05, - "loss": 0.0534, + "epoch": 0.7871676815683945, + "grad_norm": 0.9587001204490662, + "learning_rate": 9.212832318431606e-06, + "loss": 0.079, "step": 5300 }, { - "epoch": 0.3943264518045448, - "grad_norm": 1.1320747137069702, - "learning_rate": 2.7634041289172734e-05, - "loss": 0.1091, + "epoch": 0.7886529036090896, + "grad_norm": 1.045442819595337, + "learning_rate": 9.21134709639091e-06, + "loss": 0.0693, "step": 5310 }, { - "epoch": 0.39506906282489235, - "grad_norm": 1.5764305591583252, - "learning_rate": 2.762958562305065e-05, - "loss": 0.0697, + "epoch": 0.7901381256497847, + "grad_norm": 0.9065925478935242, + "learning_rate": 9.209861874350217e-06, + "loss": 0.0902, "step": 5320 }, { - "epoch": 0.39581167384523985, - "grad_norm": 1.4530662298202515, - "learning_rate": 2.762512995692856e-05, - "loss": 0.0823, + "epoch": 0.7916233476904797, + "grad_norm": 1.3168227672576904, + "learning_rate": 9.208376652309521e-06, + "loss": 0.0935, "step": 5330 }, { - "epoch": 0.3965542848655874, - "grad_norm": 3.964816093444824, - "learning_rate": 2.7620674290806476e-05, - "loss": 0.0677, + "epoch": 0.7931085697311748, + "grad_norm": 1.3936703205108643, + "learning_rate": 9.206891430268826e-06, + "loss": 0.0922, "step": 5340 }, { - "epoch": 0.3972968958859349, - "grad_norm": 2.6048128604888916, - "learning_rate": 2.761621862468439e-05, - "loss": 0.0656, + "epoch": 0.7945937917718698, + "grad_norm": 1.0532509088516235, + "learning_rate": 9.20540620822813e-06, + "loss": 0.0876, "step": 5350 }, { - "epoch": 0.3980395069062825, - "grad_norm": 1.2549293041229248, - "learning_rate": 2.7611762958562306e-05, + "epoch": 0.796079013812565, + "grad_norm": 0.8267485499382019, + "learning_rate": 9.203920986187436e-06, "loss": 0.0803, "step": 5360 }, { - "epoch": 0.39878211792663004, - "grad_norm": 2.1924233436584473, - "learning_rate": 2.760730729244022e-05, - "loss": 0.0775, + "epoch": 0.7975642358532601, + "grad_norm": 1.8309590816497803, + "learning_rate": 9.20243576414674e-06, + "loss": 0.0817, "step": 5370 }, { - "epoch": 0.39952472894697755, - "grad_norm": 1.1957290172576904, - "learning_rate": 2.7602851626318136e-05, - "loss": 0.0526, + "epoch": 0.7990494578939551, + "grad_norm": 0.9805804491043091, + "learning_rate": 9.200950542106045e-06, + "loss": 0.0701, "step": 5380 }, { - "epoch": 0.4002673399673251, - "grad_norm": 4.39811897277832, - "learning_rate": 2.759839596019605e-05, - "loss": 0.0843, + "epoch": 0.8005346799346502, + "grad_norm": 0.8274025321006775, + "learning_rate": 9.199465320065351e-06, + "loss": 0.068, "step": 5390 }, { - "epoch": 0.40100995098767267, - "grad_norm": 2.887032985687256, - "learning_rate": 2.7593940294073965e-05, - "loss": 0.105, + "epoch": 0.8020199019753453, + "grad_norm": 0.9127753376960754, + "learning_rate": 9.197980098024656e-06, + "loss": 0.0736, "step": 5400 }, { - "epoch": 0.4017525620080202, - "grad_norm": 2.1287643909454346, - "learning_rate": 2.7589484627951877e-05, - "loss": 0.0919, + "epoch": 0.8035051240160404, + "grad_norm": 0.9885143637657166, + "learning_rate": 9.19649487598396e-06, + "loss": 0.0669, "step": 5410 }, { - "epoch": 0.40249517302836774, - "grad_norm": 2.559832811355591, - "learning_rate": 2.7585028961829795e-05, - "loss": 0.0941, + "epoch": 0.8049903460567355, + "grad_norm": 1.3231810331344604, + "learning_rate": 9.195009653943265e-06, + "loss": 0.075, "step": 5420 }, { - "epoch": 0.4032377840487153, - "grad_norm": 3.4506430625915527, - "learning_rate": 2.758057329570771e-05, - "loss": 0.0959, + "epoch": 0.8064755680974306, + "grad_norm": 0.8889951705932617, + "learning_rate": 9.19352443190257e-06, + "loss": 0.0954, "step": 5430 }, { - "epoch": 0.4039803950690628, - "grad_norm": 0.877765953540802, - "learning_rate": 2.7576117629585622e-05, - "loss": 0.1001, + "epoch": 0.8079607901381256, + "grad_norm": 0.4237781763076782, + "learning_rate": 9.192039209861875e-06, + "loss": 0.0456, "step": 5440 }, { - "epoch": 0.40472300608941036, - "grad_norm": 2.6283414363861084, - "learning_rate": 2.757166196346354e-05, - "loss": 0.0734, + "epoch": 0.8094460121788207, + "grad_norm": 1.211202621459961, + "learning_rate": 9.19055398782118e-06, + "loss": 0.0769, "step": 5450 }, { - "epoch": 0.4054656171097579, - "grad_norm": 2.917095899581909, - "learning_rate": 2.7567206297341452e-05, - "loss": 0.1153, + "epoch": 0.8109312342195159, + "grad_norm": 0.7493748664855957, + "learning_rate": 9.189068765780484e-06, + "loss": 0.0805, "step": 5460 }, { - "epoch": 0.40620822813010543, - "grad_norm": 1.10123872756958, - "learning_rate": 2.7562750631219367e-05, - "loss": 0.0765, + "epoch": 0.8124164562602109, + "grad_norm": 0.38762542605400085, + "learning_rate": 9.18758354373979e-06, + "loss": 0.0562, "step": 5470 }, { - "epoch": 0.406950839150453, - "grad_norm": 4.8916096687316895, - "learning_rate": 2.7558294965097282e-05, - "loss": 0.0609, + "epoch": 0.813901678300906, + "grad_norm": 1.0665141344070435, + "learning_rate": 9.186098321699095e-06, + "loss": 0.0936, "step": 5480 }, { - "epoch": 0.40769345017080055, - "grad_norm": 1.0813095569610596, - "learning_rate": 2.7553839298975197e-05, - "loss": 0.0926, + "epoch": 0.8153869003416011, + "grad_norm": 1.0364309549331665, + "learning_rate": 9.184613099658399e-06, + "loss": 0.0994, "step": 5490 }, { - "epoch": 0.40843606119114806, - "grad_norm": 2.3865935802459717, - "learning_rate": 2.7549383632853112e-05, - "loss": 0.0769, + "epoch": 0.8168721223822961, + "grad_norm": 1.3025943040847778, + "learning_rate": 9.183127877617705e-06, + "loss": 0.082, "step": 5500 }, { - "epoch": 0.4091786722114956, - "grad_norm": 2.4773435592651367, - "learning_rate": 2.7544927966731027e-05, - "loss": 0.0767, + "epoch": 0.8183573444229912, + "grad_norm": 0.35092493891716003, + "learning_rate": 9.18164265557701e-06, + "loss": 0.0562, "step": 5510 }, { - "epoch": 0.4099212832318432, - "grad_norm": 1.5695173740386963, - "learning_rate": 2.7540472300608942e-05, - "loss": 0.0639, + "epoch": 0.8198425664636864, + "grad_norm": 0.7516621351242065, + "learning_rate": 9.180157433536314e-06, + "loss": 0.0919, "step": 5520 }, { - "epoch": 0.4106638942521907, - "grad_norm": 3.533438205718994, - "learning_rate": 2.7536016634486857e-05, - "loss": 0.0692, + "epoch": 0.8213277885043814, + "grad_norm": 1.3148735761642456, + "learning_rate": 9.17867221149562e-06, + "loss": 0.0795, "step": 5530 }, { - "epoch": 0.41140650527253825, - "grad_norm": 1.1190873384475708, - "learning_rate": 2.7531560968364772e-05, - "loss": 0.0645, + "epoch": 0.8228130105450765, + "grad_norm": 0.4802815616130829, + "learning_rate": 9.177186989454925e-06, + "loss": 0.0753, "step": 5540 }, { - "epoch": 0.4121491162928858, - "grad_norm": 2.1660842895507812, - "learning_rate": 2.7527105302242687e-05, - "loss": 0.0883, + "epoch": 0.8242982325857716, + "grad_norm": 0.9008163213729858, + "learning_rate": 9.175701767414229e-06, + "loss": 0.0959, "step": 5550 }, { - "epoch": 0.4128917273132333, - "grad_norm": 1.7716519832611084, - "learning_rate": 2.7522649636120602e-05, - "loss": 0.0858, + "epoch": 0.8257834546264666, + "grad_norm": 0.9163670539855957, + "learning_rate": 9.174216545373535e-06, + "loss": 0.0688, "step": 5560 }, { - "epoch": 0.4136343383335809, - "grad_norm": 1.537878155708313, - "learning_rate": 2.7518193969998514e-05, - "loss": 0.0814, + "epoch": 0.8272686766671618, + "grad_norm": 0.994637131690979, + "learning_rate": 9.172731323332838e-06, + "loss": 0.0776, "step": 5570 }, { - "epoch": 0.41437694935392844, - "grad_norm": 2.6977486610412598, - "learning_rate": 2.751373830387643e-05, - "loss": 0.0803, + "epoch": 0.8287538987078569, + "grad_norm": 0.8866167068481445, + "learning_rate": 9.171246101292144e-06, + "loss": 0.0845, "step": 5580 }, { - "epoch": 0.41511956037427594, - "grad_norm": 2.5686473846435547, - "learning_rate": 2.7509282637754347e-05, - "loss": 0.1299, + "epoch": 0.8302391207485519, + "grad_norm": 0.8306211829185486, + "learning_rate": 9.169760879251448e-06, + "loss": 0.0658, "step": 5590 }, { - "epoch": 0.4158621713946235, - "grad_norm": 3.5624582767486572, - "learning_rate": 2.750482697163226e-05, - "loss": 0.0667, + "epoch": 0.831724342789247, + "grad_norm": 0.5935698747634888, + "learning_rate": 9.168275657210753e-06, + "loss": 0.0941, "step": 5600 }, { - "epoch": 0.41660478241497106, - "grad_norm": 1.4908101558685303, - "learning_rate": 2.7500371305510174e-05, - "loss": 0.0869, + "epoch": 0.8332095648299421, + "grad_norm": 1.102920651435852, + "learning_rate": 9.166790435170059e-06, + "loss": 0.0695, "step": 5610 }, { - "epoch": 0.41734739343531857, - "grad_norm": 1.9675188064575195, - "learning_rate": 2.7495915639388092e-05, - "loss": 0.0645, + "epoch": 0.8346947868706371, + "grad_norm": 1.0735467672348022, + "learning_rate": 9.165305213129363e-06, + "loss": 0.0748, "step": 5620 }, { - "epoch": 0.41809000445566613, - "grad_norm": 3.775062322616577, - "learning_rate": 2.7491459973266004e-05, - "loss": 0.0985, + "epoch": 0.8361800089113323, + "grad_norm": 0.9008534550666809, + "learning_rate": 9.163819991088668e-06, + "loss": 0.094, "step": 5630 }, { - "epoch": 0.41883261547601364, - "grad_norm": 5.706444263458252, - "learning_rate": 2.748700430714392e-05, - "loss": 0.0996, + "epoch": 0.8376652309520273, + "grad_norm": 0.8445830941200256, + "learning_rate": 9.162334769047974e-06, + "loss": 0.0787, "step": 5640 }, { - "epoch": 0.4195752264963612, - "grad_norm": 2.382413625717163, - "learning_rate": 2.748254864102183e-05, - "loss": 0.079, + "epoch": 0.8391504529927224, + "grad_norm": 0.711890697479248, + "learning_rate": 9.160849547007278e-06, + "loss": 0.0663, "step": 5650 }, { - "epoch": 0.42031783751670876, - "grad_norm": 2.5608088970184326, - "learning_rate": 2.747809297489975e-05, - "loss": 0.0893, + "epoch": 0.8406356750334175, + "grad_norm": 1.391710877418518, + "learning_rate": 9.159364324966583e-06, + "loss": 0.0802, "step": 5660 }, { - "epoch": 0.42106044853705626, - "grad_norm": 2.507960796356201, - "learning_rate": 2.7473637308777664e-05, - "loss": 0.0689, + "epoch": 0.8421208970741125, + "grad_norm": 0.9550698399543762, + "learning_rate": 9.157879102925889e-06, + "loss": 0.0661, "step": 5670 }, { - "epoch": 0.4218030595574038, - "grad_norm": 2.9068281650543213, - "learning_rate": 2.7469181642655575e-05, - "loss": 0.0848, + "epoch": 0.8436061191148077, + "grad_norm": 0.8969228267669678, + "learning_rate": 9.156393880885193e-06, + "loss": 0.0731, "step": 5680 }, { - "epoch": 0.4225456705777514, - "grad_norm": 3.1836397647857666, - "learning_rate": 2.7464725976533494e-05, - "loss": 0.0851, + "epoch": 0.8450913411555028, + "grad_norm": 1.3309462070465088, + "learning_rate": 9.154908658844498e-06, + "loss": 0.0781, "step": 5690 }, { - "epoch": 0.4232882815980989, - "grad_norm": 3.9612765312194824, - "learning_rate": 2.746027031041141e-05, - "loss": 0.0785, + "epoch": 0.8465765631961978, + "grad_norm": 0.9650948643684387, + "learning_rate": 9.153423436803804e-06, + "loss": 0.1078, "step": 5700 }, { - "epoch": 0.42403089261844645, - "grad_norm": 5.2058210372924805, - "learning_rate": 2.745581464428932e-05, - "loss": 0.0883, + "epoch": 0.8480617852368929, + "grad_norm": 1.1342687606811523, + "learning_rate": 9.151938214763107e-06, + "loss": 0.0752, "step": 5710 }, { - "epoch": 0.424773503638794, - "grad_norm": 2.7457072734832764, - "learning_rate": 2.745135897816724e-05, - "loss": 0.0654, + "epoch": 0.849547007277588, + "grad_norm": 0.8959386348724365, + "learning_rate": 9.150452992722413e-06, + "loss": 0.0816, "step": 5720 }, { - "epoch": 0.4255161146591415, - "grad_norm": 1.1056705713272095, - "learning_rate": 2.7446903312045154e-05, - "loss": 0.0996, + "epoch": 0.851032229318283, + "grad_norm": 0.9059763550758362, + "learning_rate": 9.148967770681719e-06, + "loss": 0.0939, "step": 5730 }, { - "epoch": 0.4262587256794891, - "grad_norm": 2.1076269149780273, - "learning_rate": 2.7442447645923065e-05, - "loss": 0.057, + "epoch": 0.8525174513589782, + "grad_norm": 0.9173917174339294, + "learning_rate": 9.147482548641022e-06, + "loss": 0.081, "step": 5740 }, { - "epoch": 0.42700133669983664, - "grad_norm": 2.5549466609954834, - "learning_rate": 2.743799197980098e-05, - "loss": 0.1045, + "epoch": 0.8540026733996733, + "grad_norm": 1.3770872354507446, + "learning_rate": 9.145997326600328e-06, + "loss": 0.0718, "step": 5750 }, { - "epoch": 0.42774394772018415, - "grad_norm": 1.2105517387390137, - "learning_rate": 2.7433536313678895e-05, - "loss": 0.0826, + "epoch": 0.8554878954403683, + "grad_norm": 0.6253504157066345, + "learning_rate": 9.144512104559632e-06, + "loss": 0.1021, "step": 5760 }, { - "epoch": 0.4284865587405317, - "grad_norm": 1.5219643115997314, - "learning_rate": 2.742908064755681e-05, - "loss": 0.0729, + "epoch": 0.8569731174810634, + "grad_norm": 0.6649258136749268, + "learning_rate": 9.143026882518937e-06, + "loss": 0.0719, "step": 5770 }, { - "epoch": 0.42922916976087927, - "grad_norm": 2.4484918117523193, - "learning_rate": 2.7424624981434725e-05, - "loss": 0.1096, + "epoch": 0.8584583395217585, + "grad_norm": 1.2232184410095215, + "learning_rate": 9.141541660478243e-06, + "loss": 0.092, "step": 5780 }, { - "epoch": 0.4299717807812268, - "grad_norm": 2.4884450435638428, - "learning_rate": 2.742016931531264e-05, - "loss": 0.0927, + "epoch": 0.8599435615624536, + "grad_norm": 1.0452312231063843, + "learning_rate": 9.140056438437547e-06, + "loss": 0.0703, "step": 5790 }, { - "epoch": 0.43071439180157434, - "grad_norm": 2.647526502609253, - "learning_rate": 2.7415713649190555e-05, - "loss": 0.0918, + "epoch": 0.8614287836031487, + "grad_norm": 0.5171612501144409, + "learning_rate": 9.138571216396852e-06, + "loss": 0.0653, "step": 5800 }, { - "epoch": 0.4314570028219219, - "grad_norm": 3.45865535736084, - "learning_rate": 2.741125798306847e-05, - "loss": 0.1012, + "epoch": 0.8629140056438438, + "grad_norm": 1.0178922414779663, + "learning_rate": 9.137085994356158e-06, + "loss": 0.0834, "step": 5810 }, { - "epoch": 0.4321996138422694, - "grad_norm": 1.7236058712005615, - "learning_rate": 2.7406802316946382e-05, - "loss": 0.1037, + "epoch": 0.8643992276845388, + "grad_norm": 1.6475141048431396, + "learning_rate": 9.135600772315462e-06, + "loss": 0.0833, "step": 5820 }, { - "epoch": 0.43294222486261696, - "grad_norm": 4.1282572746276855, - "learning_rate": 2.74023466508243e-05, - "loss": 0.0613, + "epoch": 0.8658844497252339, + "grad_norm": 1.547343373298645, + "learning_rate": 9.134115550274767e-06, + "loss": 0.0805, "step": 5830 }, { - "epoch": 0.4336848358829645, - "grad_norm": 2.4355249404907227, - "learning_rate": 2.7397890984702215e-05, - "loss": 0.0899, + "epoch": 0.867369671765929, + "grad_norm": 0.6915701031684875, + "learning_rate": 9.132630328234073e-06, + "loss": 0.0791, "step": 5840 }, { - "epoch": 0.43442744690331203, - "grad_norm": 1.723847508430481, - "learning_rate": 2.7393435318580127e-05, - "loss": 0.101, + "epoch": 0.8688548938066241, + "grad_norm": 1.050729513168335, + "learning_rate": 9.131145106193377e-06, + "loss": 0.066, "step": 5850 }, { - "epoch": 0.4351700579236596, - "grad_norm": 2.700627088546753, - "learning_rate": 2.7388979652458045e-05, - "loss": 0.0907, + "epoch": 0.8703401158473192, + "grad_norm": 0.6713865995407104, + "learning_rate": 9.129659884152682e-06, + "loss": 0.0828, "step": 5860 }, { - "epoch": 0.43591266894400715, - "grad_norm": 1.1109275817871094, - "learning_rate": 2.7384523986335957e-05, - "loss": 0.1118, + "epoch": 0.8718253378880143, + "grad_norm": 1.028895378112793, + "learning_rate": 9.128174662111986e-06, + "loss": 0.096, "step": 5870 }, { - "epoch": 0.43665527996435466, - "grad_norm": 1.6550132036209106, - "learning_rate": 2.738006832021387e-05, - "loss": 0.1243, + "epoch": 0.8733105599287093, + "grad_norm": 0.8316362500190735, + "learning_rate": 9.12668944007129e-06, + "loss": 0.0713, "step": 5880 }, { - "epoch": 0.4373978909847022, - "grad_norm": 1.3361659049987793, - "learning_rate": 2.737561265409179e-05, - "loss": 0.1412, + "epoch": 0.8747957819694044, + "grad_norm": 1.0333486795425415, + "learning_rate": 9.125204218030597e-06, + "loss": 0.0715, "step": 5890 }, { - "epoch": 0.4381405020050498, - "grad_norm": 2.4262852668762207, - "learning_rate": 2.73711569879697e-05, - "loss": 0.0814, + "epoch": 0.8762810040100996, + "grad_norm": 0.9072849750518799, + "learning_rate": 9.123718995989901e-06, + "loss": 0.0811, "step": 5900 }, { - "epoch": 0.4388831130253973, - "grad_norm": 3.202860116958618, - "learning_rate": 2.7366701321847617e-05, - "loss": 0.0694, + "epoch": 0.8777662260507946, + "grad_norm": 0.565025269985199, + "learning_rate": 9.122233773949206e-06, + "loss": 0.0723, "step": 5910 }, { - "epoch": 0.43962572404574485, - "grad_norm": 1.6271086931228638, - "learning_rate": 2.736224565572553e-05, - "loss": 0.0871, + "epoch": 0.8792514480914897, + "grad_norm": 0.9661470055580139, + "learning_rate": 9.120748551908512e-06, + "loss": 0.0705, "step": 5920 }, { - "epoch": 0.44036833506609235, - "grad_norm": 1.3334532976150513, - "learning_rate": 2.7357789989603447e-05, - "loss": 0.1081, + "epoch": 0.8807366701321847, + "grad_norm": 0.7308884263038635, + "learning_rate": 9.119263329867816e-06, + "loss": 0.0713, "step": 5930 }, { - "epoch": 0.4411109460864399, - "grad_norm": 1.230716586112976, - "learning_rate": 2.735333432348136e-05, - "loss": 0.067, + "epoch": 0.8822218921728798, + "grad_norm": 1.2283244132995605, + "learning_rate": 9.11777810782712e-06, + "loss": 0.0845, "step": 5940 }, { - "epoch": 0.4418535571067875, - "grad_norm": 1.8485809564590454, - "learning_rate": 2.7348878657359277e-05, - "loss": 0.069, + "epoch": 0.883707114213575, + "grad_norm": 0.7272588610649109, + "learning_rate": 9.116292885786427e-06, + "loss": 0.0751, "step": 5950 }, { - "epoch": 0.442596168127135, - "grad_norm": 1.9252151250839233, - "learning_rate": 2.734442299123719e-05, - "loss": 0.0703, + "epoch": 0.88519233625427, + "grad_norm": 0.6600573658943176, + "learning_rate": 9.114807663745731e-06, + "loss": 0.073, "step": 5960 }, { - "epoch": 0.44333877914748254, - "grad_norm": 0.5167465209960938, - "learning_rate": 2.7339967325115107e-05, - "loss": 0.0766, + "epoch": 0.8866775582949651, + "grad_norm": 0.5860625505447388, + "learning_rate": 9.113322441705036e-06, + "loss": 0.0865, "step": 5970 }, { - "epoch": 0.4440813901678301, - "grad_norm": 1.259781002998352, - "learning_rate": 2.7335511658993018e-05, - "loss": 0.0752, + "epoch": 0.8881627803356602, + "grad_norm": 0.6887400150299072, + "learning_rate": 9.11183721966434e-06, + "loss": 0.0774, "step": 5980 }, { - "epoch": 0.4448240011881776, - "grad_norm": 0.9502667188644409, - "learning_rate": 2.7331055992870933e-05, - "loss": 0.0699, + "epoch": 0.8896480023763552, + "grad_norm": 1.1417862176895142, + "learning_rate": 9.110351997623646e-06, + "loss": 0.0644, "step": 5990 }, { - "epoch": 0.44556661220852517, - "grad_norm": 2.211690902709961, - "learning_rate": 2.732660032674885e-05, - "loss": 0.0806, + "epoch": 0.8911332244170503, + "grad_norm": 1.1381356716156006, + "learning_rate": 9.10886677558295e-06, + "loss": 0.0769, "step": 6000 }, { - "epoch": 0.44630922322887273, - "grad_norm": 4.159378528594971, - "learning_rate": 2.7322144660626763e-05, - "loss": 0.0766, + "epoch": 0.8926184464577455, + "grad_norm": 1.168483018875122, + "learning_rate": 9.107381553542255e-06, + "loss": 0.0914, "step": 6010 }, { - "epoch": 0.44705183424922024, - "grad_norm": 2.38044810295105, - "learning_rate": 2.7317688994504678e-05, - "loss": 0.1117, + "epoch": 0.8941036684984405, + "grad_norm": 0.7276975512504578, + "learning_rate": 9.105896331501561e-06, + "loss": 0.0807, "step": 6020 }, { - "epoch": 0.4477944452695678, - "grad_norm": 3.320197105407715, - "learning_rate": 2.7313233328382597e-05, - "loss": 0.0819, + "epoch": 0.8955888905391356, + "grad_norm": 2.201474905014038, + "learning_rate": 9.104411109460866e-06, + "loss": 0.0853, "step": 6030 }, { - "epoch": 0.44853705628991536, - "grad_norm": 2.641312599182129, - "learning_rate": 2.7308777662260508e-05, - "loss": 0.0753, + "epoch": 0.8970741125798307, + "grad_norm": 0.44225212931632996, + "learning_rate": 9.10292588742017e-06, + "loss": 0.068, "step": 6040 }, { - "epoch": 0.44927966731026286, - "grad_norm": 3.1988885402679443, - "learning_rate": 2.7304321996138423e-05, - "loss": 0.1066, + "epoch": 0.8985593346205257, + "grad_norm": 0.4434727132320404, + "learning_rate": 9.101440665379474e-06, + "loss": 0.0694, "step": 6050 }, { - "epoch": 0.4500222783306104, - "grad_norm": 0.6954814195632935, - "learning_rate": 2.7299866330016335e-05, - "loss": 0.0885, + "epoch": 0.9000445566612209, + "grad_norm": 0.5112833380699158, + "learning_rate": 9.099955443338779e-06, + "loss": 0.0783, "step": 6060 }, { - "epoch": 0.450764889350958, - "grad_norm": 3.5615670680999756, - "learning_rate": 2.7295410663894253e-05, - "loss": 0.0942, + "epoch": 0.901529778701916, + "grad_norm": 2.047451972961426, + "learning_rate": 9.098470221298085e-06, + "loss": 0.0835, "step": 6070 }, { - "epoch": 0.4515075003713055, - "grad_norm": 0.6206175088882446, - "learning_rate": 2.7290954997772168e-05, - "loss": 0.0688, + "epoch": 0.903015000742611, + "grad_norm": 0.5268721580505371, + "learning_rate": 9.09698499925739e-06, + "loss": 0.0714, "step": 6080 }, { - "epoch": 0.45225011139165305, - "grad_norm": 1.3338674306869507, - "learning_rate": 2.728649933165008e-05, - "loss": 0.0969, + "epoch": 0.9045002227833061, + "grad_norm": 1.2867953777313232, + "learning_rate": 9.095499777216694e-06, + "loss": 0.0864, "step": 6090 }, { - "epoch": 0.4529927224120006, - "grad_norm": 2.5011191368103027, - "learning_rate": 2.7282043665527998e-05, - "loss": 0.0852, + "epoch": 0.9059854448240012, + "grad_norm": 0.9601038098335266, + "learning_rate": 9.094014555176e-06, + "loss": 0.0961, "step": 6100 }, { - "epoch": 0.4537353334323481, - "grad_norm": 4.930363655090332, - "learning_rate": 2.7277587999405913e-05, - "loss": 0.1074, + "epoch": 0.9074706668646962, + "grad_norm": 1.6264362335205078, + "learning_rate": 9.092529333135304e-06, + "loss": 0.0735, "step": 6110 }, { - "epoch": 0.4544779444526957, - "grad_norm": 2.0421066284179688, - "learning_rate": 2.7273132333283825e-05, - "loss": 0.0821, + "epoch": 0.9089558889053914, + "grad_norm": 0.8122678399085999, + "learning_rate": 9.091044111094609e-06, + "loss": 0.0902, "step": 6120 }, { - "epoch": 0.45522055547304324, - "grad_norm": 1.314985752105713, - "learning_rate": 2.7268676667161743e-05, - "loss": 0.0847, + "epoch": 0.9104411109460865, + "grad_norm": 0.6474285125732422, + "learning_rate": 9.089558889053915e-06, + "loss": 0.0775, "step": 6130 }, { - "epoch": 0.45596316649339075, - "grad_norm": 2.257136583328247, - "learning_rate": 2.7264221001039658e-05, - "loss": 0.0911, + "epoch": 0.9119263329867815, + "grad_norm": 0.8998765349388123, + "learning_rate": 9.08807366701322e-06, + "loss": 0.0968, "step": 6140 }, { - "epoch": 0.4567057775137383, - "grad_norm": 2.229437828063965, - "learning_rate": 2.725976533491757e-05, - "loss": 0.081, + "epoch": 0.9134115550274766, + "grad_norm": 0.934053897857666, + "learning_rate": 9.086588444972524e-06, + "loss": 0.0772, "step": 6150 }, { - "epoch": 0.45744838853408587, - "grad_norm": 1.778793215751648, - "learning_rate": 2.7255309668795485e-05, - "loss": 0.1086, + "epoch": 0.9148967770681717, + "grad_norm": 1.0640918016433716, + "learning_rate": 9.08510322293183e-06, + "loss": 0.0964, "step": 6160 }, { - "epoch": 0.4581909995544334, - "grad_norm": 1.5188746452331543, - "learning_rate": 2.72508540026734e-05, - "loss": 0.0933, + "epoch": 0.9163819991088668, + "grad_norm": 0.7316854596138, + "learning_rate": 9.083618000891133e-06, + "loss": 0.0798, "step": 6170 }, { - "epoch": 0.45893361057478094, - "grad_norm": 1.7076901197433472, - "learning_rate": 2.7246398336551315e-05, - "loss": 0.0746, + "epoch": 0.9178672211495619, + "grad_norm": 1.0811574459075928, + "learning_rate": 9.082132778850439e-06, + "loss": 0.0741, "step": 6180 }, { - "epoch": 0.4596762215951285, - "grad_norm": 1.1018537282943726, - "learning_rate": 2.724194267042923e-05, - "loss": 0.0773, + "epoch": 0.919352443190257, + "grad_norm": 0.9577917456626892, + "learning_rate": 9.080647556809743e-06, + "loss": 0.0882, "step": 6190 }, { - "epoch": 0.460418832615476, - "grad_norm": 1.8429148197174072, - "learning_rate": 2.7237487004307145e-05, - "loss": 0.0822, + "epoch": 0.920837665230952, + "grad_norm": 1.3443588018417358, + "learning_rate": 9.079162334769048e-06, + "loss": 0.0722, "step": 6200 }, { - "epoch": 0.46116144363582356, - "grad_norm": 4.57528829574585, - "learning_rate": 2.723303133818506e-05, - "loss": 0.1028, - "step": 6210 + "epoch": 0.9223228872716471, + "grad_norm": 0.8847956657409668, + "learning_rate": 9.077677112728354e-06, + "loss": 0.0959, + "step": 6210 }, { - "epoch": 0.46190405465617107, - "grad_norm": 2.2696962356567383, - "learning_rate": 2.7228575672062975e-05, - "loss": 0.0925, + "epoch": 0.9238081093123421, + "grad_norm": 1.149559736251831, + "learning_rate": 9.076191890687658e-06, + "loss": 0.0884, "step": 6220 }, { - "epoch": 0.46264666567651863, - "grad_norm": 1.2681903839111328, - "learning_rate": 2.7224120005940886e-05, - "loss": 0.1078, + "epoch": 0.9252933313530373, + "grad_norm": 1.0926103591918945, + "learning_rate": 9.074706668646963e-06, + "loss": 0.0812, "step": 6230 }, { - "epoch": 0.4633892766968662, - "grad_norm": 0.9987069964408875, - "learning_rate": 2.7219664339818805e-05, - "loss": 0.0875, + "epoch": 0.9267785533937324, + "grad_norm": 0.3773317337036133, + "learning_rate": 9.073221446606269e-06, + "loss": 0.0829, "step": 6240 }, { - "epoch": 0.4641318877172137, - "grad_norm": 1.8749423027038574, - "learning_rate": 2.721520867369672e-05, - "loss": 0.1022, + "epoch": 0.9282637754344274, + "grad_norm": 0.872042179107666, + "learning_rate": 9.071736224565573e-06, + "loss": 0.0871, "step": 6250 }, { - "epoch": 0.46487449873756126, - "grad_norm": 1.0351048707962036, - "learning_rate": 2.721075300757463e-05, - "loss": 0.0946, + "epoch": 0.9297489974751225, + "grad_norm": 0.8606436848640442, + "learning_rate": 9.070251002524878e-06, + "loss": 0.0694, "step": 6260 }, { - "epoch": 0.4656171097579088, - "grad_norm": 0.9065452814102173, - "learning_rate": 2.720629734145255e-05, - "loss": 0.0955, + "epoch": 0.9312342195158176, + "grad_norm": 0.8578999638557434, + "learning_rate": 9.068765780484184e-06, + "loss": 0.0848, "step": 6270 }, { - "epoch": 0.4663597207782563, - "grad_norm": 0.9384631514549255, - "learning_rate": 2.720184167533046e-05, - "loss": 0.0789, + "epoch": 0.9327194415565127, + "grad_norm": 0.8462734818458557, + "learning_rate": 9.067280558443488e-06, + "loss": 0.0765, "step": 6280 }, { - "epoch": 0.4671023317986039, - "grad_norm": 0.5935912132263184, - "learning_rate": 2.7197386009208376e-05, - "loss": 0.0711, + "epoch": 0.9342046635972078, + "grad_norm": 0.7472706437110901, + "learning_rate": 9.065795336402793e-06, + "loss": 0.1077, "step": 6290 }, { - "epoch": 0.46784494281895145, - "grad_norm": 2.0923197269439697, - "learning_rate": 2.7192930343086295e-05, - "loss": 0.0668, + "epoch": 0.9356898856379029, + "grad_norm": 0.6326286792755127, + "learning_rate": 9.064310114362099e-06, + "loss": 0.0686, "step": 6300 }, { - "epoch": 0.46858755383929895, - "grad_norm": 0.9946518540382385, - "learning_rate": 2.7188474676964206e-05, - "loss": 0.07, + "epoch": 0.9371751076785979, + "grad_norm": 0.9380270838737488, + "learning_rate": 9.062824892321403e-06, + "loss": 0.0684, "step": 6310 }, { - "epoch": 0.4693301648596465, - "grad_norm": 4.637099742889404, - "learning_rate": 2.718401901084212e-05, - "loss": 0.0938, + "epoch": 0.938660329719293, + "grad_norm": 0.7247831225395203, + "learning_rate": 9.061339670280708e-06, + "loss": 0.0975, "step": 6320 }, { - "epoch": 0.4700727758799941, - "grad_norm": 3.6027259826660156, - "learning_rate": 2.7179563344720036e-05, - "loss": 0.0707, + "epoch": 0.9401455517599882, + "grad_norm": 0.8656736612319946, + "learning_rate": 9.059854448240014e-06, + "loss": 0.0647, "step": 6330 }, { - "epoch": 0.4708153869003416, - "grad_norm": 2.179995059967041, - "learning_rate": 2.717510767859795e-05, - "loss": 0.0795, + "epoch": 0.9416307738006832, + "grad_norm": 0.6879330277442932, + "learning_rate": 9.058369226199317e-06, + "loss": 0.0686, "step": 6340 }, { - "epoch": 0.47155799792068914, - "grad_norm": 1.5892603397369385, - "learning_rate": 2.7170652012475866e-05, - "loss": 0.085, + "epoch": 0.9431159958413783, + "grad_norm": 1.0881211757659912, + "learning_rate": 9.056884004158623e-06, + "loss": 0.0892, "step": 6350 }, { - "epoch": 0.4723006089410367, - "grad_norm": 2.146799087524414, - "learning_rate": 2.716619634635378e-05, - "loss": 0.0842, + "epoch": 0.9446012178820734, + "grad_norm": 0.8151282668113708, + "learning_rate": 9.055398782117927e-06, + "loss": 0.0723, "step": 6360 }, { - "epoch": 0.4730432199613842, - "grad_norm": 2.106539249420166, - "learning_rate": 2.7161740680231696e-05, - "loss": 0.0918, + "epoch": 0.9460864399227684, + "grad_norm": 0.6185230612754822, + "learning_rate": 9.053913560077232e-06, + "loss": 0.1003, "step": 6370 }, { - "epoch": 0.47378583098173177, - "grad_norm": 1.503983497619629, - "learning_rate": 2.715728501410961e-05, - "loss": 0.1038, + "epoch": 0.9475716619634635, + "grad_norm": 1.3565391302108765, + "learning_rate": 9.052428338036538e-06, + "loss": 0.0949, "step": 6380 }, { - "epoch": 0.47452844200207933, - "grad_norm": 1.7388819456100464, - "learning_rate": 2.7152829347987523e-05, - "loss": 0.1041, + "epoch": 0.9490568840041587, + "grad_norm": 0.659494936466217, + "learning_rate": 9.050943115995842e-06, + "loss": 0.0682, "step": 6390 }, { - "epoch": 0.47527105302242684, - "grad_norm": 3.1437437534332275, - "learning_rate": 2.7148373681865438e-05, - "loss": 0.0824, + "epoch": 0.9505421060448537, + "grad_norm": 0.8106864094734192, + "learning_rate": 9.049457893955147e-06, + "loss": 0.0706, "step": 6400 }, { - "epoch": 0.4760136640427744, - "grad_norm": 5.636854648590088, - "learning_rate": 2.7143918015743356e-05, - "loss": 0.0835, + "epoch": 0.9520273280855488, + "grad_norm": 0.6366297006607056, + "learning_rate": 9.047972671914453e-06, + "loss": 0.0788, "step": 6410 }, { - "epoch": 0.47675627506312196, - "grad_norm": 1.5559483766555786, - "learning_rate": 2.7139462349621268e-05, - "loss": 0.1108, + "epoch": 0.9535125501262439, + "grad_norm": 1.2151988744735718, + "learning_rate": 9.046487449873757e-06, + "loss": 0.0686, "step": 6420 }, { - "epoch": 0.47749888608346946, - "grad_norm": 2.2242963314056396, - "learning_rate": 2.7135006683499183e-05, - "loss": 0.133, + "epoch": 0.9549977721669389, + "grad_norm": 1.2140624523162842, + "learning_rate": 9.045002227833061e-06, + "loss": 0.0747, "step": 6430 }, { - "epoch": 0.478241497103817, - "grad_norm": 3.61586332321167, - "learning_rate": 2.71305510173771e-05, - "loss": 0.0996, + "epoch": 0.956482994207634, + "grad_norm": 0.8682563304901123, + "learning_rate": 9.043517005792368e-06, + "loss": 0.0802, "step": 6440 }, { - "epoch": 0.4789841081241646, - "grad_norm": 3.0987701416015625, - "learning_rate": 2.7126095351255013e-05, - "loss": 0.0924, + "epoch": 0.9579682162483292, + "grad_norm": 1.3332148790359497, + "learning_rate": 9.042031783751672e-06, + "loss": 0.0852, "step": 6450 }, { - "epoch": 0.4797267191445121, - "grad_norm": 1.8545348644256592, - "learning_rate": 2.7121639685132928e-05, - "loss": 0.082, + "epoch": 0.9594534382890242, + "grad_norm": 0.9207971096038818, + "learning_rate": 9.040546561710976e-06, + "loss": 0.0718, "step": 6460 }, { - "epoch": 0.48046933016485965, - "grad_norm": 0.8686532974243164, - "learning_rate": 2.7117184019010843e-05, - "loss": 0.068, + "epoch": 0.9609386603297193, + "grad_norm": 0.8784974217414856, + "learning_rate": 9.039061339670283e-06, + "loss": 0.0819, "step": 6470 }, { - "epoch": 0.4812119411852072, - "grad_norm": 1.4402474164962769, - "learning_rate": 2.7112728352888758e-05, - "loss": 0.0888, + "epoch": 0.9624238823704144, + "grad_norm": 0.6948875784873962, + "learning_rate": 9.037576117629585e-06, + "loss": 0.0792, "step": 6480 }, { - "epoch": 0.4819545522055547, - "grad_norm": 1.4960230588912964, - "learning_rate": 2.7108272686766673e-05, - "loss": 0.0626, + "epoch": 0.9639091044111094, + "grad_norm": 0.8131117820739746, + "learning_rate": 9.036090895588891e-06, + "loss": 0.0798, "step": 6490 }, { - "epoch": 0.4826971632259023, - "grad_norm": 0.6626843214035034, - "learning_rate": 2.7103817020644584e-05, - "loss": 0.0459, + "epoch": 0.9653943264518046, + "grad_norm": 1.1777347326278687, + "learning_rate": 9.034605673548196e-06, + "loss": 0.0735, "step": 6500 }, { - "epoch": 0.4834397742462498, - "grad_norm": 2.2946035861968994, - "learning_rate": 2.7099361354522503e-05, - "loss": 0.0836, + "epoch": 0.9668795484924996, + "grad_norm": 0.9856818318367004, + "learning_rate": 9.0331204515075e-06, + "loss": 0.0768, "step": 6510 }, { - "epoch": 0.48418238526659735, - "grad_norm": 3.0957255363464355, - "learning_rate": 2.7094905688400418e-05, - "loss": 0.0909, + "epoch": 0.9683647705331947, + "grad_norm": 1.2949861288070679, + "learning_rate": 9.031635229466806e-06, + "loss": 0.0716, "step": 6520 }, { - "epoch": 0.4849249962869449, - "grad_norm": 4.055625915527344, - "learning_rate": 2.709045002227833e-05, - "loss": 0.1138, + "epoch": 0.9698499925738898, + "grad_norm": 0.6529346704483032, + "learning_rate": 9.030150007426111e-06, + "loss": 0.0784, "step": 6530 }, { - "epoch": 0.4856676073072924, - "grad_norm": 1.6780099868774414, - "learning_rate": 2.7085994356156248e-05, - "loss": 0.0946, + "epoch": 0.9713352146145848, + "grad_norm": 1.8309189081192017, + "learning_rate": 9.028664785385415e-06, + "loss": 0.0673, "step": 6540 }, { - "epoch": 0.48641021832764, - "grad_norm": 1.8075953722000122, - "learning_rate": 2.7081538690034163e-05, - "loss": 0.0697, + "epoch": 0.97282043665528, + "grad_norm": 0.7805710434913635, + "learning_rate": 9.027179563344721e-06, + "loss": 0.0959, "step": 6550 }, { - "epoch": 0.48715282934798754, - "grad_norm": 1.8275692462921143, - "learning_rate": 2.7077083023912074e-05, - "loss": 0.0973, + "epoch": 0.9743056586959751, + "grad_norm": 0.4617927670478821, + "learning_rate": 9.025694341304026e-06, + "loss": 0.0785, "step": 6560 }, { - "epoch": 0.48789544036833504, - "grad_norm": 2.2628328800201416, - "learning_rate": 2.707262735778999e-05, - "loss": 0.1178, + "epoch": 0.9757908807366701, + "grad_norm": 1.0437848567962646, + "learning_rate": 9.02420911926333e-06, + "loss": 0.0685, "step": 6570 }, { - "epoch": 0.4886380513886826, - "grad_norm": 1.0537023544311523, - "learning_rate": 2.7068171691667904e-05, - "loss": 0.0633, + "epoch": 0.9772761027773652, + "grad_norm": 0.7941612005233765, + "learning_rate": 9.022723897222635e-06, + "loss": 0.0838, "step": 6580 }, { - "epoch": 0.48938066240903016, - "grad_norm": 0.39916807413101196, - "learning_rate": 2.706371602554582e-05, - "loss": 0.061, + "epoch": 0.9787613248180603, + "grad_norm": 0.5064259171485901, + "learning_rate": 9.021238675181941e-06, + "loss": 0.0695, "step": 6590 }, { - "epoch": 0.49012327342937767, - "grad_norm": 2.486980438232422, - "learning_rate": 2.7059260359423734e-05, - "loss": 0.0502, + "epoch": 0.9802465468587553, + "grad_norm": 0.9026811718940735, + "learning_rate": 9.019753453141245e-06, + "loss": 0.0648, "step": 6600 }, { - "epoch": 0.49086588444972523, - "grad_norm": 1.5549534559249878, - "learning_rate": 2.705480469330165e-05, - "loss": 0.096, + "epoch": 0.9817317688994505, + "grad_norm": 0.8138523101806641, + "learning_rate": 9.01826823110055e-06, + "loss": 0.0638, "step": 6610 }, { - "epoch": 0.4916084954700728, - "grad_norm": 1.8436572551727295, - "learning_rate": 2.7050349027179564e-05, - "loss": 0.0435, + "epoch": 0.9832169909401456, + "grad_norm": 0.5671774744987488, + "learning_rate": 9.016783009059856e-06, + "loss": 0.0544, "step": 6620 }, { - "epoch": 0.4923511064904203, - "grad_norm": 2.9370453357696533, - "learning_rate": 2.704589336105748e-05, - "loss": 0.1154, + "epoch": 0.9847022129808406, + "grad_norm": 1.2543549537658691, + "learning_rate": 9.01529778701916e-06, + "loss": 0.0736, "step": 6630 }, { - "epoch": 0.49309371751076786, - "grad_norm": 1.7184120416641235, - "learning_rate": 2.704143769493539e-05, - "loss": 0.1227, + "epoch": 0.9861874350215357, + "grad_norm": 0.5101591944694519, + "learning_rate": 9.013812564978465e-06, + "loss": 0.0779, "step": 6640 }, { - "epoch": 0.4938363285311154, - "grad_norm": 1.9661284685134888, - "learning_rate": 2.703698202881331e-05, - "loss": 0.0861, + "epoch": 0.9876726570622308, + "grad_norm": 0.8760504126548767, + "learning_rate": 9.01232734293777e-06, + "loss": 0.0792, "step": 6650 }, { - "epoch": 0.4945789395514629, - "grad_norm": 5.240865230560303, - "learning_rate": 2.7032526362691224e-05, - "loss": 0.0973, + "epoch": 0.9891578791029259, + "grad_norm": 1.0827103853225708, + "learning_rate": 9.010842120897075e-06, + "loss": 0.0853, "step": 6660 }, { - "epoch": 0.4953215505718105, - "grad_norm": 0.6822782158851624, - "learning_rate": 2.7028070696569136e-05, - "loss": 0.0736, + "epoch": 0.990643101143621, + "grad_norm": 0.6509794592857361, + "learning_rate": 9.00935689885638e-06, + "loss": 0.0632, "step": 6670 }, { - "epoch": 0.49606416159215805, - "grad_norm": 2.3852436542510986, - "learning_rate": 2.7023615030447054e-05, - "loss": 0.1097, + "epoch": 0.9921283231843161, + "grad_norm": 1.0729295015335083, + "learning_rate": 9.007871676815684e-06, + "loss": 0.082, "step": 6680 }, { - "epoch": 0.49680677261250555, - "grad_norm": 2.0364935398101807, - "learning_rate": 2.7019159364324966e-05, - "loss": 0.0749, + "epoch": 0.9936135452250111, + "grad_norm": 0.7828879356384277, + "learning_rate": 9.006386454774989e-06, + "loss": 0.0745, "step": 6690 }, { - "epoch": 0.4975493836328531, - "grad_norm": 1.774452805519104, - "learning_rate": 2.701470369820288e-05, - "loss": 0.0769, + "epoch": 0.9950987672657062, + "grad_norm": 0.7636898756027222, + "learning_rate": 9.004901232734295e-06, + "loss": 0.0851, "step": 6700 }, { - "epoch": 0.4982919946532007, - "grad_norm": 1.5295710563659668, - "learning_rate": 2.70102480320808e-05, - "loss": 0.0939, + "epoch": 0.9965839893064014, + "grad_norm": 0.9901930689811707, + "learning_rate": 9.0034160106936e-06, + "loss": 0.0589, "step": 6710 }, { - "epoch": 0.4990346056735482, - "grad_norm": 3.159693956375122, - "learning_rate": 2.700579236595871e-05, - "loss": 0.0719, + "epoch": 0.9980692113470964, + "grad_norm": 1.4099174737930298, + "learning_rate": 9.001930788652904e-06, + "loss": 0.0877, "step": 6720 }, { - "epoch": 0.49977721669389574, - "grad_norm": 1.0851925611495972, - "learning_rate": 2.7001336699836626e-05, - "loss": 0.0661, + "epoch": 0.9995544333877915, + "grad_norm": 1.0965951681137085, + "learning_rate": 9.00044556661221e-06, + "loss": 0.0769, "step": 6730 }, { - "epoch": 0.5005198277142433, - "grad_norm": 1.9622503519058228, - "learning_rate": 2.699688103371454e-05, - "loss": 0.0874, + "epoch": 1.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.06574511528015137, + "eval_runtime": 208.1528, + "eval_samples_per_second": 182.65, + "eval_steps_per_second": 5.712, + "step": 6733 + }, + { + "epoch": 1.0010396554284866, + "grad_norm": 0.6327619552612305, + "learning_rate": 8.998960344571514e-06, + "loss": 0.0635, "step": 6740 }, { - "epoch": 0.5012624387345909, - "grad_norm": 1.293068528175354, - "learning_rate": 2.6992425367592456e-05, - "loss": 0.1071, + "epoch": 1.0025248774691817, + "grad_norm": 0.9295978546142578, + "learning_rate": 8.997475122530819e-06, + "loss": 0.0849, "step": 6750 }, { - "epoch": 0.5020050497549383, - "grad_norm": 2.094120740890503, - "learning_rate": 2.698796970147037e-05, - "loss": 0.0936, + "epoch": 1.0040100995098766, + "grad_norm": 0.9286435842514038, + "learning_rate": 8.995989900490125e-06, + "loss": 0.0643, "step": 6760 }, { - "epoch": 0.5027476607752859, - "grad_norm": 2.2570743560791016, - "learning_rate": 2.6983514035348286e-05, - "loss": 0.0788, + "epoch": 1.0054953215505718, + "grad_norm": 1.0415282249450684, + "learning_rate": 8.99450467844943e-06, + "loss": 0.0922, "step": 6770 }, { - "epoch": 0.5034902717956334, - "grad_norm": 2.327422857284546, - "learning_rate": 2.69790583692262e-05, - "loss": 0.0765, + "epoch": 1.0069805435912669, + "grad_norm": 0.9151238799095154, + "learning_rate": 8.993019456408734e-06, + "loss": 0.0825, "step": 6780 }, { - "epoch": 0.504232882815981, - "grad_norm": 2.4832825660705566, - "learning_rate": 2.6974602703104116e-05, + "epoch": 1.008465765631962, + "grad_norm": 1.2567110061645508, + "learning_rate": 8.99153423436804e-06, "loss": 0.0789, "step": 6790 }, { - "epoch": 0.5049754938363286, - "grad_norm": 2.0305325984954834, - "learning_rate": 2.6970147036982027e-05, - "loss": 0.1098, + "epoch": 1.0099509876726571, + "grad_norm": 0.7727917432785034, + "learning_rate": 8.990049012327342e-06, + "loss": 0.0759, "step": 6800 }, { - "epoch": 0.5057181048566761, - "grad_norm": 1.299988865852356, - "learning_rate": 2.6965691370859942e-05, - "loss": 0.1285, + "epoch": 1.0114362097133522, + "grad_norm": 1.1036444902420044, + "learning_rate": 8.988563790286649e-06, + "loss": 0.0775, "step": 6810 }, { - "epoch": 0.5064607158770236, - "grad_norm": 2.6327931880950928, - "learning_rate": 2.696123570473786e-05, - "loss": 0.0914, + "epoch": 1.0129214317540471, + "grad_norm": 0.6470946073532104, + "learning_rate": 8.987078568245953e-06, + "loss": 0.0671, "step": 6820 }, { - "epoch": 0.5072033268973711, - "grad_norm": 2.3139307498931885, - "learning_rate": 2.6956780038615772e-05, - "loss": 0.0831, + "epoch": 1.0144066537947423, + "grad_norm": 0.7344804406166077, + "learning_rate": 8.985593346205257e-06, + "loss": 0.0814, "step": 6830 }, { - "epoch": 0.5079459379177187, - "grad_norm": 1.7000758647918701, - "learning_rate": 2.6952324372493687e-05, - "loss": 0.0996, + "epoch": 1.0158918758354374, + "grad_norm": 0.8188076019287109, + "learning_rate": 8.984108124164564e-06, + "loss": 0.055, "step": 6840 }, { - "epoch": 0.5086885489380663, - "grad_norm": 2.333949089050293, - "learning_rate": 2.6947868706371606e-05, - "loss": 0.1131, + "epoch": 1.0173770978761325, + "grad_norm": 1.11733078956604, + "learning_rate": 8.982622902123868e-06, + "loss": 0.0975, "step": 6850 }, { - "epoch": 0.5094311599584138, - "grad_norm": 0.8475568294525146, - "learning_rate": 2.6943413040249517e-05, - "loss": 0.0658, + "epoch": 1.0188623199168276, + "grad_norm": 0.87245774269104, + "learning_rate": 8.981137680083172e-06, + "loss": 0.0761, "step": 6860 }, { - "epoch": 0.5101737709787614, - "grad_norm": 2.65226411819458, - "learning_rate": 2.6938957374127432e-05, - "loss": 0.0892, + "epoch": 1.0203475419575228, + "grad_norm": 1.0849484205245972, + "learning_rate": 8.979652458042479e-06, + "loss": 0.0813, "step": 6870 }, { - "epoch": 0.5109163819991088, - "grad_norm": 2.692626953125, - "learning_rate": 2.6934501708005347e-05, - "loss": 0.1133, + "epoch": 1.0218327639982177, + "grad_norm": 1.1357029676437378, + "learning_rate": 8.978167236001783e-06, + "loss": 0.0899, "step": 6880 }, { - "epoch": 0.5116589930194564, - "grad_norm": 1.8590973615646362, - "learning_rate": 2.6930046041883262e-05, - "loss": 0.0935, + "epoch": 1.0233179860389128, + "grad_norm": 1.1423171758651733, + "learning_rate": 8.976682013961087e-06, + "loss": 0.0981, "step": 6890 }, { - "epoch": 0.512401604039804, - "grad_norm": 1.9496850967407227, - "learning_rate": 2.6925590375761177e-05, - "loss": 0.0604, + "epoch": 1.024803208079608, + "grad_norm": 0.6787160038948059, + "learning_rate": 8.975196791920394e-06, + "loss": 0.0804, "step": 6900 }, { - "epoch": 0.5131442150601515, - "grad_norm": 3.5495307445526123, - "learning_rate": 2.692113470963909e-05, - "loss": 0.0863, + "epoch": 1.026288430120303, + "grad_norm": 0.884209394454956, + "learning_rate": 8.973711569879698e-06, + "loss": 0.0762, "step": 6910 }, { - "epoch": 0.5138868260804991, - "grad_norm": 5.097157001495361, - "learning_rate": 2.6916679043517007e-05, - "loss": 0.108, + "epoch": 1.0277736521609981, + "grad_norm": 0.9835346937179565, + "learning_rate": 8.972226347839002e-06, + "loss": 0.0772, "step": 6920 }, { - "epoch": 0.5146294371008466, - "grad_norm": 3.4224822521209717, - "learning_rate": 2.6912223377394922e-05, - "loss": 0.1177, + "epoch": 1.0292588742016933, + "grad_norm": 0.6878470778465271, + "learning_rate": 8.970741125798309e-06, + "loss": 0.061, "step": 6930 }, { - "epoch": 0.5153720481211941, - "grad_norm": 2.013091564178467, - "learning_rate": 2.6907767711272834e-05, - "loss": 0.1062, + "epoch": 1.0307440962423882, + "grad_norm": 1.2484465837478638, + "learning_rate": 8.969255903757611e-06, + "loss": 0.0858, "step": 6940 }, { - "epoch": 0.5161146591415416, - "grad_norm": 1.1934363842010498, - "learning_rate": 2.6903312045150752e-05, - "loss": 0.0608, + "epoch": 1.0322293182830833, + "grad_norm": 0.6175143122673035, + "learning_rate": 8.967770681716917e-06, + "loss": 0.0654, "step": 6950 }, { - "epoch": 0.5168572701618892, - "grad_norm": 3.092979669570923, - "learning_rate": 2.6898856379028667e-05, - "loss": 0.1076, + "epoch": 1.0337145403237784, + "grad_norm": 1.05460786819458, + "learning_rate": 8.966285459676224e-06, + "loss": 0.0791, "step": 6960 }, { - "epoch": 0.5175998811822368, - "grad_norm": 1.059200644493103, - "learning_rate": 2.689440071290658e-05, - "loss": 0.0554, + "epoch": 1.0351997623644735, + "grad_norm": 1.1853206157684326, + "learning_rate": 8.964800237635526e-06, + "loss": 0.0986, "step": 6970 }, { - "epoch": 0.5183424922025843, - "grad_norm": 4.116779804229736, - "learning_rate": 2.6889945046784494e-05, - "loss": 0.0815, + "epoch": 1.0366849844051687, + "grad_norm": 1.2502192258834839, + "learning_rate": 8.963315015594832e-06, + "loss": 0.086, "step": 6980 }, { - "epoch": 0.5190851032229318, - "grad_norm": 1.1455808877944946, - "learning_rate": 2.6885489380662412e-05, - "loss": 0.0623, + "epoch": 1.0381702064458636, + "grad_norm": 0.9244298934936523, + "learning_rate": 8.961829793554137e-06, + "loss": 0.0813, "step": 6990 }, { - "epoch": 0.5198277142432793, - "grad_norm": 0.5762424468994141, - "learning_rate": 2.6881033714540324e-05, - "loss": 0.0587, + "epoch": 1.0396554284865587, + "grad_norm": 0.7741544842720032, + "learning_rate": 8.960344571513441e-06, + "loss": 0.0782, "step": 7000 }, { - "epoch": 0.5205703252636269, - "grad_norm": 2.309788942337036, - "learning_rate": 2.687657804841824e-05, - "loss": 0.0826, + "epoch": 1.0411406505272538, + "grad_norm": 0.9779021143913269, + "learning_rate": 8.958859349472747e-06, + "loss": 0.0823, "step": 7010 }, { - "epoch": 0.5213129362839745, - "grad_norm": 3.214755058288574, - "learning_rate": 2.6872122382296154e-05, - "loss": 0.0845, + "epoch": 1.042625872567949, + "grad_norm": 0.8948909640312195, + "learning_rate": 8.957374127432052e-06, + "loss": 0.0856, "step": 7020 }, { - "epoch": 0.522055547304322, - "grad_norm": 1.238411545753479, - "learning_rate": 2.686766671617407e-05, - "loss": 0.046, + "epoch": 1.044111094608644, + "grad_norm": 0.5438656210899353, + "learning_rate": 8.955888905391356e-06, + "loss": 0.0776, "step": 7030 }, { - "epoch": 0.5227981583246696, - "grad_norm": 0.6192235946655273, - "learning_rate": 2.6863211050051984e-05, - "loss": 0.0523, + "epoch": 1.0455963166493392, + "grad_norm": 0.8294696807861328, + "learning_rate": 8.954403683350662e-06, + "loss": 0.0874, "step": 7040 }, { - "epoch": 0.523540769345017, - "grad_norm": 1.7982896566390991, - "learning_rate": 2.6858755383929895e-05, - "loss": 0.0863, + "epoch": 1.047081538690034, + "grad_norm": 0.8454133868217468, + "learning_rate": 8.952918461309967e-06, + "loss": 0.0689, "step": 7050 }, { - "epoch": 0.5242833803653646, - "grad_norm": 1.1628367900848389, - "learning_rate": 2.6854299717807814e-05, - "loss": 0.094, + "epoch": 1.0485667607307292, + "grad_norm": 1.180791974067688, + "learning_rate": 8.951433239269271e-06, + "loss": 0.111, "step": 7060 }, { - "epoch": 0.5250259913857122, - "grad_norm": 0.698805034160614, - "learning_rate": 2.684984405168573e-05, - "loss": 0.0845, + "epoch": 1.0500519827714243, + "grad_norm": 1.108608365058899, + "learning_rate": 8.949948017228577e-06, + "loss": 0.0797, "step": 7070 }, { - "epoch": 0.5257686024060597, - "grad_norm": 0.9980218410491943, - "learning_rate": 2.684538838556364e-05, - "loss": 0.0718, + "epoch": 1.0515372048121194, + "grad_norm": 0.8830604553222656, + "learning_rate": 8.948462795187882e-06, + "loss": 0.09, "step": 7080 }, { - "epoch": 0.5265112134264073, - "grad_norm": 2.250861883163452, - "learning_rate": 2.684093271944156e-05, - "loss": 0.0761, + "epoch": 1.0530224268528146, + "grad_norm": 0.6073690056800842, + "learning_rate": 8.946977573147186e-06, + "loss": 0.0806, "step": 7090 }, { - "epoch": 0.5272538244467548, - "grad_norm": 0.3348923921585083, - "learning_rate": 2.683647705331947e-05, - "loss": 0.0697, + "epoch": 1.0545076488935097, + "grad_norm": 1.0500218868255615, + "learning_rate": 8.94549235110649e-06, + "loss": 0.0637, "step": 7100 }, { - "epoch": 0.5279964354671023, - "grad_norm": 1.0973154306411743, - "learning_rate": 2.6832021387197385e-05, - "loss": 0.0842, + "epoch": 1.0559928709342046, + "grad_norm": 1.460571527481079, + "learning_rate": 8.944007129065795e-06, + "loss": 0.0845, "step": 7110 }, { - "epoch": 0.5287390464874498, - "grad_norm": 1.2300523519515991, - "learning_rate": 2.6827565721075304e-05, - "loss": 0.0847, + "epoch": 1.0574780929748997, + "grad_norm": 1.5901087522506714, + "learning_rate": 8.942521907025101e-06, + "loss": 0.0919, "step": 7120 }, { - "epoch": 0.5294816575077974, - "grad_norm": 1.7506873607635498, - "learning_rate": 2.6823110054953215e-05, - "loss": 0.0802, + "epoch": 1.0589633150155948, + "grad_norm": 0.7899160981178284, + "learning_rate": 8.941036684984406e-06, + "loss": 0.0682, "step": 7130 }, { - "epoch": 0.530224268528145, - "grad_norm": 2.3556385040283203, - "learning_rate": 2.681865438883113e-05, - "loss": 0.0903, + "epoch": 1.06044853705629, + "grad_norm": 0.6329823732376099, + "learning_rate": 8.93955146294371e-06, + "loss": 0.0978, "step": 7140 }, { - "epoch": 0.5309668795484925, - "grad_norm": 8.310062408447266, - "learning_rate": 2.6814198722709045e-05, - "loss": 0.0855, + "epoch": 1.061933759096985, + "grad_norm": 0.9899727702140808, + "learning_rate": 8.938066240903016e-06, + "loss": 0.059, "step": 7150 }, { - "epoch": 0.5317094905688401, - "grad_norm": 3.2438154220581055, - "learning_rate": 2.680974305658696e-05, - "loss": 0.1041, + "epoch": 1.0634189811376802, + "grad_norm": 0.7236705422401428, + "learning_rate": 8.93658101886232e-06, + "loss": 0.0793, "step": 7160 }, { - "epoch": 0.5324521015891875, - "grad_norm": 1.3913241624832153, - "learning_rate": 2.6805287390464875e-05, - "loss": 0.1044, + "epoch": 1.064904203178375, + "grad_norm": 0.7325375080108643, + "learning_rate": 8.935095796821625e-06, + "loss": 0.0656, "step": 7170 }, { - "epoch": 0.5331947126095351, - "grad_norm": 2.0883166790008545, - "learning_rate": 2.680083172434279e-05, - "loss": 0.0913, + "epoch": 1.0663894252190702, + "grad_norm": 0.44422733783721924, + "learning_rate": 8.933610574780931e-06, + "loss": 0.0677, "step": 7180 }, { - "epoch": 0.5339373236298827, - "grad_norm": 1.3850668668746948, - "learning_rate": 2.6796376058220705e-05, - "loss": 0.0936, + "epoch": 1.0678746472597653, + "grad_norm": 1.4638272523880005, + "learning_rate": 8.932125352740236e-06, + "loss": 0.0797, "step": 7190 }, { - "epoch": 0.5346799346502302, - "grad_norm": 2.546489953994751, - "learning_rate": 2.679192039209862e-05, - "loss": 0.0746, + "epoch": 1.0693598693004605, + "grad_norm": 0.761106014251709, + "learning_rate": 8.93064013069954e-06, + "loss": 0.0653, "step": 7200 }, { - "epoch": 0.5354225456705778, - "grad_norm": 2.2672359943389893, - "learning_rate": 2.6787464725976532e-05, - "loss": 0.0837, + "epoch": 1.0708450913411556, + "grad_norm": 0.7225255370140076, + "learning_rate": 8.929154908658845e-06, + "loss": 0.0757, "step": 7210 }, { - "epoch": 0.5361651566909253, - "grad_norm": 1.1645616292953491, - "learning_rate": 2.6783009059854447e-05, - "loss": 0.0967, + "epoch": 1.0723303133818507, + "grad_norm": 0.7644005417823792, + "learning_rate": 8.92766968661815e-06, + "loss": 0.0851, "step": 7220 }, { - "epoch": 0.5369077677112728, - "grad_norm": 0.8145351409912109, - "learning_rate": 2.6778553393732365e-05, - "loss": 0.0901, + "epoch": 1.0738155354225456, + "grad_norm": 1.4343347549438477, + "learning_rate": 8.926184464577455e-06, + "loss": 0.0987, "step": 7230 }, { - "epoch": 0.5376503787316204, - "grad_norm": 1.619238257408142, - "learning_rate": 2.6774097727610277e-05, - "loss": 0.0894, + "epoch": 1.0753007574632407, + "grad_norm": 1.44232177734375, + "learning_rate": 8.92469924253676e-06, + "loss": 0.0762, "step": 7240 }, { - "epoch": 0.5383929897519679, - "grad_norm": 2.810974597930908, - "learning_rate": 2.6769642061488192e-05, - "loss": 0.0848, + "epoch": 1.0767859795039358, + "grad_norm": 1.0850433111190796, + "learning_rate": 8.923214020496066e-06, + "loss": 0.0647, "step": 7250 }, { - "epoch": 0.5391356007723155, - "grad_norm": 1.5741685628890991, - "learning_rate": 2.676518639536611e-05, - "loss": 0.0748, + "epoch": 1.078271201544631, + "grad_norm": 1.018131136894226, + "learning_rate": 8.92172879845537e-06, + "loss": 0.0659, "step": 7260 }, { - "epoch": 0.539878211792663, - "grad_norm": 0.8893304467201233, - "learning_rate": 2.6760730729244022e-05, - "loss": 0.0754, + "epoch": 1.079756423585326, + "grad_norm": 0.7677077054977417, + "learning_rate": 8.920243576414675e-06, + "loss": 0.055, "step": 7270 }, { - "epoch": 0.5406208228130105, - "grad_norm": 1.7500449419021606, - "learning_rate": 2.6756275063121937e-05, - "loss": 0.0704, + "epoch": 1.081241645626021, + "grad_norm": 1.0646945238113403, + "learning_rate": 8.918758354373979e-06, + "loss": 0.0818, "step": 7280 }, { - "epoch": 0.541363433833358, - "grad_norm": 1.3175913095474243, - "learning_rate": 2.6751819396999855e-05, - "loss": 0.103, + "epoch": 1.082726867666716, + "grad_norm": 1.020941138267517, + "learning_rate": 8.917273132333285e-06, + "loss": 0.0949, "step": 7290 }, { - "epoch": 0.5421060448537056, - "grad_norm": 4.421326160430908, - "learning_rate": 2.6747363730877767e-05, - "loss": 0.1161, + "epoch": 1.0842120897074112, + "grad_norm": 0.8514700531959534, + "learning_rate": 8.91578791029259e-06, + "loss": 0.0887, "step": 7300 }, { - "epoch": 0.5428486558740532, - "grad_norm": 3.222348213195801, - "learning_rate": 2.6742908064755682e-05, - "loss": 0.1346, + "epoch": 1.0856973117481064, + "grad_norm": 1.0452910661697388, + "learning_rate": 8.914302688251894e-06, + "loss": 0.0897, "step": 7310 }, { - "epoch": 0.5435912668944007, - "grad_norm": 2.507253885269165, - "learning_rate": 2.6738452398633594e-05, - "loss": 0.0985, + "epoch": 1.0871825337888015, + "grad_norm": 0.6905787587165833, + "learning_rate": 8.912817466211198e-06, + "loss": 0.0632, "step": 7320 }, { - "epoch": 0.5443338779147483, - "grad_norm": 1.2689093351364136, - "learning_rate": 2.6733996732511512e-05, - "loss": 0.087, + "epoch": 1.0886677558294966, + "grad_norm": 1.0077743530273438, + "learning_rate": 8.911332244170505e-06, + "loss": 0.0552, "step": 7330 }, { - "epoch": 0.5450764889350957, - "grad_norm": 2.3243536949157715, - "learning_rate": 2.6729541066389427e-05, - "loss": 0.0801, + "epoch": 1.0901529778701915, + "grad_norm": 0.8955633044242859, + "learning_rate": 8.909847022129809e-06, + "loss": 0.0685, "step": 7340 }, { - "epoch": 0.5458190999554433, - "grad_norm": 1.2682521343231201, - "learning_rate": 2.672508540026734e-05, - "loss": 0.0924, + "epoch": 1.0916381999108866, + "grad_norm": 0.7802417874336243, + "learning_rate": 8.908361800089113e-06, + "loss": 0.0632, "step": 7350 }, { - "epoch": 0.5465617109757909, - "grad_norm": 3.67933988571167, - "learning_rate": 2.6720629734145257e-05, - "loss": 0.0958, + "epoch": 1.0931234219515817, + "grad_norm": 0.8733623027801514, + "learning_rate": 8.90687657804842e-06, + "loss": 0.074, "step": 7360 }, { - "epoch": 0.5473043219961384, - "grad_norm": 1.3130589723587036, - "learning_rate": 2.6716174068023172e-05, - "loss": 0.0962, + "epoch": 1.0946086439922769, + "grad_norm": 0.8323423266410828, + "learning_rate": 8.905391356007724e-06, + "loss": 0.0713, "step": 7370 }, { - "epoch": 0.548046933016486, - "grad_norm": 2.8078672885894775, - "learning_rate": 2.6711718401901084e-05, - "loss": 0.0919, + "epoch": 1.096093866032972, + "grad_norm": 0.5534403324127197, + "learning_rate": 8.903906133967028e-06, + "loss": 0.0965, "step": 7380 }, { - "epoch": 0.5487895440368336, - "grad_norm": 0.7422177195549011, - "learning_rate": 2.6707262735779e-05, - "loss": 0.0666, + "epoch": 1.097579088073667, + "grad_norm": 1.587559700012207, + "learning_rate": 8.902420911926335e-06, + "loss": 0.0879, "step": 7390 }, { - "epoch": 0.549532155057181, - "grad_norm": 1.9167085886001587, - "learning_rate": 2.6702807069656917e-05, - "loss": 0.1218, + "epoch": 1.099064310114362, + "grad_norm": 0.9629437327384949, + "learning_rate": 8.900935689885639e-06, + "loss": 0.0612, "step": 7400 }, { - "epoch": 0.5502747660775286, - "grad_norm": 2.299405336380005, - "learning_rate": 2.669835140353483e-05, - "loss": 0.0889, + "epoch": 1.1005495321550571, + "grad_norm": 0.6589235067367554, + "learning_rate": 8.899450467844943e-06, + "loss": 0.0443, "step": 7410 }, { - "epoch": 0.5510173770978761, - "grad_norm": 1.3493014574050903, - "learning_rate": 2.6693895737412744e-05, - "loss": 0.0697, + "epoch": 1.1020347541957523, + "grad_norm": 1.0450845956802368, + "learning_rate": 8.89796524580425e-06, + "loss": 0.0619, "step": 7420 }, { - "epoch": 0.5517599881182237, - "grad_norm": 1.426650881767273, - "learning_rate": 2.668944007129066e-05, - "loss": 0.0771, + "epoch": 1.1035199762364474, + "grad_norm": 0.8385497331619263, + "learning_rate": 8.896480023763552e-06, + "loss": 0.0836, "step": 7430 }, { - "epoch": 0.5525025991385712, - "grad_norm": 1.6747151613235474, - "learning_rate": 2.6684984405168573e-05, - "loss": 0.0956, + "epoch": 1.1050051982771425, + "grad_norm": 1.0231484174728394, + "learning_rate": 8.894994801722858e-06, + "loss": 0.0764, "step": 7440 }, { - "epoch": 0.5532452101589188, - "grad_norm": 2.746018171310425, - "learning_rate": 2.668052873904649e-05, - "loss": 0.1107, + "epoch": 1.1064904203178376, + "grad_norm": 0.6052321195602417, + "learning_rate": 8.893509579682163e-06, + "loss": 0.0749, "step": 7450 }, { - "epoch": 0.5539878211792663, - "grad_norm": 2.0050714015960693, - "learning_rate": 2.66760730729244e-05, - "loss": 0.0637, + "epoch": 1.1079756423585325, + "grad_norm": 0.5850058197975159, + "learning_rate": 8.892024357641467e-06, + "loss": 0.0707, "step": 7460 }, { - "epoch": 0.5547304321996138, - "grad_norm": 1.4880417585372925, - "learning_rate": 2.667161740680232e-05, - "loss": 0.13, + "epoch": 1.1094608643992276, + "grad_norm": 0.8802666664123535, + "learning_rate": 8.890539135600773e-06, + "loss": 0.0778, "step": 7470 }, { - "epoch": 0.5554730432199614, - "grad_norm": 0.5780320167541504, - "learning_rate": 2.6667161740680233e-05, - "loss": 0.0815, + "epoch": 1.1109460864399228, + "grad_norm": 1.2311819791793823, + "learning_rate": 8.889053913560078e-06, + "loss": 0.0889, "step": 7480 }, { - "epoch": 0.5562156542403089, - "grad_norm": 4.6798882484436035, - "learning_rate": 2.6662706074558145e-05, - "loss": 0.1212, + "epoch": 1.1124313084806179, + "grad_norm": 0.708466649055481, + "learning_rate": 8.887568691519382e-06, + "loss": 0.0906, "step": 7490 }, { - "epoch": 0.5569582652606565, - "grad_norm": 1.5042418241500854, - "learning_rate": 2.6658250408436063e-05, - "loss": 0.0902, + "epoch": 1.113916530521313, + "grad_norm": 1.0136058330535889, + "learning_rate": 8.886083469478688e-06, + "loss": 0.0673, "step": 7500 }, { - "epoch": 0.5577008762810041, - "grad_norm": 1.637436032295227, - "learning_rate": 2.6653794742313975e-05, - "loss": 0.0608, + "epoch": 1.1154017525620081, + "grad_norm": 1.758001446723938, + "learning_rate": 8.884598247437993e-06, + "loss": 0.0994, "step": 7510 }, { - "epoch": 0.5584434873013515, - "grad_norm": 0.7876498699188232, - "learning_rate": 2.664933907619189e-05, - "loss": 0.0649, + "epoch": 1.116886974602703, + "grad_norm": 1.1525338888168335, + "learning_rate": 8.883113025397297e-06, + "loss": 0.0551, "step": 7520 }, { - "epoch": 0.5591860983216991, - "grad_norm": 1.2821192741394043, - "learning_rate": 2.664488341006981e-05, - "loss": 0.1155, + "epoch": 1.1183721966433982, + "grad_norm": 0.47890013456344604, + "learning_rate": 8.881627803356603e-06, + "loss": 0.0703, "step": 7530 }, { - "epoch": 0.5599287093420466, - "grad_norm": 1.3642898797988892, - "learning_rate": 2.664042774394772e-05, - "loss": 0.0802, + "epoch": 1.1198574186840933, + "grad_norm": 1.017448902130127, + "learning_rate": 8.880142581315908e-06, + "loss": 0.0866, "step": 7540 }, { - "epoch": 0.5606713203623942, - "grad_norm": 1.7505029439926147, - "learning_rate": 2.6635972077825635e-05, - "loss": 0.1007, + "epoch": 1.1213426407247884, + "grad_norm": 0.5454069375991821, + "learning_rate": 8.878657359275212e-06, + "loss": 0.0641, "step": 7550 }, { - "epoch": 0.5614139313827418, - "grad_norm": 0.7114013433456421, - "learning_rate": 2.663151641170355e-05, - "loss": 0.0941, + "epoch": 1.1228278627654835, + "grad_norm": 1.2257490158081055, + "learning_rate": 8.877172137234518e-06, + "loss": 0.0748, "step": 7560 }, { - "epoch": 0.5621565424030892, - "grad_norm": 1.6477638483047485, - "learning_rate": 2.6627060745581465e-05, - "loss": 0.1173, + "epoch": 1.1243130848061784, + "grad_norm": 1.6047561168670654, + "learning_rate": 8.875686915193821e-06, + "loss": 0.0897, "step": 7570 }, { - "epoch": 0.5628991534234368, - "grad_norm": 2.2498269081115723, - "learning_rate": 2.662260507945938e-05, - "loss": 0.1056, + "epoch": 1.1257983068468735, + "grad_norm": 0.7648993730545044, + "learning_rate": 8.874201693153127e-06, + "loss": 0.0707, "step": 7580 }, { - "epoch": 0.5636417644437843, - "grad_norm": 1.7520428895950317, - "learning_rate": 2.6618149413337295e-05, - "loss": 0.0584, + "epoch": 1.1272835288875687, + "grad_norm": 0.9259288907051086, + "learning_rate": 8.872716471112432e-06, + "loss": 0.0869, "step": 7590 }, { - "epoch": 0.5643843754641319, - "grad_norm": 1.958605170249939, - "learning_rate": 2.661369374721521e-05, - "loss": 0.0787, + "epoch": 1.1287687509282638, + "grad_norm": 0.8534162640571594, + "learning_rate": 8.871231249071736e-06, + "loss": 0.0969, "step": 7600 }, { - "epoch": 0.5651269864844795, - "grad_norm": 4.134826183319092, - "learning_rate": 2.6609238081093125e-05, - "loss": 0.1431, + "epoch": 1.130253972968959, + "grad_norm": 0.9752341508865356, + "learning_rate": 8.869746027031042e-06, + "loss": 0.0661, "step": 7610 }, { - "epoch": 0.565869597504827, - "grad_norm": 1.3306151628494263, - "learning_rate": 2.6604782414971037e-05, - "loss": 0.083, + "epoch": 1.131739195009654, + "grad_norm": 0.9760097861289978, + "learning_rate": 8.868260804990347e-06, + "loss": 0.086, "step": 7620 }, { - "epoch": 0.5666122085251745, - "grad_norm": 2.795405626296997, - "learning_rate": 2.660032674884895e-05, - "loss": 0.1119, + "epoch": 1.133224417050349, + "grad_norm": 1.0568870306015015, + "learning_rate": 8.866775582949651e-06, + "loss": 0.0717, "step": 7630 }, { - "epoch": 0.567354819545522, - "grad_norm": 1.0165034532546997, - "learning_rate": 2.659587108272687e-05, - "loss": 0.0823, + "epoch": 1.134709639091044, + "grad_norm": 0.6620248556137085, + "learning_rate": 8.865290360908957e-06, + "loss": 0.0764, "step": 7640 }, { - "epoch": 0.5680974305658696, - "grad_norm": 4.020211219787598, - "learning_rate": 2.659141541660478e-05, - "loss": 0.0867, + "epoch": 1.1361948611317392, + "grad_norm": 0.6814618706703186, + "learning_rate": 8.863805138868262e-06, + "loss": 0.0878, "step": 7650 }, { - "epoch": 0.5688400415862171, - "grad_norm": 1.6462618112564087, - "learning_rate": 2.6586959750482697e-05, - "loss": 0.0917, + "epoch": 1.1376800831724343, + "grad_norm": 0.7755681276321411, + "learning_rate": 8.862319916827566e-06, + "loss": 0.0881, "step": 7660 }, { - "epoch": 0.5695826526065647, - "grad_norm": 1.3625034093856812, - "learning_rate": 2.6582504084360615e-05, - "loss": 0.0833, + "epoch": 1.1391653052131294, + "grad_norm": 1.2066744565963745, + "learning_rate": 8.860834694786872e-06, + "loss": 0.0802, "step": 7670 }, { - "epoch": 0.5703252636269123, - "grad_norm": 4.22099494934082, - "learning_rate": 2.6578048418238527e-05, - "loss": 0.065, + "epoch": 1.1406505272538245, + "grad_norm": 0.7700151801109314, + "learning_rate": 8.859349472746177e-06, + "loss": 0.0635, "step": 7680 }, { - "epoch": 0.5710678746472597, - "grad_norm": 2.1432032585144043, - "learning_rate": 2.657359275211644e-05, - "loss": 0.1104, + "epoch": 1.1421357492945194, + "grad_norm": 0.769986629486084, + "learning_rate": 8.857864250705481e-06, + "loss": 0.0758, "step": 7690 }, { - "epoch": 0.5718104856676073, - "grad_norm": 1.7687879800796509, - "learning_rate": 2.656913708599436e-05, - "loss": 0.0829, + "epoch": 1.1436209713352146, + "grad_norm": 1.4506511688232422, + "learning_rate": 8.856379028664787e-06, + "loss": 0.0762, "step": 7700 }, { - "epoch": 0.5725530966879548, - "grad_norm": 2.1364059448242188, - "learning_rate": 2.656468141987227e-05, - "loss": 0.0949, + "epoch": 1.1451061933759097, + "grad_norm": 0.9908505082130432, + "learning_rate": 8.854893806624092e-06, + "loss": 0.0516, "step": 7710 }, { - "epoch": 0.5732957077083024, - "grad_norm": 1.6391818523406982, - "learning_rate": 2.6560225753750187e-05, - "loss": 0.0945, + "epoch": 1.1465914154166048, + "grad_norm": 0.6923515200614929, + "learning_rate": 8.853408584583396e-06, + "loss": 0.0843, "step": 7720 }, { - "epoch": 0.57403831872865, - "grad_norm": 1.3026098012924194, - "learning_rate": 2.6555770087628098e-05, - "loss": 0.0608, + "epoch": 1.1480766374573, + "grad_norm": 0.9555417895317078, + "learning_rate": 8.8519233625427e-06, + "loss": 0.0599, "step": 7730 }, { - "epoch": 0.5747809297489975, - "grad_norm": 3.263688325881958, - "learning_rate": 2.6551314421506017e-05, - "loss": 0.0749, + "epoch": 1.1495618594979948, + "grad_norm": 1.2183533906936646, + "learning_rate": 8.850438140502005e-06, + "loss": 0.107, "step": 7740 }, { - "epoch": 0.575523540769345, - "grad_norm": 1.6053320169448853, - "learning_rate": 2.654685875538393e-05, - "loss": 0.0739, + "epoch": 1.15104708153869, + "grad_norm": 1.6629198789596558, + "learning_rate": 8.848952918461311e-06, + "loss": 0.0905, "step": 7750 }, { - "epoch": 0.5762661517896925, - "grad_norm": 2.198606491088867, - "learning_rate": 2.6542403089261843e-05, - "loss": 0.1319, + "epoch": 1.152532303579385, + "grad_norm": 0.6286592483520508, + "learning_rate": 8.847467696420616e-06, + "loss": 0.0552, "step": 7760 }, { - "epoch": 0.5770087628100401, - "grad_norm": 2.7301955223083496, - "learning_rate": 2.653794742313976e-05, - "loss": 0.1023, + "epoch": 1.1540175256200802, + "grad_norm": 0.8661282062530518, + "learning_rate": 8.84598247437992e-06, + "loss": 0.0988, "step": 7770 }, { - "epoch": 0.5777513738303877, - "grad_norm": 1.7216728925704956, - "learning_rate": 2.6533491757017677e-05, - "loss": 0.0893, + "epoch": 1.1555027476607753, + "grad_norm": 0.8588027358055115, + "learning_rate": 8.844497252339226e-06, + "loss": 0.063, "step": 7780 }, { - "epoch": 0.5784939848507352, - "grad_norm": 2.964611053466797, - "learning_rate": 2.6529036090895588e-05, - "loss": 0.0759, + "epoch": 1.1569879697014704, + "grad_norm": 0.9548928141593933, + "learning_rate": 8.84301203029853e-06, + "loss": 0.0968, "step": 7790 }, { - "epoch": 0.5792365958710828, - "grad_norm": 1.5537538528442383, - "learning_rate": 2.6524580424773503e-05, - "loss": 0.0987, + "epoch": 1.1584731917421656, + "grad_norm": 0.8153151273727417, + "learning_rate": 8.841526808257835e-06, + "loss": 0.0866, "step": 7800 }, { - "epoch": 0.5799792068914302, - "grad_norm": 5.19601583480835, - "learning_rate": 2.652012475865142e-05, - "loss": 0.0846, + "epoch": 1.1599584137828605, + "grad_norm": 1.0286554098129272, + "learning_rate": 8.840041586217141e-06, + "loss": 0.09, "step": 7810 }, { - "epoch": 0.5807218179117778, - "grad_norm": 1.2774734497070312, - "learning_rate": 2.6515669092529333e-05, - "loss": 0.0721, + "epoch": 1.1614436358235556, + "grad_norm": 0.9278287291526794, + "learning_rate": 8.838556364176445e-06, + "loss": 0.053, "step": 7820 }, { - "epoch": 0.5814644289321254, - "grad_norm": 1.1469454765319824, - "learning_rate": 2.6511213426407248e-05, - "loss": 0.114, + "epoch": 1.1629288578642507, + "grad_norm": 0.8616372346878052, + "learning_rate": 8.83707114213575e-06, + "loss": 0.0755, "step": 7830 }, { - "epoch": 0.5822070399524729, - "grad_norm": 2.6085078716278076, - "learning_rate": 2.6506757760285163e-05, - "loss": 0.0769, + "epoch": 1.1644140799049458, + "grad_norm": 2.0150420665740967, + "learning_rate": 8.835585920095054e-06, + "loss": 0.1022, "step": 7840 }, { - "epoch": 0.5829496509728205, - "grad_norm": 0.7333324551582336, - "learning_rate": 2.6502302094163078e-05, - "loss": 0.0583, + "epoch": 1.165899301945641, + "grad_norm": 0.729625940322876, + "learning_rate": 8.83410069805436e-06, + "loss": 0.0548, "step": 7850 }, { - "epoch": 0.5836922619931679, - "grad_norm": 3.896169900894165, - "learning_rate": 2.6497846428040993e-05, - "loss": 0.0971, + "epoch": 1.1673845239863359, + "grad_norm": 0.643584668636322, + "learning_rate": 8.832615476013665e-06, + "loss": 0.0702, "step": 7860 }, { - "epoch": 0.5844348730135155, - "grad_norm": 0.8232213854789734, - "learning_rate": 2.6493390761918908e-05, - "loss": 0.0795, + "epoch": 1.168869746027031, + "grad_norm": 0.6835783123970032, + "learning_rate": 8.83113025397297e-06, + "loss": 0.057, "step": 7870 }, { - "epoch": 0.585177484033863, - "grad_norm": 2.7149336338043213, - "learning_rate": 2.6488935095796823e-05, - "loss": 0.0847, + "epoch": 1.170354968067726, + "grad_norm": 1.8533565998077393, + "learning_rate": 8.829645031932274e-06, + "loss": 0.0726, "step": 7880 }, { - "epoch": 0.5859200950542106, - "grad_norm": 2.990295886993408, - "learning_rate": 2.6484479429674738e-05, - "loss": 0.0692, + "epoch": 1.1718401901084212, + "grad_norm": 0.8263382911682129, + "learning_rate": 8.82815980989158e-06, + "loss": 0.0695, "step": 7890 }, { - "epoch": 0.5866627060745582, - "grad_norm": 3.7285399436950684, - "learning_rate": 2.648002376355265e-05, - "loss": 0.0734, + "epoch": 1.1733254121491163, + "grad_norm": 0.6495395302772522, + "learning_rate": 8.826674587850884e-06, + "loss": 0.0593, "step": 7900 }, { - "epoch": 0.5874053170949057, - "grad_norm": 1.7510877847671509, - "learning_rate": 2.6475568097430568e-05, - "loss": 0.0944, + "epoch": 1.1748106341898115, + "grad_norm": 0.5065099000930786, + "learning_rate": 8.825189365810189e-06, + "loss": 0.0544, "step": 7910 }, { - "epoch": 0.5881479281152532, - "grad_norm": 2.186464786529541, - "learning_rate": 2.6471112431308483e-05, - "loss": 0.1009, + "epoch": 1.1762958562305064, + "grad_norm": 0.8669567108154297, + "learning_rate": 8.823704143769495e-06, + "loss": 0.0947, "step": 7920 }, { - "epoch": 0.5888905391356007, - "grad_norm": 3.2270450592041016, - "learning_rate": 2.6466656765186395e-05, - "loss": 0.0815, + "epoch": 1.1777810782712015, + "grad_norm": 0.9733984470367432, + "learning_rate": 8.8222189217288e-06, + "loss": 0.1057, "step": 7930 }, { - "epoch": 0.5896331501559483, - "grad_norm": 2.993773937225342, - "learning_rate": 2.6462201099064313e-05, - "loss": 0.0764, + "epoch": 1.1792663003118966, + "grad_norm": 1.1606910228729248, + "learning_rate": 8.820733699688104e-06, + "loss": 0.0854, "step": 7940 }, { - "epoch": 0.5903757611762959, - "grad_norm": 0.9298529624938965, - "learning_rate": 2.6457745432942225e-05, - "loss": 0.0933, + "epoch": 1.1807515223525917, + "grad_norm": 1.0298819541931152, + "learning_rate": 8.819248477647408e-06, + "loss": 0.068, "step": 7950 }, { - "epoch": 0.5911183721966434, - "grad_norm": 3.5018069744110107, - "learning_rate": 2.645328976682014e-05, - "loss": 0.1001, + "epoch": 1.1822367443932869, + "grad_norm": 0.5206531882286072, + "learning_rate": 8.817763255606714e-06, + "loss": 0.0684, "step": 7960 }, { - "epoch": 0.591860983216991, - "grad_norm": 1.2015966176986694, - "learning_rate": 2.6448834100698055e-05, - "loss": 0.0839, + "epoch": 1.183721966433982, + "grad_norm": 0.8717692494392395, + "learning_rate": 8.816278033566019e-06, + "loss": 0.0844, "step": 7970 }, { - "epoch": 0.5926035942373384, - "grad_norm": 1.9032807350158691, - "learning_rate": 2.644437843457597e-05, - "loss": 0.1019, + "epoch": 1.1852071884746769, + "grad_norm": 1.166616439819336, + "learning_rate": 8.814792811525323e-06, + "loss": 0.0636, "step": 7980 }, { - "epoch": 0.593346205257686, - "grad_norm": 2.2178187370300293, - "learning_rate": 2.6439922768453885e-05, - "loss": 0.0525, + "epoch": 1.186692410515372, + "grad_norm": 1.462874412536621, + "learning_rate": 8.81330758948463e-06, + "loss": 0.0764, "step": 7990 }, { - "epoch": 0.5940888162780336, - "grad_norm": 2.323493719100952, - "learning_rate": 2.64354671023318e-05, - "loss": 0.1142, + "epoch": 1.1881776325560671, + "grad_norm": 0.7792450189590454, + "learning_rate": 8.811822367443934e-06, + "loss": 0.0603, "step": 8000 }, { - "epoch": 0.5948314272983811, - "grad_norm": 5.971455097198486, - "learning_rate": 2.6431011436209715e-05, - "loss": 0.1032, + "epoch": 1.1896628545967622, + "grad_norm": 0.9004107117652893, + "learning_rate": 8.810337145403238e-06, + "loss": 0.0753, "step": 8010 }, { - "epoch": 0.5955740383187287, - "grad_norm": 1.563317060470581, - "learning_rate": 2.642655577008763e-05, - "loss": 0.0701, + "epoch": 1.1911480766374574, + "grad_norm": 0.97831130027771, + "learning_rate": 8.808851923362544e-06, + "loss": 0.066, "step": 8020 }, { - "epoch": 0.5963166493390762, - "grad_norm": 0.8707819581031799, - "learning_rate": 2.642210010396554e-05, - "loss": 0.1017, + "epoch": 1.1926332986781523, + "grad_norm": 0.7797183990478516, + "learning_rate": 8.807366701321847e-06, + "loss": 0.083, "step": 8030 }, { - "epoch": 0.5970592603594237, - "grad_norm": 1.7506704330444336, - "learning_rate": 2.6417644437843456e-05, - "loss": 0.0926, + "epoch": 1.1941185207188474, + "grad_norm": 0.5634157657623291, + "learning_rate": 8.805881479281153e-06, + "loss": 0.0778, "step": 8040 }, { - "epoch": 0.5978018713797713, - "grad_norm": 1.1731964349746704, - "learning_rate": 2.6413188771721375e-05, - "loss": 0.0853, + "epoch": 1.1956037427595425, + "grad_norm": 0.5460097193717957, + "learning_rate": 8.804396257240458e-06, + "loss": 0.0744, "step": 8050 }, { - "epoch": 0.5985444824001188, - "grad_norm": 2.8358826637268066, - "learning_rate": 2.6408733105599286e-05, - "loss": 0.0797, + "epoch": 1.1970889648002376, + "grad_norm": 0.8219186067581177, + "learning_rate": 8.802911035199762e-06, + "loss": 0.0696, "step": 8060 }, { - "epoch": 0.5992870934204664, - "grad_norm": 2.624128818511963, - "learning_rate": 2.64042774394772e-05, - "loss": 0.1109, + "epoch": 1.1985741868409328, + "grad_norm": 1.00192391872406, + "learning_rate": 8.801425813159068e-06, + "loss": 0.0998, "step": 8070 }, { - "epoch": 0.6000297044408139, - "grad_norm": 1.6009690761566162, - "learning_rate": 2.639982177335512e-05, - "loss": 0.0783, + "epoch": 1.2000594088816279, + "grad_norm": 0.5614769458770752, + "learning_rate": 8.799940591118373e-06, + "loss": 0.0612, "step": 8080 }, { - "epoch": 0.6007723154611615, - "grad_norm": 1.9895691871643066, - "learning_rate": 2.639536610723303e-05, - "loss": 0.1047, + "epoch": 1.201544630922323, + "grad_norm": 0.6137503981590271, + "learning_rate": 8.798455369077677e-06, + "loss": 0.0624, "step": 8090 }, { - "epoch": 0.601514926481509, - "grad_norm": 0.5253069996833801, - "learning_rate": 2.6390910441110946e-05, - "loss": 0.0767, + "epoch": 1.203029852963018, + "grad_norm": 0.8568705916404724, + "learning_rate": 8.796970147036983e-06, + "loss": 0.0826, "step": 8100 }, { - "epoch": 0.6022575375018565, - "grad_norm": 2.7466979026794434, - "learning_rate": 2.6386454774988865e-05, - "loss": 0.1141, + "epoch": 1.204515075003713, + "grad_norm": 1.0990469455718994, + "learning_rate": 8.795484924996288e-06, + "loss": 0.0755, "step": 8110 }, { - "epoch": 0.6030001485222041, - "grad_norm": 0.8582619428634644, - "learning_rate": 2.6381999108866776e-05, - "loss": 0.0747, + "epoch": 1.2060002970444081, + "grad_norm": 0.5455279350280762, + "learning_rate": 8.793999702955592e-06, + "loss": 0.0614, "step": 8120 }, { - "epoch": 0.6037427595425516, - "grad_norm": 1.5232957601547241, - "learning_rate": 2.637754344274469e-05, - "loss": 0.0756, + "epoch": 1.2074855190851033, + "grad_norm": 1.0191229581832886, + "learning_rate": 8.792514480914898e-06, + "loss": 0.0721, "step": 8130 }, { - "epoch": 0.6044853705628992, - "grad_norm": 1.6779173612594604, - "learning_rate": 2.6373087776622603e-05, - "loss": 0.06, + "epoch": 1.2089707411257984, + "grad_norm": 1.4468733072280884, + "learning_rate": 8.791029258874203e-06, + "loss": 0.0728, "step": 8140 }, { - "epoch": 0.6052279815832466, - "grad_norm": 1.4858782291412354, - "learning_rate": 2.636863211050052e-05, - "loss": 0.0934, + "epoch": 1.2104559631664933, + "grad_norm": 0.4742625653743744, + "learning_rate": 8.789544036833507e-06, + "loss": 0.0678, "step": 8150 }, { - "epoch": 0.6059705926035942, - "grad_norm": 1.1481568813323975, - "learning_rate": 2.6364176444378436e-05, - "loss": 0.1104, + "epoch": 1.2119411852071884, + "grad_norm": 0.7147884368896484, + "learning_rate": 8.788058814792813e-06, + "loss": 0.0823, "step": 8160 }, { - "epoch": 0.6067132036239418, - "grad_norm": 2.9699254035949707, - "learning_rate": 2.6359720778256348e-05, - "loss": 0.0974, + "epoch": 1.2134264072478835, + "grad_norm": 0.9481222629547119, + "learning_rate": 8.786573592752116e-06, + "loss": 0.0847, "step": 8170 }, { - "epoch": 0.6074558146442893, - "grad_norm": 2.6445741653442383, - "learning_rate": 2.6355265112134266e-05, - "loss": 0.0873, + "epoch": 1.2149116292885787, + "grad_norm": 0.30851230025291443, + "learning_rate": 8.785088370711422e-06, + "loss": 0.0688, "step": 8180 }, { - "epoch": 0.6081984256646369, - "grad_norm": 0.4445909559726715, - "learning_rate": 2.635080944601218e-05, - "loss": 0.0713, + "epoch": 1.2163968513292738, + "grad_norm": 1.409021258354187, + "learning_rate": 8.783603148670728e-06, + "loss": 0.0663, "step": 8190 }, { - "epoch": 0.6089410366849844, - "grad_norm": 2.068956136703491, - "learning_rate": 2.6346353779890093e-05, - "loss": 0.0632, + "epoch": 1.217882073369969, + "grad_norm": 0.8977643847465515, + "learning_rate": 8.782117926630031e-06, + "loss": 0.0757, "step": 8200 }, { - "epoch": 0.6096836477053319, - "grad_norm": 2.9205379486083984, - "learning_rate": 2.6341898113768008e-05, - "loss": 0.061, + "epoch": 1.2193672954106638, + "grad_norm": 0.9754537343978882, + "learning_rate": 8.780632704589337e-06, + "loss": 0.0637, "step": 8210 }, { - "epoch": 0.6104262587256795, - "grad_norm": 0.8605203032493591, - "learning_rate": 2.6337442447645926e-05, - "loss": 0.1332, + "epoch": 1.220852517451359, + "grad_norm": 0.9389632344245911, + "learning_rate": 8.779147482548641e-06, + "loss": 0.0804, "step": 8220 }, { - "epoch": 0.611168869746027, - "grad_norm": 1.654402256011963, - "learning_rate": 2.6332986781523838e-05, - "loss": 0.0808, + "epoch": 1.222337739492054, + "grad_norm": 0.9132267236709595, + "learning_rate": 8.777662260507946e-06, + "loss": 0.0666, "step": 8230 }, { - "epoch": 0.6119114807663746, - "grad_norm": 3.046501636505127, - "learning_rate": 2.6328531115401753e-05, - "loss": 0.1068, + "epoch": 1.2238229615327492, + "grad_norm": 1.6683911085128784, + "learning_rate": 8.776177038467252e-06, + "loss": 0.0733, "step": 8240 }, { - "epoch": 0.6126540917867221, - "grad_norm": 2.973254680633545, - "learning_rate": 2.6324075449279668e-05, - "loss": 0.0951, + "epoch": 1.2253081835734443, + "grad_norm": 0.4304860532283783, + "learning_rate": 8.774691816426556e-06, + "loss": 0.0705, "step": 8250 }, { - "epoch": 0.6133967028070697, - "grad_norm": 0.9132028222084045, - "learning_rate": 2.6319619783157583e-05, - "loss": 0.0798, + "epoch": 1.2267934056141394, + "grad_norm": 0.8889797329902649, + "learning_rate": 8.773206594385861e-06, + "loss": 0.1116, "step": 8260 }, { - "epoch": 0.6141393138274172, - "grad_norm": 1.8893526792526245, - "learning_rate": 2.6315164117035498e-05, - "loss": 0.0725, + "epoch": 1.2282786276548343, + "grad_norm": 0.8538875579833984, + "learning_rate": 8.771721372345167e-06, + "loss": 0.1, "step": 8270 }, { - "epoch": 0.6148819248477647, - "grad_norm": 2.337425708770752, - "learning_rate": 2.6310708450913413e-05, - "loss": 0.0709, + "epoch": 1.2297638496955294, + "grad_norm": 1.2892072200775146, + "learning_rate": 8.770236150304471e-06, + "loss": 0.0844, "step": 8280 }, { - "epoch": 0.6156245358681123, - "grad_norm": 1.1997939348220825, - "learning_rate": 2.6306252784791328e-05, - "loss": 0.0892, + "epoch": 1.2312490717362246, + "grad_norm": 1.4475103616714478, + "learning_rate": 8.768750928263776e-06, + "loss": 0.0649, "step": 8290 }, { - "epoch": 0.6163671468884598, - "grad_norm": 2.1006369590759277, - "learning_rate": 2.6301797118669243e-05, - "loss": 0.0877, + "epoch": 1.2327342937769197, + "grad_norm": 0.9254520535469055, + "learning_rate": 8.767265706223082e-06, + "loss": 0.0577, "step": 8300 }, { - "epoch": 0.6171097579088074, - "grad_norm": 1.0404902696609497, - "learning_rate": 2.6297341452547154e-05, - "loss": 0.0733, + "epoch": 1.2342195158176148, + "grad_norm": 0.4279841482639313, + "learning_rate": 8.765780484182386e-06, + "loss": 0.0777, "step": 8310 }, { - "epoch": 0.617852368929155, - "grad_norm": 1.4689126014709473, - "learning_rate": 2.6292885786425073e-05, - "loss": 0.0738, + "epoch": 1.2357047378583097, + "grad_norm": 0.579756498336792, + "learning_rate": 8.764295262141691e-06, + "loss": 0.0858, "step": 8320 }, { - "epoch": 0.6185949799495024, - "grad_norm": 1.669219970703125, - "learning_rate": 2.6288430120302988e-05, - "loss": 0.0795, + "epoch": 1.2371899598990048, + "grad_norm": 0.5551837086677551, + "learning_rate": 8.762810040100997e-06, + "loss": 0.0794, "step": 8330 }, { - "epoch": 0.61933759096985, - "grad_norm": 1.8779352903366089, - "learning_rate": 2.62839744541809e-05, - "loss": 0.0967, + "epoch": 1.2386751819397, + "grad_norm": 1.2064443826675415, + "learning_rate": 8.7613248180603e-06, + "loss": 0.0644, "step": 8340 }, { - "epoch": 0.6200802019901975, - "grad_norm": 2.112928867340088, - "learning_rate": 2.6279518788058818e-05, - "loss": 0.0695, + "epoch": 1.240160403980395, + "grad_norm": 1.0003966093063354, + "learning_rate": 8.759839596019606e-06, + "loss": 0.0757, "step": 8350 }, { - "epoch": 0.6208228130105451, - "grad_norm": 1.240665078163147, - "learning_rate": 2.627506312193673e-05, - "loss": 0.1107, + "epoch": 1.2416456260210902, + "grad_norm": 0.8278673887252808, + "learning_rate": 8.75835437397891e-06, + "loss": 0.0797, "step": 8360 }, { - "epoch": 0.6215654240308927, - "grad_norm": 1.4730993509292603, - "learning_rate": 2.6270607455814644e-05, - "loss": 0.0955, + "epoch": 1.2431308480617853, + "grad_norm": 0.7456088066101074, + "learning_rate": 8.756869151938215e-06, + "loss": 0.0763, "step": 8370 }, { - "epoch": 0.6223080350512402, - "grad_norm": 1.134709119796753, - "learning_rate": 2.626615178969256e-05, - "loss": 0.1074, + "epoch": 1.2446160701024804, + "grad_norm": 0.812319815158844, + "learning_rate": 8.755383929897521e-06, + "loss": 0.0918, "step": 8380 }, { - "epoch": 0.6230506460715877, - "grad_norm": 2.589599132537842, - "learning_rate": 2.6261696123570474e-05, - "loss": 0.0796, + "epoch": 1.2461012921431753, + "grad_norm": 0.9069823026657104, + "learning_rate": 8.753898707856825e-06, + "loss": 0.0558, "step": 8390 }, { - "epoch": 0.6237932570919352, - "grad_norm": 2.0985918045043945, - "learning_rate": 2.625724045744839e-05, - "loss": 0.0877, + "epoch": 1.2475865141838705, + "grad_norm": 1.3703268766403198, + "learning_rate": 8.75241348581613e-06, + "loss": 0.0841, "step": 8400 }, { - "epoch": 0.6245358681122828, - "grad_norm": 4.56246280670166, - "learning_rate": 2.6252784791326304e-05, - "loss": 0.0804, + "epoch": 1.2490717362245656, + "grad_norm": 0.5158610343933105, + "learning_rate": 8.750928263775436e-06, + "loss": 0.0826, "step": 8410 }, { - "epoch": 0.6252784791326303, - "grad_norm": 2.3391928672790527, - "learning_rate": 2.624832912520422e-05, - "loss": 0.0987, + "epoch": 1.2505569582652607, + "grad_norm": 0.916165292263031, + "learning_rate": 8.74944304173474e-06, + "loss": 0.0682, "step": 8420 }, { - "epoch": 0.6260210901529779, - "grad_norm": 2.231879472732544, - "learning_rate": 2.6243873459082134e-05, - "loss": 0.0951, + "epoch": 1.2520421803059558, + "grad_norm": 0.6337945461273193, + "learning_rate": 8.747957819694045e-06, + "loss": 0.0843, "step": 8430 }, { - "epoch": 0.6267637011733254, - "grad_norm": 0.44721463322639465, - "learning_rate": 2.623941779296005e-05, - "loss": 0.0713, + "epoch": 1.2535274023466507, + "grad_norm": 0.45069465041160583, + "learning_rate": 8.746472597653351e-06, + "loss": 0.0725, "step": 8440 }, { - "epoch": 0.6275063121936729, - "grad_norm": 1.6443843841552734, - "learning_rate": 2.623496212683796e-05, - "loss": 0.1187, + "epoch": 1.2550126243873458, + "grad_norm": 0.5957522988319397, + "learning_rate": 8.744987375612655e-06, + "loss": 0.0728, "step": 8450 }, { - "epoch": 0.6282489232140205, - "grad_norm": 3.586520195007324, - "learning_rate": 2.623050646071588e-05, - "loss": 0.0957, + "epoch": 1.256497846428041, + "grad_norm": 0.9234186410903931, + "learning_rate": 8.74350215357196e-06, + "loss": 0.0751, "step": 8460 }, { - "epoch": 0.628991534234368, - "grad_norm": 1.5607584714889526, - "learning_rate": 2.622605079459379e-05, - "loss": 0.0959, + "epoch": 1.257983068468736, + "grad_norm": 1.337033987045288, + "learning_rate": 8.742016931531264e-06, + "loss": 0.0743, "step": 8470 }, { - "epoch": 0.6297341452547156, - "grad_norm": 2.6211838722229004, - "learning_rate": 2.6221595128471706e-05, - "loss": 0.1139, + "epoch": 1.2594682905094312, + "grad_norm": 0.7764634490013123, + "learning_rate": 8.74053170949057e-06, + "loss": 0.0659, "step": 8480 }, { - "epoch": 0.6304767562750632, - "grad_norm": 2.3073689937591553, - "learning_rate": 2.6217139462349624e-05, - "loss": 0.0912, + "epoch": 1.260953512550126, + "grad_norm": 0.7629880905151367, + "learning_rate": 8.739046487449875e-06, + "loss": 0.0865, "step": 8490 }, { - "epoch": 0.6312193672954106, - "grad_norm": 1.4929405450820923, - "learning_rate": 2.6212683796227536e-05, - "loss": 0.0652, + "epoch": 1.2624387345908215, + "grad_norm": 0.7267442941665649, + "learning_rate": 8.73756126540918e-06, + "loss": 0.0788, "step": 8500 }, { - "epoch": 0.6319619783157582, - "grad_norm": 4.748650074005127, - "learning_rate": 2.620822813010545e-05, - "loss": 0.0861, + "epoch": 1.2639239566315164, + "grad_norm": 0.7518733143806458, + "learning_rate": 8.736076043368484e-06, + "loss": 0.075, "step": 8510 }, { - "epoch": 0.6327045893361057, - "grad_norm": 1.6058298349380493, - "learning_rate": 2.620377246398337e-05, - "loss": 0.094, + "epoch": 1.2654091786722115, + "grad_norm": 0.6665178537368774, + "learning_rate": 8.73459082132779e-06, + "loss": 0.066, "step": 8520 }, { - "epoch": 0.6334472003564533, - "grad_norm": 0.9887398481369019, - "learning_rate": 2.619931679786128e-05, - "loss": 0.0531, + "epoch": 1.2668944007129066, + "grad_norm": 1.201786756515503, + "learning_rate": 8.733105599287094e-06, + "loss": 0.0843, "step": 8530 }, { - "epoch": 0.6341898113768009, - "grad_norm": 1.5614607334136963, - "learning_rate": 2.6194861131739196e-05, - "loss": 0.0575, + "epoch": 1.2683796227536017, + "grad_norm": 0.5769052505493164, + "learning_rate": 8.731620377246399e-06, + "loss": 0.0721, "step": 8540 }, { - "epoch": 0.6349324223971484, - "grad_norm": 0.485836923122406, - "learning_rate": 2.6190405465617107e-05, - "loss": 0.0738, + "epoch": 1.2698648447942968, + "grad_norm": 0.56085205078125, + "learning_rate": 8.730135155205703e-06, + "loss": 0.0684, "step": 8550 }, { - "epoch": 0.6356750334174959, - "grad_norm": 2.3071041107177734, - "learning_rate": 2.6185949799495026e-05, - "loss": 0.0965, + "epoch": 1.2713500668349917, + "grad_norm": 0.7175249457359314, + "learning_rate": 8.728649933165009e-06, + "loss": 0.0693, "step": 8560 }, { - "epoch": 0.6364176444378434, - "grad_norm": 1.3034030199050903, - "learning_rate": 2.618149413337294e-05, - "loss": 0.0766, + "epoch": 1.2728352888756869, + "grad_norm": 0.6406940221786499, + "learning_rate": 8.727164711124314e-06, + "loss": 0.0802, "step": 8570 }, { - "epoch": 0.637160255458191, - "grad_norm": 2.9087538719177246, - "learning_rate": 2.6177038467250852e-05, - "loss": 0.0808, + "epoch": 1.274320510916382, + "grad_norm": 0.6077917218208313, + "learning_rate": 8.725679489083618e-06, + "loss": 0.0711, "step": 8580 }, { - "epoch": 0.6379028664785386, - "grad_norm": 1.7364327907562256, - "learning_rate": 2.617258280112877e-05, - "loss": 0.0727, + "epoch": 1.275805732957077, + "grad_norm": 0.6041744947433472, + "learning_rate": 8.724194267042924e-06, + "loss": 0.0625, "step": 8590 }, { - "epoch": 0.6386454774988861, - "grad_norm": 1.3691768646240234, - "learning_rate": 2.6168127135006686e-05, - "loss": 0.1037, + "epoch": 1.2772909549977722, + "grad_norm": 0.5732704401016235, + "learning_rate": 8.722709045002229e-06, + "loss": 0.0795, "step": 8600 }, { - "epoch": 0.6393880885192337, - "grad_norm": 3.924298048019409, - "learning_rate": 2.6163671468884597e-05, - "loss": 0.1055, + "epoch": 1.2787761770384671, + "grad_norm": 0.7540349364280701, + "learning_rate": 8.721223822961533e-06, + "loss": 0.0585, "step": 8610 }, { - "epoch": 0.6401306995395811, - "grad_norm": 4.720126152038574, - "learning_rate": 2.6159215802762512e-05, - "loss": 0.0634, + "epoch": 1.2802613990791623, + "grad_norm": 0.8742958903312683, + "learning_rate": 8.719738600920839e-06, + "loss": 0.0833, "step": 8620 }, { - "epoch": 0.6408733105599287, - "grad_norm": 0.719524621963501, - "learning_rate": 2.615476013664043e-05, - "loss": 0.105, + "epoch": 1.2817466211198574, + "grad_norm": 1.4646620750427246, + "learning_rate": 8.718253378880144e-06, + "loss": 0.11, "step": 8630 }, { - "epoch": 0.6416159215802762, - "grad_norm": 2.0264840126037598, - "learning_rate": 2.6150304470518342e-05, - "loss": 0.0827, + "epoch": 1.2832318431605525, + "grad_norm": 1.2368395328521729, + "learning_rate": 8.716768156839448e-06, + "loss": 0.0799, "step": 8640 }, { - "epoch": 0.6423585326006238, - "grad_norm": 2.5915403366088867, - "learning_rate": 2.6145848804396257e-05, - "loss": 0.0989, + "epoch": 1.2847170652012476, + "grad_norm": 1.3969573974609375, + "learning_rate": 8.715282934798754e-06, + "loss": 0.0703, "step": 8650 }, { - "epoch": 0.6431011436209714, - "grad_norm": 1.406114935874939, - "learning_rate": 2.6141393138274172e-05, - "loss": 0.0776, + "epoch": 1.2862022872419427, + "grad_norm": 0.9338309168815613, + "learning_rate": 8.713797712758057e-06, + "loss": 0.0826, "step": 8660 }, { - "epoch": 0.6438437546413188, - "grad_norm": 1.9596368074417114, - "learning_rate": 2.6136937472152087e-05, - "loss": 0.1162, + "epoch": 1.2876875092826379, + "grad_norm": 0.808988094329834, + "learning_rate": 8.712312490717363e-06, + "loss": 0.0809, "step": 8670 }, { - "epoch": 0.6445863656616664, - "grad_norm": 1.9101582765579224, - "learning_rate": 2.6132481806030002e-05, - "loss": 0.1058, + "epoch": 1.2891727313233328, + "grad_norm": 1.0200146436691284, + "learning_rate": 8.710827268676667e-06, + "loss": 0.0738, "step": 8680 }, { - "epoch": 0.6453289766820139, - "grad_norm": 3.665165424346924, - "learning_rate": 2.6128026139907917e-05, - "loss": 0.0834, + "epoch": 1.2906579533640279, + "grad_norm": 0.5110995173454285, + "learning_rate": 8.709342046635972e-06, + "loss": 0.0787, "step": 8690 }, { - "epoch": 0.6460715877023615, - "grad_norm": 1.8130497932434082, - "learning_rate": 2.6123570473785832e-05, - "loss": 0.0572, + "epoch": 1.292143175404723, + "grad_norm": 0.5956591963768005, + "learning_rate": 8.707856824595278e-06, + "loss": 0.0571, "step": 8700 }, { - "epoch": 0.6468141987227091, - "grad_norm": 2.0652337074279785, - "learning_rate": 2.6119114807663747e-05, - "loss": 0.1246, + "epoch": 1.2936283974454181, + "grad_norm": 0.5521881580352783, + "learning_rate": 8.706371602554582e-06, + "loss": 0.0896, "step": 8710 }, { - "epoch": 0.6475568097430566, - "grad_norm": 1.8479968309402466, - "learning_rate": 2.611465914154166e-05, - "loss": 0.0879, + "epoch": 1.2951136194861133, + "grad_norm": 0.5107810497283936, + "learning_rate": 8.704886380513887e-06, + "loss": 0.0755, "step": 8720 }, { - "epoch": 0.6482994207634041, - "grad_norm": 1.4413061141967773, - "learning_rate": 2.6110203475419577e-05, - "loss": 0.0743, + "epoch": 1.2965988415268082, + "grad_norm": 1.5277434587478638, + "learning_rate": 8.703401158473193e-06, + "loss": 0.0691, "step": 8730 }, { - "epoch": 0.6490420317837516, - "grad_norm": 2.89367413520813, - "learning_rate": 2.6105747809297492e-05, - "loss": 0.0707, + "epoch": 1.2980840635675033, + "grad_norm": 0.8021115660667419, + "learning_rate": 8.701915936432497e-06, + "loss": 0.0826, "step": 8740 }, { - "epoch": 0.6497846428040992, - "grad_norm": 3.1368815898895264, - "learning_rate": 2.6101292143175404e-05, - "loss": 0.0773, + "epoch": 1.2995692856081984, + "grad_norm": 0.7268383502960205, + "learning_rate": 8.700430714391802e-06, + "loss": 0.0752, "step": 8750 }, { - "epoch": 0.6505272538244468, - "grad_norm": 2.096843719482422, - "learning_rate": 2.6096836477053322e-05, - "loss": 0.0732, + "epoch": 1.3010545076488935, + "grad_norm": 0.7827374339103699, + "learning_rate": 8.698945492351108e-06, + "loss": 0.0718, "step": 8760 }, { - "epoch": 0.6512698648447943, - "grad_norm": 2.454930067062378, - "learning_rate": 2.6092380810931234e-05, - "loss": 0.0926, + "epoch": 1.3025397296895886, + "grad_norm": 0.793857753276825, + "learning_rate": 8.697460270310412e-06, + "loss": 0.0532, "step": 8770 }, { - "epoch": 0.6520124758651419, - "grad_norm": 2.2393689155578613, - "learning_rate": 2.608792514480915e-05, - "loss": 0.113, + "epoch": 1.3040249517302835, + "grad_norm": 0.9983842968940735, + "learning_rate": 8.695975048269717e-06, + "loss": 0.0745, "step": 8780 }, { - "epoch": 0.6527550868854893, - "grad_norm": 1.3184117078781128, - "learning_rate": 2.6083469478687064e-05, - "loss": 0.0803, + "epoch": 1.3055101737709789, + "grad_norm": 0.48715001344680786, + "learning_rate": 8.694489826229023e-06, + "loss": 0.069, "step": 8790 }, { - "epoch": 0.6534976979058369, - "grad_norm": 1.2592401504516602, - "learning_rate": 2.607901381256498e-05, - "loss": 0.06, + "epoch": 1.3069953958116738, + "grad_norm": 1.0066664218902588, + "learning_rate": 8.693004604188326e-06, + "loss": 0.1001, "step": 8800 }, { - "epoch": 0.6542403089261845, - "grad_norm": 1.8193804025650024, - "learning_rate": 2.6074558146442894e-05, - "loss": 0.0818, + "epoch": 1.308480617852369, + "grad_norm": 0.587794303894043, + "learning_rate": 8.691519382147632e-06, + "loss": 0.0761, "step": 8810 }, { - "epoch": 0.654982919946532, - "grad_norm": 0.5750879049301147, - "learning_rate": 2.607010248032081e-05, - "loss": 0.0704, + "epoch": 1.309965839893064, + "grad_norm": 0.6250380873680115, + "learning_rate": 8.690034160106938e-06, + "loss": 0.0826, "step": 8820 }, { - "epoch": 0.6557255309668796, - "grad_norm": 2.028292655944824, - "learning_rate": 2.6065646814198724e-05, - "loss": 0.0577, + "epoch": 1.3114510619337592, + "grad_norm": 1.0876086950302124, + "learning_rate": 8.68854893806624e-06, + "loss": 0.0909, "step": 8830 }, { - "epoch": 0.6564681419872271, - "grad_norm": 2.086024522781372, - "learning_rate": 2.606119114807664e-05, - "loss": 0.0675, + "epoch": 1.3129362839744543, + "grad_norm": 0.633162260055542, + "learning_rate": 8.687063716025547e-06, + "loss": 0.0915, "step": 8840 }, { - "epoch": 0.6572107530075746, - "grad_norm": 3.66861891746521, - "learning_rate": 2.6056735481954554e-05, - "loss": 0.066, + "epoch": 1.3144215060151492, + "grad_norm": 0.5343165397644043, + "learning_rate": 8.685578493984851e-06, + "loss": 0.0747, "step": 8850 }, { - "epoch": 0.6579533640279221, - "grad_norm": 1.3219988346099854, - "learning_rate": 2.6052279815832465e-05, - "loss": 0.0709, + "epoch": 1.3159067280558443, + "grad_norm": 1.269322156906128, + "learning_rate": 8.684093271944156e-06, + "loss": 0.0633, "step": 8860 }, { - "epoch": 0.6586959750482697, - "grad_norm": 1.395115852355957, - "learning_rate": 2.6047824149710384e-05, - "loss": 0.1049, + "epoch": 1.3173919500965394, + "grad_norm": 1.0117168426513672, + "learning_rate": 8.682608049903462e-06, + "loss": 0.0793, "step": 8870 }, { - "epoch": 0.6594385860686173, - "grad_norm": 2.2025349140167236, - "learning_rate": 2.6043368483588295e-05, - "loss": 0.0716, + "epoch": 1.3188771721372345, + "grad_norm": 0.692861795425415, + "learning_rate": 8.681122827862766e-06, + "loss": 0.0819, "step": 8880 }, { - "epoch": 0.6601811970889648, - "grad_norm": 0.7800239324569702, - "learning_rate": 2.603891281746621e-05, - "loss": 0.0774, + "epoch": 1.3203623941779297, + "grad_norm": 0.8399760723114014, + "learning_rate": 8.67963760582207e-06, + "loss": 0.0673, "step": 8890 }, { - "epoch": 0.6609238081093124, - "grad_norm": 1.6750237941741943, - "learning_rate": 2.603445715134413e-05, - "loss": 0.0824, + "epoch": 1.3218476162186246, + "grad_norm": 1.1416538953781128, + "learning_rate": 8.678152383781377e-06, + "loss": 0.058, "step": 8900 }, { - "epoch": 0.6616664191296598, - "grad_norm": 3.150371789932251, - "learning_rate": 2.603000148522204e-05, - "loss": 0.1005, + "epoch": 1.3233328382593197, + "grad_norm": 0.5253177881240845, + "learning_rate": 8.676667161740681e-06, + "loss": 0.061, "step": 8910 }, { - "epoch": 0.6624090301500074, - "grad_norm": 2.6133267879486084, - "learning_rate": 2.6025545819099955e-05, - "loss": 0.0906, + "epoch": 1.3248180603000148, + "grad_norm": 0.8652992844581604, + "learning_rate": 8.675181939699986e-06, + "loss": 0.0655, "step": 8920 }, { - "epoch": 0.663151641170355, - "grad_norm": 2.1227505207061768, - "learning_rate": 2.6021090152977874e-05, - "loss": 0.094, + "epoch": 1.32630328234071, + "grad_norm": 0.769469141960144, + "learning_rate": 8.673696717659292e-06, + "loss": 0.0464, "step": 8930 }, { - "epoch": 0.6638942521907025, - "grad_norm": 3.7070045471191406, - "learning_rate": 2.6016634486855785e-05, - "loss": 0.1054, + "epoch": 1.327788504381405, + "grad_norm": 0.8097725510597229, + "learning_rate": 8.672211495618596e-06, + "loss": 0.0656, "step": 8940 }, { - "epoch": 0.6646368632110501, - "grad_norm": 2.8598554134368896, - "learning_rate": 2.60121788207337e-05, - "loss": 0.101, + "epoch": 1.3292737264221002, + "grad_norm": 1.2059855461120605, + "learning_rate": 8.6707262735779e-06, + "loss": 0.0676, "step": 8950 }, { - "epoch": 0.6653794742313975, - "grad_norm": 1.542912483215332, - "learning_rate": 2.6007723154611615e-05, - "loss": 0.0896, + "epoch": 1.3307589484627953, + "grad_norm": 1.0563139915466309, + "learning_rate": 8.669241051537207e-06, + "loss": 0.0624, "step": 8960 }, { - "epoch": 0.6661220852517451, - "grad_norm": 2.263106346130371, - "learning_rate": 2.600326748848953e-05, - "loss": 0.0893, + "epoch": 1.3322441705034902, + "grad_norm": 1.094596266746521, + "learning_rate": 8.66775582949651e-06, + "loss": 0.0755, "step": 8970 }, { - "epoch": 0.6668646962720927, - "grad_norm": 1.0385371446609497, - "learning_rate": 2.5998811822367445e-05, - "loss": 0.1132, + "epoch": 1.3337293925441853, + "grad_norm": 0.9370036125183105, + "learning_rate": 8.666270607455816e-06, + "loss": 0.0868, "step": 8980 }, { - "epoch": 0.6676073072924402, - "grad_norm": 3.194511890411377, - "learning_rate": 2.5994356156245357e-05, - "loss": 0.0824, + "epoch": 1.3352146145848804, + "grad_norm": 1.1293418407440186, + "learning_rate": 8.66478538541512e-06, + "loss": 0.0707, "step": 8990 }, { - "epoch": 0.6683499183127878, - "grad_norm": 1.4233129024505615, - "learning_rate": 2.5989900490123275e-05, - "loss": 0.0689, + "epoch": 1.3366998366255756, + "grad_norm": 1.6295610666275024, + "learning_rate": 8.663300163374425e-06, + "loss": 0.0655, "step": 9000 }, { - "epoch": 0.6690925293331353, - "grad_norm": 1.2096024751663208, - "learning_rate": 2.598544482400119e-05, - "loss": 0.0736, + "epoch": 1.3381850586662707, + "grad_norm": 1.074697494506836, + "learning_rate": 8.66181494133373e-06, + "loss": 0.0617, "step": 9010 }, { - "epoch": 0.6698351403534828, - "grad_norm": 2.155372381210327, - "learning_rate": 2.5980989157879102e-05, + "epoch": 1.3396702807069656, + "grad_norm": 0.906007707118988, + "learning_rate": 8.660329719293035e-06, "loss": 0.077, "step": 9020 }, { - "epoch": 0.6705777513738304, - "grad_norm": 1.661603331565857, - "learning_rate": 2.5976533491757017e-05, - "loss": 0.097, + "epoch": 1.3411555027476607, + "grad_norm": 0.7099418044090271, + "learning_rate": 8.65884449725234e-06, + "loss": 0.085, "step": 9030 }, { - "epoch": 0.6713203623941779, - "grad_norm": 2.2005343437194824, - "learning_rate": 2.5972077825634935e-05, - "loss": 0.0742, + "epoch": 1.3426407247883558, + "grad_norm": 0.9479149580001831, + "learning_rate": 8.657359275211646e-06, + "loss": 0.0784, "step": 9040 }, { - "epoch": 0.6720629734145255, - "grad_norm": 1.2867567539215088, - "learning_rate": 2.5967622159512847e-05, - "loss": 0.0691, + "epoch": 1.344125946829051, + "grad_norm": 0.6403526067733765, + "learning_rate": 8.65587405317095e-06, + "loss": 0.0639, "step": 9050 }, { - "epoch": 0.672805584434873, - "grad_norm": 2.7160210609436035, - "learning_rate": 2.5963166493390762e-05, - "loss": 0.1008, + "epoch": 1.345611168869746, + "grad_norm": 1.300971269607544, + "learning_rate": 8.654388831130255e-06, + "loss": 0.0795, "step": 9060 }, { - "epoch": 0.6735481954552206, - "grad_norm": 2.456948757171631, - "learning_rate": 2.5958710827268677e-05, - "loss": 0.0676, + "epoch": 1.347096390910441, + "grad_norm": 0.9193373918533325, + "learning_rate": 8.652903609089559e-06, + "loss": 0.0877, "step": 9070 }, { - "epoch": 0.674290806475568, - "grad_norm": 1.7581907510757446, - "learning_rate": 2.5954255161146592e-05, - "loss": 0.0949, + "epoch": 1.3485816129511363, + "grad_norm": 1.093873381614685, + "learning_rate": 8.651418387048865e-06, + "loss": 0.0671, "step": 9080 }, { - "epoch": 0.6750334174959156, - "grad_norm": 1.2283096313476562, - "learning_rate": 2.5949799495024507e-05, - "loss": 0.0792, + "epoch": 1.3500668349918312, + "grad_norm": 0.40460872650146484, + "learning_rate": 8.64993316500817e-06, + "loss": 0.0754, "step": 9090 }, { - "epoch": 0.6757760285162632, - "grad_norm": 1.879252552986145, - "learning_rate": 2.5945343828902422e-05, - "loss": 0.0732, + "epoch": 1.3515520570325263, + "grad_norm": 0.5930770039558411, + "learning_rate": 8.648447942967474e-06, + "loss": 0.0636, "step": 9100 }, { - "epoch": 0.6765186395366107, - "grad_norm": 2.652205228805542, - "learning_rate": 2.5940888162780337e-05, - "loss": 0.0759, + "epoch": 1.3530372790732215, + "grad_norm": 0.925861120223999, + "learning_rate": 8.64696272092678e-06, + "loss": 0.0823, "step": 9110 }, { - "epoch": 0.6772612505569583, - "grad_norm": 4.162420749664307, - "learning_rate": 2.5936432496658252e-05, - "loss": 0.0737, + "epoch": 1.3545225011139166, + "grad_norm": 0.7909414768218994, + "learning_rate": 8.645477498886085e-06, + "loss": 0.0783, "step": 9120 }, { - "epoch": 0.6780038615773059, - "grad_norm": 1.89590585231781, - "learning_rate": 2.5931976830536163e-05, - "loss": 0.0805, + "epoch": 1.3560077231546117, + "grad_norm": 0.7667768597602844, + "learning_rate": 8.643992276845389e-06, + "loss": 0.0712, "step": 9130 }, { - "epoch": 0.6787464725976533, - "grad_norm": 1.6626734733581543, - "learning_rate": 2.5927521164414082e-05, - "loss": 0.1003, + "epoch": 1.3574929451953066, + "grad_norm": 0.738560140132904, + "learning_rate": 8.642507054804693e-06, + "loss": 0.0909, "step": 9140 }, { - "epoch": 0.6794890836180009, - "grad_norm": 1.87484610080719, - "learning_rate": 2.5923065498291997e-05, - "loss": 0.0795, + "epoch": 1.3589781672360017, + "grad_norm": 1.9173917770385742, + "learning_rate": 8.641021832764e-06, + "loss": 0.0746, "step": 9150 }, { - "epoch": 0.6802316946383484, - "grad_norm": 1.9725035429000854, - "learning_rate": 2.591860983216991e-05, - "loss": 0.0936, + "epoch": 1.3604633892766969, + "grad_norm": 1.3944460153579712, + "learning_rate": 8.639536610723304e-06, + "loss": 0.0663, "step": 9160 }, { - "epoch": 0.680974305658696, - "grad_norm": 2.27907395362854, - "learning_rate": 2.5914154166047827e-05, - "loss": 0.0865, + "epoch": 1.361948611317392, + "grad_norm": 0.8866845965385437, + "learning_rate": 8.638051388682608e-06, + "loss": 0.0816, "step": 9170 }, { - "epoch": 0.6817169166790435, - "grad_norm": 1.4247010946273804, - "learning_rate": 2.590969849992574e-05, - "loss": 0.0751, + "epoch": 1.363433833358087, + "grad_norm": 1.4640837907791138, + "learning_rate": 8.636566166641913e-06, + "loss": 0.1226, "step": 9180 }, { - "epoch": 0.6824595276993911, - "grad_norm": 2.569737195968628, - "learning_rate": 2.5905242833803653e-05, - "loss": 0.1007, + "epoch": 1.364919055398782, + "grad_norm": 0.7436324954032898, + "learning_rate": 8.635080944601219e-06, + "loss": 0.0692, "step": 9190 }, { - "epoch": 0.6832021387197386, - "grad_norm": 3.3012797832489014, - "learning_rate": 2.590078716768157e-05, - "loss": 0.0889, + "epoch": 1.3664042774394771, + "grad_norm": 0.8947879076004028, + "learning_rate": 8.633595722560523e-06, + "loss": 0.0933, "step": 9200 }, { - "epoch": 0.6839447497400861, - "grad_norm": 2.0903170108795166, - "learning_rate": 2.5896331501559483e-05, - "loss": 0.082, + "epoch": 1.3678894994801722, + "grad_norm": 0.7265494465827942, + "learning_rate": 8.632110500519828e-06, + "loss": 0.0803, "step": 9210 }, { - "epoch": 0.6846873607604337, - "grad_norm": 1.6836172342300415, - "learning_rate": 2.58918758354374e-05, - "loss": 0.0873, + "epoch": 1.3693747215208674, + "grad_norm": 1.0423144102096558, + "learning_rate": 8.630625278479134e-06, + "loss": 0.0862, "step": 9220 }, { - "epoch": 0.6854299717807812, - "grad_norm": 3.3756263256073, - "learning_rate": 2.5887420169315313e-05, - "loss": 0.0769, + "epoch": 1.3708599435615625, + "grad_norm": 0.8711174726486206, + "learning_rate": 8.629140056438438e-06, + "loss": 0.069, "step": 9230 }, { - "epoch": 0.6861725828011288, - "grad_norm": 1.1910730600357056, - "learning_rate": 2.588296450319323e-05, - "loss": 0.0894, + "epoch": 1.3723451656022576, + "grad_norm": 1.1988892555236816, + "learning_rate": 8.627654834397743e-06, + "loss": 0.076, "step": 9240 }, { - "epoch": 0.6869151938214763, - "grad_norm": 1.0612378120422363, - "learning_rate": 2.5878508837071143e-05, - "loss": 0.062, + "epoch": 1.3738303876429527, + "grad_norm": 0.5930973887443542, + "learning_rate": 8.626169612357049e-06, + "loss": 0.0658, "step": 9250 }, { - "epoch": 0.6876578048418238, - "grad_norm": 1.0237765312194824, - "learning_rate": 2.587405317094906e-05, - "loss": 0.0904, + "epoch": 1.3753156096836476, + "grad_norm": 1.127451777458191, + "learning_rate": 8.624684390316353e-06, + "loss": 0.0772, "step": 9260 }, { - "epoch": 0.6884004158621714, - "grad_norm": 2.666456460952759, - "learning_rate": 2.5869597504826973e-05, - "loss": 0.0861, + "epoch": 1.3768008317243428, + "grad_norm": 0.7104337215423584, + "learning_rate": 8.623199168275658e-06, + "loss": 0.0856, "step": 9270 }, { - "epoch": 0.6891430268825189, - "grad_norm": 1.1967474222183228, - "learning_rate": 2.586514183870489e-05, - "loss": 0.086, + "epoch": 1.3782860537650379, + "grad_norm": 1.0597851276397705, + "learning_rate": 8.621713946234962e-06, + "loss": 0.0742, "step": 9280 }, { - "epoch": 0.6898856379028665, - "grad_norm": 3.264155626296997, - "learning_rate": 2.58606861725828e-05, - "loss": 0.0903, + "epoch": 1.379771275805733, + "grad_norm": 1.0876798629760742, + "learning_rate": 8.620228724194267e-06, + "loss": 0.0851, "step": 9290 }, { - "epoch": 0.6906282489232141, - "grad_norm": 2.126134157180786, - "learning_rate": 2.5856230506460715e-05, - "loss": 0.1036, + "epoch": 1.3812564978464281, + "grad_norm": 0.7666727900505066, + "learning_rate": 8.618743502153573e-06, + "loss": 0.0463, "step": 9300 }, { - "epoch": 0.6913708599435615, - "grad_norm": 1.6895121335983276, - "learning_rate": 2.5851774840338633e-05, - "loss": 0.067, + "epoch": 1.382741719887123, + "grad_norm": 1.0942326784133911, + "learning_rate": 8.617258280112877e-06, + "loss": 0.0617, "step": 9310 }, { - "epoch": 0.6921134709639091, - "grad_norm": 2.2356975078582764, - "learning_rate": 2.5847319174216545e-05, - "loss": 0.0838, + "epoch": 1.3842269419278181, + "grad_norm": 1.3110612630844116, + "learning_rate": 8.615773058072182e-06, + "loss": 0.092, "step": 9320 }, { - "epoch": 0.6928560819842566, - "grad_norm": 1.7429089546203613, - "learning_rate": 2.584286350809446e-05, - "loss": 0.0731, + "epoch": 1.3857121639685133, + "grad_norm": 0.7441218495368958, + "learning_rate": 8.614287836031488e-06, + "loss": 0.0598, "step": 9330 }, { - "epoch": 0.6935986930046042, - "grad_norm": 1.1210354566574097, - "learning_rate": 2.583840784197238e-05, - "loss": 0.0932, + "epoch": 1.3871973860092084, + "grad_norm": 0.9740796089172363, + "learning_rate": 8.612802613990792e-06, + "loss": 0.0654, "step": 9340 }, { - "epoch": 0.6943413040249518, - "grad_norm": 1.4460147619247437, - "learning_rate": 2.583395217585029e-05, - "loss": 0.0817, + "epoch": 1.3886826080499035, + "grad_norm": 0.8730571269989014, + "learning_rate": 8.611317391950097e-06, + "loss": 0.0648, "step": 9350 }, { - "epoch": 0.6950839150452993, - "grad_norm": 1.1217153072357178, - "learning_rate": 2.5829496509728205e-05, - "loss": 0.0671, + "epoch": 1.3901678300905984, + "grad_norm": 0.9170101881027222, + "learning_rate": 8.609832169909403e-06, + "loss": 0.0776, "step": 9360 }, { - "epoch": 0.6958265260656468, - "grad_norm": 2.2373554706573486, - "learning_rate": 2.582504084360612e-05, - "loss": 0.0681, + "epoch": 1.3916530521312938, + "grad_norm": 0.4509689211845398, + "learning_rate": 8.608346947868707e-06, + "loss": 0.0655, "step": 9370 }, { - "epoch": 0.6965691370859943, - "grad_norm": 2.8909049034118652, - "learning_rate": 2.5820585177484035e-05, - "loss": 0.1121, + "epoch": 1.3931382741719887, + "grad_norm": 2.2347559928894043, + "learning_rate": 8.606861725828012e-06, + "loss": 0.0879, "step": 9380 }, { - "epoch": 0.6973117481063419, - "grad_norm": 0.4152112603187561, - "learning_rate": 2.581612951136195e-05, - "loss": 0.0894, + "epoch": 1.3946234962126838, + "grad_norm": 1.4302359819412231, + "learning_rate": 8.605376503787318e-06, + "loss": 0.0884, "step": 9390 }, { - "epoch": 0.6980543591266894, - "grad_norm": 3.5851147174835205, - "learning_rate": 2.581167384523986e-05, - "loss": 0.0797, + "epoch": 1.396108718253379, + "grad_norm": 0.33778825402259827, + "learning_rate": 8.60389128174662e-06, + "loss": 0.0447, "step": 9400 }, { - "epoch": 0.698796970147037, - "grad_norm": 1.1283321380615234, - "learning_rate": 2.580721817911778e-05, - "loss": 0.0966, + "epoch": 1.397593940294074, + "grad_norm": 0.8757361769676208, + "learning_rate": 8.602406059705927e-06, + "loss": 0.0752, "step": 9410 }, { - "epoch": 0.6995395811673846, - "grad_norm": 2.237506151199341, - "learning_rate": 2.5802762512995695e-05, - "loss": 0.0554, + "epoch": 1.3990791623347691, + "grad_norm": 0.8796318769454956, + "learning_rate": 8.600920837665233e-06, + "loss": 0.0763, "step": 9420 }, { - "epoch": 0.700282192187732, - "grad_norm": 2.4891796112060547, - "learning_rate": 2.5798306846873607e-05, - "loss": 0.0808, + "epoch": 1.400564384375464, + "grad_norm": 1.1154508590698242, + "learning_rate": 8.599435615624536e-06, + "loss": 0.067, "step": 9430 }, { - "epoch": 0.7010248032080796, - "grad_norm": 1.4225846529006958, - "learning_rate": 2.579385118075152e-05, - "loss": 0.0733, + "epoch": 1.4020496064161592, + "grad_norm": 0.722909152507782, + "learning_rate": 8.597950393583842e-06, + "loss": 0.0682, "step": 9440 }, { - "epoch": 0.7017674142284271, - "grad_norm": 3.312795400619507, - "learning_rate": 2.578939551462944e-05, - "loss": 0.0979, + "epoch": 1.4035348284568543, + "grad_norm": 1.0541070699691772, + "learning_rate": 8.596465171543146e-06, + "loss": 0.0538, "step": 9450 }, { - "epoch": 0.7025100252487747, - "grad_norm": 1.2239809036254883, - "learning_rate": 2.578493984850735e-05, - "loss": 0.0735, + "epoch": 1.4050200504975494, + "grad_norm": 0.6118191480636597, + "learning_rate": 8.59497994950245e-06, + "loss": 0.0667, "step": 9460 }, { - "epoch": 0.7032526362691223, - "grad_norm": 3.1901540756225586, - "learning_rate": 2.5780484182385266e-05, - "loss": 0.0929, + "epoch": 1.4065052725382445, + "grad_norm": 1.3948619365692139, + "learning_rate": 8.593494727461757e-06, + "loss": 0.0651, "step": 9470 }, { - "epoch": 0.7039952472894698, - "grad_norm": 1.4800280332565308, - "learning_rate": 2.577602851626318e-05, - "loss": 0.0693, + "epoch": 1.4079904945789394, + "grad_norm": 1.4375851154327393, + "learning_rate": 8.592009505421061e-06, + "loss": 0.0651, "step": 9480 }, { - "epoch": 0.7047378583098173, - "grad_norm": 3.378511667251587, - "learning_rate": 2.5771572850141096e-05, - "loss": 0.0794, + "epoch": 1.4094757166196346, + "grad_norm": 0.42622682452201843, + "learning_rate": 8.590524283380365e-06, + "loss": 0.0744, "step": 9490 }, { - "epoch": 0.7054804693301648, - "grad_norm": 2.557231903076172, - "learning_rate": 2.576711718401901e-05, - "loss": 0.0825, + "epoch": 1.4109609386603297, + "grad_norm": 0.7068700790405273, + "learning_rate": 8.589039061339672e-06, + "loss": 0.0754, "step": 9500 }, { - "epoch": 0.7062230803505124, - "grad_norm": 1.7998268604278564, - "learning_rate": 2.5762661517896926e-05, - "loss": 0.0679, + "epoch": 1.4124461607010248, + "grad_norm": 0.5672358870506287, + "learning_rate": 8.587553839298976e-06, + "loss": 0.0678, "step": 9510 }, { - "epoch": 0.70696569137086, - "grad_norm": 2.5356063842773438, - "learning_rate": 2.575820585177484e-05, - "loss": 0.0851, + "epoch": 1.41393138274172, + "grad_norm": 1.3151499032974243, + "learning_rate": 8.58606861725828e-06, + "loss": 0.0843, "step": 9520 }, { - "epoch": 0.7077083023912075, - "grad_norm": 3.3451857566833496, - "learning_rate": 2.5753750185652756e-05, - "loss": 0.0934, + "epoch": 1.415416604782415, + "grad_norm": 0.579703688621521, + "learning_rate": 8.584583395217587e-06, + "loss": 0.0808, "step": 9530 }, { - "epoch": 0.708450913411555, - "grad_norm": 2.2727510929107666, - "learning_rate": 2.5749294519530668e-05, - "loss": 0.065, + "epoch": 1.4169018268231102, + "grad_norm": 0.921262800693512, + "learning_rate": 8.583098173176891e-06, + "loss": 0.0837, "step": 9540 }, { - "epoch": 0.7091935244319025, - "grad_norm": 3.0308828353881836, - "learning_rate": 2.5744838853408586e-05, - "loss": 0.1067, + "epoch": 1.418387048863805, + "grad_norm": 0.6830768585205078, + "learning_rate": 8.581612951136195e-06, + "loss": 0.065, "step": 9550 }, { - "epoch": 0.7099361354522501, - "grad_norm": 0.393522173166275, - "learning_rate": 2.57403831872865e-05, - "loss": 0.0824, + "epoch": 1.4198722709045002, + "grad_norm": 0.6493134498596191, + "learning_rate": 8.580127729095502e-06, + "loss": 0.06, "step": 9560 }, { - "epoch": 0.7106787464725977, - "grad_norm": 1.6205034255981445, - "learning_rate": 2.5735927521164413e-05, - "loss": 0.0474, + "epoch": 1.4213574929451953, + "grad_norm": 1.3778793811798096, + "learning_rate": 8.578642507054804e-06, + "loss": 0.0637, "step": 9570 }, { - "epoch": 0.7114213574929452, - "grad_norm": 1.4009572267532349, - "learning_rate": 2.573147185504233e-05, - "loss": 0.095, + "epoch": 1.4228427149858904, + "grad_norm": 0.6053674221038818, + "learning_rate": 8.57715728501411e-06, + "loss": 0.0717, "step": 9580 }, { - "epoch": 0.7121639685132928, - "grad_norm": 1.9968441724777222, - "learning_rate": 2.5727016188920243e-05, - "loss": 0.0957, + "epoch": 1.4243279370265856, + "grad_norm": 1.0402705669403076, + "learning_rate": 8.575672062973415e-06, + "loss": 0.0727, "step": 9590 }, { - "epoch": 0.7129065795336402, - "grad_norm": 1.6015273332595825, - "learning_rate": 2.5722560522798158e-05, - "loss": 0.0857, + "epoch": 1.4258131590672805, + "grad_norm": 1.1577720642089844, + "learning_rate": 8.57418684093272e-06, + "loss": 0.0726, "step": 9600 }, { - "epoch": 0.7136491905539878, - "grad_norm": 1.4251041412353516, - "learning_rate": 2.5718104856676073e-05, - "loss": 0.0932, + "epoch": 1.4272983811079756, + "grad_norm": 0.6212295889854431, + "learning_rate": 8.572701618892025e-06, + "loss": 0.0785, "step": 9610 }, { - "epoch": 0.7143918015743354, - "grad_norm": 5.090855598449707, - "learning_rate": 2.5713649190553988e-05, - "loss": 0.1032, + "epoch": 1.4287836031486707, + "grad_norm": 0.5646659731864929, + "learning_rate": 8.57121639685133e-06, + "loss": 0.06, "step": 9620 }, { - "epoch": 0.7151344125946829, - "grad_norm": 2.4273598194122314, - "learning_rate": 2.5709193524431903e-05, - "loss": 0.0827, + "epoch": 1.4302688251893658, + "grad_norm": 0.2798670828342438, + "learning_rate": 8.569731174810634e-06, + "loss": 0.083, "step": 9630 }, { - "epoch": 0.7158770236150305, - "grad_norm": 1.8204997777938843, - "learning_rate": 2.5704737858309818e-05, - "loss": 0.1167, + "epoch": 1.431754047230061, + "grad_norm": 0.8584413528442383, + "learning_rate": 8.56824595276994e-06, + "loss": 0.0845, "step": 9640 }, { - "epoch": 0.716619634635378, - "grad_norm": 1.7066177129745483, - "learning_rate": 2.5700282192187733e-05, - "loss": 0.1037, + "epoch": 1.4332392692707558, + "grad_norm": 0.6633872985839844, + "learning_rate": 8.566760730729245e-06, + "loss": 0.0932, "step": 9650 }, { - "epoch": 0.7173622456557255, - "grad_norm": 2.3941705226898193, - "learning_rate": 2.5695826526065648e-05, - "loss": 0.1003, + "epoch": 1.4347244913114512, + "grad_norm": 1.4569209814071655, + "learning_rate": 8.56527550868855e-06, + "loss": 0.0811, "step": 9660 }, { - "epoch": 0.718104856676073, - "grad_norm": 2.3168444633483887, - "learning_rate": 2.5691370859943563e-05, - "loss": 0.1067, + "epoch": 1.436209713352146, + "grad_norm": 1.3260440826416016, + "learning_rate": 8.563790286647855e-06, + "loss": 0.055, "step": 9670 }, { - "epoch": 0.7188474676964206, - "grad_norm": 1.6166632175445557, - "learning_rate": 2.5686915193821478e-05, - "loss": 0.0904, + "epoch": 1.4376949353928412, + "grad_norm": 1.1164923906326294, + "learning_rate": 8.56230506460716e-06, + "loss": 0.0877, "step": 9680 }, { - "epoch": 0.7195900787167682, - "grad_norm": 0.9666265845298767, - "learning_rate": 2.5682459527699393e-05, - "loss": 0.0967, + "epoch": 1.4391801574335363, + "grad_norm": 0.9574961066246033, + "learning_rate": 8.560819842566464e-06, + "loss": 0.0736, "step": 9690 }, { - "epoch": 0.7203326897371157, - "grad_norm": 0.7397652864456177, - "learning_rate": 2.5678003861577305e-05, - "loss": 0.0604, + "epoch": 1.4406653794742315, + "grad_norm": 1.2333389520645142, + "learning_rate": 8.559334620525769e-06, + "loss": 0.0861, "step": 9700 }, { - "epoch": 0.7210753007574633, - "grad_norm": 3.255927324295044, - "learning_rate": 2.567354819545522e-05, - "loss": 0.0784, + "epoch": 1.4421506015149266, + "grad_norm": 0.932621955871582, + "learning_rate": 8.557849398485075e-06, + "loss": 0.0778, "step": 9710 }, { - "epoch": 0.7218179117778107, - "grad_norm": 2.8680319786071777, - "learning_rate": 2.5669092529333138e-05, - "loss": 0.1131, + "epoch": 1.4436358235556215, + "grad_norm": 0.7551608085632324, + "learning_rate": 8.55636417644438e-06, + "loss": 0.0593, "step": 9720 }, { - "epoch": 0.7225605227981583, - "grad_norm": 1.343375325202942, - "learning_rate": 2.566463686321105e-05, - "loss": 0.1042, + "epoch": 1.4451210455963166, + "grad_norm": 0.7052642107009888, + "learning_rate": 8.554878954403684e-06, + "loss": 0.0467, "step": 9730 }, { - "epoch": 0.7233031338185059, - "grad_norm": 2.072066307067871, - "learning_rate": 2.5660181197088965e-05, - "loss": 0.1135, + "epoch": 1.4466062676370117, + "grad_norm": 2.466881036758423, + "learning_rate": 8.553393732362988e-06, + "loss": 0.0794, "step": 9740 }, { - "epoch": 0.7240457448388534, - "grad_norm": 2.82025408744812, - "learning_rate": 2.5655725530966883e-05, - "loss": 0.0621, + "epoch": 1.4480914896777068, + "grad_norm": 0.7573568224906921, + "learning_rate": 8.551908510322294e-06, + "loss": 0.065, "step": 9750 }, { - "epoch": 0.724788355859201, - "grad_norm": 4.173225402832031, - "learning_rate": 2.5651269864844795e-05, - "loss": 0.0756, + "epoch": 1.449576711718402, + "grad_norm": 1.1070822477340698, + "learning_rate": 8.550423288281599e-06, + "loss": 0.0812, "step": 9760 }, { - "epoch": 0.7255309668795485, - "grad_norm": 0.6784592866897583, - "learning_rate": 2.564681419872271e-05, - "loss": 0.0859, + "epoch": 1.4510619337590969, + "grad_norm": 0.355160117149353, + "learning_rate": 8.548938066240903e-06, + "loss": 0.0664, "step": 9770 }, { - "epoch": 0.726273577899896, - "grad_norm": 2.3363256454467773, - "learning_rate": 2.5642358532600625e-05, - "loss": 0.0596, + "epoch": 1.452547155799792, + "grad_norm": 1.617323398590088, + "learning_rate": 8.54745284420021e-06, + "loss": 0.0887, "step": 9780 }, { - "epoch": 0.7270161889202436, - "grad_norm": 1.6436067819595337, - "learning_rate": 2.563790286647854e-05, - "loss": 0.0875, + "epoch": 1.4540323778404871, + "grad_norm": 0.6931330561637878, + "learning_rate": 8.545967622159514e-06, + "loss": 0.0678, "step": 9790 }, { - "epoch": 0.7277587999405911, - "grad_norm": 2.9929933547973633, - "learning_rate": 2.5633447200356455e-05, - "loss": 0.1146, + "epoch": 1.4555175998811822, + "grad_norm": 0.6744693517684937, + "learning_rate": 8.544482400118818e-06, + "loss": 0.0754, "step": 9800 }, { - "epoch": 0.7285014109609387, - "grad_norm": 2.5027360916137695, - "learning_rate": 2.5628991534234366e-05, - "loss": 0.0916, + "epoch": 1.4570028219218774, + "grad_norm": 1.0592255592346191, + "learning_rate": 8.542997178078123e-06, + "loss": 0.0632, "step": 9810 }, { - "epoch": 0.7292440219812862, - "grad_norm": 0.6115292310714722, - "learning_rate": 2.5624535868112285e-05, - "loss": 0.0381, + "epoch": 1.4584880439625725, + "grad_norm": 0.8541384339332581, + "learning_rate": 8.541511956037429e-06, + "loss": 0.0736, "step": 9820 }, { - "epoch": 0.7299866330016337, - "grad_norm": 3.5652284622192383, - "learning_rate": 2.56200802019902e-05, - "loss": 0.0678, + "epoch": 1.4599732660032676, + "grad_norm": 0.8458141684532166, + "learning_rate": 8.540026733996733e-06, + "loss": 0.0563, "step": 9830 }, { - "epoch": 0.7307292440219813, - "grad_norm": 2.814704179763794, - "learning_rate": 2.561562453586811e-05, - "loss": 0.1019, + "epoch": 1.4614584880439625, + "grad_norm": 0.6952449083328247, + "learning_rate": 8.538541511956038e-06, + "loss": 0.0789, "step": 9840 }, { - "epoch": 0.7314718550423288, - "grad_norm": 2.0167160034179688, - "learning_rate": 2.5611168869746026e-05, - "loss": 0.0719, + "epoch": 1.4629437100846576, + "grad_norm": 0.8950325846672058, + "learning_rate": 8.537056289915344e-06, + "loss": 0.0683, "step": 9850 }, { - "epoch": 0.7322144660626764, - "grad_norm": 1.6718881130218506, - "learning_rate": 2.5606713203623944e-05, - "loss": 0.0965, + "epoch": 1.4644289321253527, + "grad_norm": 0.9191376566886902, + "learning_rate": 8.535571067874648e-06, + "loss": 0.0629, "step": 9860 }, { - "epoch": 0.7329570770830239, - "grad_norm": 1.5811102390289307, - "learning_rate": 2.5602257537501856e-05, - "loss": 0.1263, + "epoch": 1.4659141541660479, + "grad_norm": 0.8940352201461792, + "learning_rate": 8.534085845833953e-06, + "loss": 0.0674, "step": 9870 }, { - "epoch": 0.7336996881033715, - "grad_norm": 3.2773425579071045, - "learning_rate": 2.559780187137977e-05, - "loss": 0.0771, + "epoch": 1.467399376206743, + "grad_norm": 0.9834596514701843, + "learning_rate": 8.532600623793259e-06, + "loss": 0.073, "step": 9880 }, { - "epoch": 0.734442299123719, - "grad_norm": 1.7898057699203491, - "learning_rate": 2.559334620525769e-05, - "loss": 0.0871, + "epoch": 1.468884598247438, + "grad_norm": 0.592193603515625, + "learning_rate": 8.531115401752563e-06, + "loss": 0.0619, "step": 9890 }, { - "epoch": 0.7351849101440665, - "grad_norm": 2.309032917022705, - "learning_rate": 2.55888905391356e-05, - "loss": 0.0703, + "epoch": 1.470369820288133, + "grad_norm": 0.8372761011123657, + "learning_rate": 8.529630179711868e-06, + "loss": 0.0832, "step": 9900 }, { - "epoch": 0.7359275211644141, - "grad_norm": 1.4760417938232422, - "learning_rate": 2.5584434873013516e-05, - "loss": 0.0877, + "epoch": 1.4718550423288281, + "grad_norm": 0.7133629322052002, + "learning_rate": 8.528144957671172e-06, + "loss": 0.0592, "step": 9910 }, { - "epoch": 0.7366701321847616, - "grad_norm": 1.4691712856292725, - "learning_rate": 2.557997920689143e-05, - "loss": 0.0909, + "epoch": 1.4733402643695233, + "grad_norm": 0.4714134633541107, + "learning_rate": 8.526659735630476e-06, + "loss": 0.0669, "step": 9920 }, { - "epoch": 0.7374127432051092, - "grad_norm": 1.479776382446289, - "learning_rate": 2.5575523540769346e-05, - "loss": 0.0983, + "epoch": 1.4748254864102184, + "grad_norm": 1.3863356113433838, + "learning_rate": 8.525174513589783e-06, + "loss": 0.067, "step": 9930 }, { - "epoch": 0.7381553542254568, - "grad_norm": 2.160743474960327, - "learning_rate": 2.557106787464726e-05, - "loss": 0.0964, + "epoch": 1.4763107084509133, + "grad_norm": 1.3102223873138428, + "learning_rate": 8.523689291549087e-06, + "loss": 0.0734, "step": 9940 }, { - "epoch": 0.7388979652458042, - "grad_norm": 1.2513461112976074, - "learning_rate": 2.5566612208525173e-05, - "loss": 0.0811, + "epoch": 1.4777959304916086, + "grad_norm": 1.2417453527450562, + "learning_rate": 8.522204069508391e-06, + "loss": 0.0668, "step": 9950 }, { - "epoch": 0.7396405762661518, - "grad_norm": 1.080775499343872, - "learning_rate": 2.556215654240309e-05, - "loss": 0.0922, + "epoch": 1.4792811525323035, + "grad_norm": 0.8425410389900208, + "learning_rate": 8.520718847467698e-06, + "loss": 0.0542, "step": 9960 }, { - "epoch": 0.7403831872864993, - "grad_norm": 2.420680284500122, - "learning_rate": 2.5557700876281006e-05, - "loss": 0.0645, + "epoch": 1.4807663745729986, + "grad_norm": 0.3685753643512726, + "learning_rate": 8.519233625427002e-06, + "loss": 0.0675, "step": 9970 }, { - "epoch": 0.7411257983068469, - "grad_norm": 2.0995841026306152, - "learning_rate": 2.5553245210158918e-05, - "loss": 0.106, + "epoch": 1.4822515966136938, + "grad_norm": 0.9921769499778748, + "learning_rate": 8.517748403386306e-06, + "loss": 0.0739, "step": 9980 }, { - "epoch": 0.7418684093271944, - "grad_norm": 3.2964303493499756, - "learning_rate": 2.5548789544036836e-05, - "loss": 0.0891, + "epoch": 1.483736818654389, + "grad_norm": 0.7862120270729065, + "learning_rate": 8.516263181345613e-06, + "loss": 0.051, "step": 9990 }, { - "epoch": 0.742611020347542, - "grad_norm": 1.5144083499908447, - "learning_rate": 2.5544333877914748e-05, - "loss": 0.0698, + "epoch": 1.485222040695084, + "grad_norm": 0.8443083763122559, + "learning_rate": 8.514777959304917e-06, + "loss": 0.0634, "step": 10000 }, { - "epoch": 0.7433536313678895, - "grad_norm": 3.1648800373077393, - "learning_rate": 2.5539878211792663e-05, - "loss": 0.1023, + "epoch": 1.486707262735779, + "grad_norm": 0.7549251914024353, + "learning_rate": 8.513292737264221e-06, + "loss": 0.0729, "step": 10010 }, { - "epoch": 0.744096242388237, - "grad_norm": 2.7684147357940674, - "learning_rate": 2.5535422545670578e-05, - "loss": 0.0946, + "epoch": 1.488192484776474, + "grad_norm": 0.8449190855026245, + "learning_rate": 8.511807515223528e-06, + "loss": 0.0838, "step": 10020 }, { - "epoch": 0.7448388534085846, - "grad_norm": 2.4703927040100098, - "learning_rate": 2.5530966879548493e-05, - "loss": 0.0917, + "epoch": 1.4896777068171692, + "grad_norm": 0.5930236577987671, + "learning_rate": 8.51032229318283e-06, + "loss": 0.0778, "step": 10030 }, { - "epoch": 0.7455814644289321, - "grad_norm": 4.016003131866455, - "learning_rate": 2.5526511213426408e-05, - "loss": 0.0888, + "epoch": 1.4911629288578643, + "grad_norm": 1.0783277750015259, + "learning_rate": 8.508837071142136e-06, + "loss": 0.0951, "step": 10040 }, { - "epoch": 0.7463240754492797, - "grad_norm": 0.4022844135761261, - "learning_rate": 2.5522055547304323e-05, - "loss": 0.059, + "epoch": 1.4926481508985594, + "grad_norm": 0.6660608053207397, + "learning_rate": 8.507351849101443e-06, + "loss": 0.0804, "step": 10050 }, { - "epoch": 0.7470666864696273, - "grad_norm": 0.8048895597457886, - "learning_rate": 2.5517599881182238e-05, - "loss": 0.0885, + "epoch": 1.4941333729392543, + "grad_norm": 0.7329056262969971, + "learning_rate": 8.505866627060745e-06, + "loss": 0.0658, "step": 10060 }, { - "epoch": 0.7478092974899747, - "grad_norm": 3.6403074264526367, - "learning_rate": 2.5513144215060153e-05, - "loss": 0.0704, + "epoch": 1.4956185949799494, + "grad_norm": 0.9868343472480774, + "learning_rate": 8.504381405020051e-06, + "loss": 0.0598, "step": 10070 }, { - "epoch": 0.7485519085103223, - "grad_norm": 1.1787481307983398, - "learning_rate": 2.5508688548938068e-05, - "loss": 0.0869, + "epoch": 1.4971038170206445, + "grad_norm": 0.851028323173523, + "learning_rate": 8.502896182979356e-06, + "loss": 0.0823, "step": 10080 }, { - "epoch": 0.7492945195306698, - "grad_norm": 2.7455785274505615, - "learning_rate": 2.5504232882815983e-05, - "loss": 0.0837, + "epoch": 1.4985890390613397, + "grad_norm": 0.9429115056991577, + "learning_rate": 8.50141096093866e-06, + "loss": 0.0762, "step": 10090 }, { - "epoch": 0.7500371305510174, - "grad_norm": 1.82301664352417, - "learning_rate": 2.5499777216693898e-05, - "loss": 0.0581, + "epoch": 1.5000742611020348, + "grad_norm": 1.0672980546951294, + "learning_rate": 8.499925738897966e-06, + "loss": 0.1005, "step": 10100 }, { - "epoch": 0.750779741571365, - "grad_norm": 1.8503745794296265, - "learning_rate": 2.549532155057181e-05, - "loss": 0.0519, + "epoch": 1.5015594831427297, + "grad_norm": 0.5856247544288635, + "learning_rate": 8.498440516857271e-06, + "loss": 0.0566, "step": 10110 }, { - "epoch": 0.7515223525917124, - "grad_norm": 1.0572456121444702, - "learning_rate": 2.5490865884449724e-05, - "loss": 0.055, + "epoch": 1.503044705183425, + "grad_norm": 0.8225235939025879, + "learning_rate": 8.496955294816575e-06, + "loss": 0.0627, "step": 10120 }, { - "epoch": 0.75226496361206, - "grad_norm": 1.1795002222061157, - "learning_rate": 2.5486410218327643e-05, - "loss": 0.1019, + "epoch": 1.50452992722412, + "grad_norm": 0.6043829917907715, + "learning_rate": 8.495470072775881e-06, + "loss": 0.0823, "step": 10130 }, { - "epoch": 0.7530075746324075, - "grad_norm": 2.340430736541748, - "learning_rate": 2.5481954552205554e-05, - "loss": 0.0892, + "epoch": 1.506015149264815, + "grad_norm": 1.159949541091919, + "learning_rate": 8.493984850735186e-06, + "loss": 0.0661, "step": 10140 }, { - "epoch": 0.7537501856527551, - "grad_norm": 2.2384378910064697, - "learning_rate": 2.547749888608347e-05, - "loss": 0.0461, + "epoch": 1.5075003713055102, + "grad_norm": 0.4439176917076111, + "learning_rate": 8.49249962869449e-06, + "loss": 0.084, "step": 10150 }, { - "epoch": 0.7544927966731027, - "grad_norm": 3.9596447944641113, - "learning_rate": 2.5473043219961388e-05, - "loss": 0.0699, + "epoch": 1.5089855933462053, + "grad_norm": 0.9722158908843994, + "learning_rate": 8.491014406653796e-06, + "loss": 0.0814, "step": 10160 }, { - "epoch": 0.7552354076934502, - "grad_norm": 2.694197654724121, - "learning_rate": 2.54685875538393e-05, - "loss": 0.087, + "epoch": 1.5104708153869004, + "grad_norm": 0.7936907410621643, + "learning_rate": 8.4895291846131e-06, + "loss": 0.0846, "step": 10170 }, { - "epoch": 0.7559780187137977, - "grad_norm": 1.5229603052139282, - "learning_rate": 2.5464131887717214e-05, - "loss": 0.0611, + "epoch": 1.5119560374275953, + "grad_norm": 1.1649501323699951, + "learning_rate": 8.488043962572405e-06, + "loss": 0.0571, "step": 10180 }, { - "epoch": 0.7567206297341452, - "grad_norm": 1.1745027303695679, - "learning_rate": 2.545967622159513e-05, - "loss": 0.0937, + "epoch": 1.5134412594682907, + "grad_norm": 0.690048098564148, + "learning_rate": 8.486558740531711e-06, + "loss": 0.0976, "step": 10190 }, { - "epoch": 0.7574632407544928, - "grad_norm": 2.827160120010376, - "learning_rate": 2.5455220555473044e-05, - "loss": 0.0918, + "epoch": 1.5149264815089856, + "grad_norm": 1.2330018281936646, + "learning_rate": 8.485073518491014e-06, + "loss": 0.0663, "step": 10200 }, { - "epoch": 0.7582058517748403, - "grad_norm": 0.49699798226356506, - "learning_rate": 2.545076488935096e-05, - "loss": 0.039, + "epoch": 1.5164117035496807, + "grad_norm": 0.9722781777381897, + "learning_rate": 8.48358829645032e-06, + "loss": 0.0618, "step": 10210 }, { - "epoch": 0.7589484627951879, - "grad_norm": 0.5466452240943909, - "learning_rate": 2.544630922322887e-05, - "loss": 0.0774, + "epoch": 1.5178969255903758, + "grad_norm": 0.9569472074508667, + "learning_rate": 8.482103074409625e-06, + "loss": 0.0753, "step": 10220 }, { - "epoch": 0.7596910738155355, - "grad_norm": 1.8753949403762817, - "learning_rate": 2.544185355710679e-05, - "loss": 0.0982, + "epoch": 1.5193821476310707, + "grad_norm": 0.5346283316612244, + "learning_rate": 8.480617852368929e-06, + "loss": 0.0699, "step": 10230 }, { - "epoch": 0.7604336848358829, - "grad_norm": 2.802274465560913, - "learning_rate": 2.5437397890984704e-05, - "loss": 0.114, + "epoch": 1.520867369671766, + "grad_norm": 0.6866028308868408, + "learning_rate": 8.479132630328235e-06, + "loss": 0.0764, "step": 10240 }, { - "epoch": 0.7611762958562305, - "grad_norm": 2.2179017066955566, - "learning_rate": 2.5432942224862616e-05, - "loss": 0.0404, + "epoch": 1.522352591712461, + "grad_norm": 1.042005181312561, + "learning_rate": 8.47764740828754e-06, + "loss": 0.0731, "step": 10250 }, { - "epoch": 0.761918906876578, - "grad_norm": 1.2496877908706665, - "learning_rate": 2.5428486558740534e-05, - "loss": 0.0764, + "epoch": 1.523837813753156, + "grad_norm": 0.5577150583267212, + "learning_rate": 8.476162186246844e-06, + "loss": 0.0851, "step": 10260 }, { - "epoch": 0.7626615178969256, - "grad_norm": 1.20204496383667, - "learning_rate": 2.542403089261845e-05, - "loss": 0.0817, + "epoch": 1.5253230357938512, + "grad_norm": 0.5276694893836975, + "learning_rate": 8.47467696420615e-06, + "loss": 0.0534, "step": 10270 }, { - "epoch": 0.7634041289172732, - "grad_norm": 2.656388521194458, - "learning_rate": 2.541957522649636e-05, - "loss": 0.0801, + "epoch": 1.526808257834546, + "grad_norm": 0.6905069947242737, + "learning_rate": 8.473191742165455e-06, + "loss": 0.091, "step": 10280 }, { - "epoch": 0.7641467399376207, - "grad_norm": 0.9805976748466492, - "learning_rate": 2.5415119560374276e-05, - "loss": 0.0655, + "epoch": 1.5282934798752414, + "grad_norm": 0.3890121877193451, + "learning_rate": 8.471706520124759e-06, + "loss": 0.0794, "step": 10290 }, { - "epoch": 0.7648893509579682, - "grad_norm": 1.0946846008300781, - "learning_rate": 2.5410663894252194e-05, - "loss": 0.0897, + "epoch": 1.5297787019159363, + "grad_norm": 1.8997670412063599, + "learning_rate": 8.470221298084065e-06, + "loss": 0.0778, "step": 10300 }, { - "epoch": 0.7656319619783157, - "grad_norm": 1.9143744707107544, - "learning_rate": 2.5406208228130106e-05, - "loss": 0.1053, + "epoch": 1.5312639239566315, + "grad_norm": 1.0573625564575195, + "learning_rate": 8.46873607604337e-06, + "loss": 0.0758, "step": 10310 }, { - "epoch": 0.7663745729986633, - "grad_norm": 2.236309766769409, - "learning_rate": 2.540175256200802e-05, - "loss": 0.0781, + "epoch": 1.5327491459973266, + "grad_norm": 0.6973865628242493, + "learning_rate": 8.467250854002674e-06, + "loss": 0.0797, "step": 10320 }, { - "epoch": 0.7671171840190109, - "grad_norm": 0.839529275894165, - "learning_rate": 2.5397296895885936e-05, - "loss": 0.0727, + "epoch": 1.5342343680380217, + "grad_norm": 1.8891311883926392, + "learning_rate": 8.465765631961979e-06, + "loss": 0.0709, "step": 10330 }, { - "epoch": 0.7678597950393584, - "grad_norm": 1.2142996788024902, - "learning_rate": 2.539284122976385e-05, - "loss": 0.069, + "epoch": 1.5357195900787168, + "grad_norm": 1.0354690551757812, + "learning_rate": 8.464280409921285e-06, + "loss": 0.0747, "step": 10340 }, { - "epoch": 0.768602406059706, - "grad_norm": 3.3854808807373047, - "learning_rate": 2.5388385563641766e-05, - "loss": 0.086, + "epoch": 1.5372048121194117, + "grad_norm": 0.9112366437911987, + "learning_rate": 8.462795187880589e-06, + "loss": 0.0794, "step": 10350 }, { - "epoch": 0.7693450170800534, - "grad_norm": 1.9810289144515991, - "learning_rate": 2.5383929897519677e-05, - "loss": 0.0621, + "epoch": 1.538690034160107, + "grad_norm": 1.0346076488494873, + "learning_rate": 8.461309965839894e-06, + "loss": 0.074, "step": 10360 }, { - "epoch": 0.770087628100401, - "grad_norm": 1.3424344062805176, - "learning_rate": 2.5379474231397596e-05, - "loss": 0.0884, + "epoch": 1.540175256200802, + "grad_norm": 1.108900547027588, + "learning_rate": 8.459824743799198e-06, + "loss": 0.0797, "step": 10370 }, { - "epoch": 0.7708302391207486, - "grad_norm": 1.7278804779052734, - "learning_rate": 2.537501856527551e-05, - "loss": 0.0618, + "epoch": 1.541660478241497, + "grad_norm": 0.5557714700698853, + "learning_rate": 8.458339521758504e-06, + "loss": 0.0792, "step": 10380 }, { - "epoch": 0.7715728501410961, - "grad_norm": 2.9425151348114014, - "learning_rate": 2.5370562899153422e-05, - "loss": 0.1162, + "epoch": 1.5431457002821922, + "grad_norm": 0.8850210309028625, + "learning_rate": 8.456854299717809e-06, + "loss": 0.061, "step": 10390 }, { - "epoch": 0.7723154611614437, - "grad_norm": 0.7557898759841919, - "learning_rate": 2.536610723303134e-05, - "loss": 0.1009, + "epoch": 1.5446309223228871, + "grad_norm": 1.057712435722351, + "learning_rate": 8.455369077677113e-06, + "loss": 0.0889, "step": 10400 }, { - "epoch": 0.7730580721817911, - "grad_norm": 0.9816102981567383, - "learning_rate": 2.5361651566909256e-05, - "loss": 0.0806, + "epoch": 1.5461161443635825, + "grad_norm": 1.0709397792816162, + "learning_rate": 8.453883855636419e-06, + "loss": 0.0661, "step": 10410 }, { - "epoch": 0.7738006832021387, - "grad_norm": 0.9218798875808716, - "learning_rate": 2.5357195900787167e-05, - "loss": 0.0424, + "epoch": 1.5476013664042774, + "grad_norm": 1.3140729665756226, + "learning_rate": 8.452398633595724e-06, + "loss": 0.0694, "step": 10420 }, { - "epoch": 0.7745432942224862, - "grad_norm": 1.2472357749938965, - "learning_rate": 2.5352740234665082e-05, - "loss": 0.0706, + "epoch": 1.5490865884449725, + "grad_norm": 0.6008923053741455, + "learning_rate": 8.450913411555028e-06, + "loss": 0.0644, "step": 10430 }, { - "epoch": 0.7752859052428338, - "grad_norm": 3.426825523376465, - "learning_rate": 2.5348284568542997e-05, - "loss": 0.0776, + "epoch": 1.5505718104856676, + "grad_norm": 0.8166418075561523, + "learning_rate": 8.449428189514332e-06, + "loss": 0.0616, "step": 10440 }, { - "epoch": 0.7760285162631814, - "grad_norm": 4.194761753082275, - "learning_rate": 2.5343828902420912e-05, - "loss": 0.0697, + "epoch": 1.5520570325263627, + "grad_norm": 1.0653516054153442, + "learning_rate": 8.447942967473639e-06, + "loss": 0.0842, "step": 10450 }, { - "epoch": 0.7767711272835289, - "grad_norm": 0.678124189376831, - "learning_rate": 2.5339373236298827e-05, - "loss": 0.1128, + "epoch": 1.5535422545670579, + "grad_norm": 1.1640278100967407, + "learning_rate": 8.446457745432943e-06, + "loss": 0.0669, "step": 10460 }, { - "epoch": 0.7775137383038764, - "grad_norm": 2.648623466491699, - "learning_rate": 2.5334917570176742e-05, - "loss": 0.0876, + "epoch": 1.5550274766077528, + "grad_norm": 1.5584324598312378, + "learning_rate": 8.444972523392247e-06, + "loss": 0.0671, "step": 10470 }, { - "epoch": 0.7782563493242239, - "grad_norm": 1.699841856956482, - "learning_rate": 2.5330461904054657e-05, - "loss": 0.074, + "epoch": 1.556512698648448, + "grad_norm": 0.6926531195640564, + "learning_rate": 8.443487301351554e-06, + "loss": 0.0703, "step": 10480 }, { - "epoch": 0.7789989603445715, - "grad_norm": 3.2049789428710938, - "learning_rate": 2.5326006237932572e-05, - "loss": 0.0872, + "epoch": 1.557997920689143, + "grad_norm": 1.2360625267028809, + "learning_rate": 8.442002079310858e-06, + "loss": 0.0771, "step": 10490 }, { - "epoch": 0.7797415713649191, - "grad_norm": 3.888385057449341, - "learning_rate": 2.5321550571810487e-05, - "loss": 0.0717, + "epoch": 1.5594831427298381, + "grad_norm": 1.1805577278137207, + "learning_rate": 8.440516857270162e-06, + "loss": 0.0665, "step": 10500 }, { - "epoch": 0.7804841823852666, - "grad_norm": 2.0463638305664062, - "learning_rate": 2.5317094905688402e-05, - "loss": 0.1034, + "epoch": 1.5609683647705332, + "grad_norm": 0.6443304419517517, + "learning_rate": 8.439031635229467e-06, + "loss": 0.0645, "step": 10510 }, { - "epoch": 0.7812267934056142, - "grad_norm": 0.6997508406639099, - "learning_rate": 2.5312639239566314e-05, - "loss": 0.0717, + "epoch": 1.5624535868112281, + "grad_norm": 0.92369145154953, + "learning_rate": 8.437546413188771e-06, + "loss": 0.0547, "step": 10520 }, { - "epoch": 0.7819694044259616, - "grad_norm": 1.8925009965896606, - "learning_rate": 2.530818357344423e-05, - "loss": 0.0693, + "epoch": 1.5639388088519235, + "grad_norm": 0.5048189759254456, + "learning_rate": 8.436061191148077e-06, + "loss": 0.0737, "step": 10530 }, { - "epoch": 0.7827120154463092, - "grad_norm": 1.6464449167251587, - "learning_rate": 2.5303727907322147e-05, - "loss": 0.0822, + "epoch": 1.5654240308926184, + "grad_norm": 0.6624378561973572, + "learning_rate": 8.434575969107382e-06, + "loss": 0.0757, "step": 10540 }, { - "epoch": 0.7834546264666568, - "grad_norm": 0.8865845203399658, - "learning_rate": 2.529927224120006e-05, - "loss": 0.0847, + "epoch": 1.5669092529333135, + "grad_norm": 0.5888051986694336, + "learning_rate": 8.433090747066686e-06, + "loss": 0.0664, "step": 10550 }, { - "epoch": 0.7841972374870043, - "grad_norm": 2.912022113800049, - "learning_rate": 2.5294816575077974e-05, - "loss": 0.0784, + "epoch": 1.5683944749740086, + "grad_norm": 1.3048902750015259, + "learning_rate": 8.431605525025992e-06, + "loss": 0.071, "step": 10560 }, { - "epoch": 0.7849398485073519, - "grad_norm": 2.305199146270752, - "learning_rate": 2.5290360908955892e-05, - "loss": 0.0715, + "epoch": 1.5698796970147035, + "grad_norm": 0.6339612007141113, + "learning_rate": 8.430120302985297e-06, + "loss": 0.0737, "step": 10570 }, { - "epoch": 0.7856824595276994, - "grad_norm": 3.301766872406006, - "learning_rate": 2.5285905242833804e-05, - "loss": 0.0818, + "epoch": 1.5713649190553989, + "grad_norm": 0.9797409176826477, + "learning_rate": 8.428635080944601e-06, + "loss": 0.0743, "step": 10580 }, { - "epoch": 0.7864250705480469, - "grad_norm": 0.7540196180343628, - "learning_rate": 2.528144957671172e-05, - "loss": 0.0804, + "epoch": 1.5728501410960938, + "grad_norm": 0.8972368240356445, + "learning_rate": 8.427149858903907e-06, + "loss": 0.0625, "step": 10590 }, { - "epoch": 0.7871676815683945, - "grad_norm": 4.044961452484131, - "learning_rate": 2.5276993910589634e-05, - "loss": 0.0827, + "epoch": 1.574335363136789, + "grad_norm": 0.6709739565849304, + "learning_rate": 8.425664636863212e-06, + "loss": 0.0602, "step": 10600 }, { - "epoch": 0.787910292588742, - "grad_norm": 1.4841824769973755, - "learning_rate": 2.527253824446755e-05, - "loss": 0.1031, + "epoch": 1.575820585177484, + "grad_norm": 0.9978169202804565, + "learning_rate": 8.424179414822516e-06, + "loss": 0.0593, "step": 10610 }, { - "epoch": 0.7886529036090896, - "grad_norm": 1.3933384418487549, - "learning_rate": 2.5268082578345464e-05, - "loss": 0.057, + "epoch": 1.5773058072181791, + "grad_norm": 1.1661869287490845, + "learning_rate": 8.422694192781822e-06, + "loss": 0.0692, "step": 10620 }, { - "epoch": 0.7893955146294371, - "grad_norm": 2.6198787689208984, - "learning_rate": 2.5263626912223375e-05, - "loss": 0.0856, + "epoch": 1.5787910292588743, + "grad_norm": 0.9158114194869995, + "learning_rate": 8.421208970741127e-06, + "loss": 0.0609, "step": 10630 }, { - "epoch": 0.7901381256497847, - "grad_norm": 1.5979726314544678, - "learning_rate": 2.5259171246101294e-05, - "loss": 0.0774, + "epoch": 1.5802762512995692, + "grad_norm": 0.8171471953392029, + "learning_rate": 8.419723748700431e-06, + "loss": 0.0645, "step": 10640 }, { - "epoch": 0.7908807366701321, - "grad_norm": 1.575772762298584, - "learning_rate": 2.525471557997921e-05, - "loss": 0.0775, + "epoch": 1.5817614733402645, + "grad_norm": 0.5352485179901123, + "learning_rate": 8.418238526659737e-06, + "loss": 0.0776, "step": 10650 }, { - "epoch": 0.7916233476904797, - "grad_norm": 2.2343573570251465, - "learning_rate": 2.525025991385712e-05, - "loss": 0.109, + "epoch": 1.5832466953809594, + "grad_norm": 0.9403135776519775, + "learning_rate": 8.41675330461904e-06, + "loss": 0.0865, "step": 10660 }, { - "epoch": 0.7923659587108273, - "grad_norm": 0.6971462368965149, - "learning_rate": 2.524580424773504e-05, - "loss": 0.072, + "epoch": 1.5847319174216545, + "grad_norm": 0.7482515573501587, + "learning_rate": 8.415268082578346e-06, + "loss": 0.056, "step": 10670 }, { - "epoch": 0.7931085697311748, - "grad_norm": 1.1593713760375977, - "learning_rate": 2.5241348581612954e-05, - "loss": 0.1163, + "epoch": 1.5862171394623497, + "grad_norm": 0.3600064814090729, + "learning_rate": 8.41378286053765e-06, + "loss": 0.0766, "step": 10680 }, { - "epoch": 0.7938511807515224, - "grad_norm": 2.554516077041626, - "learning_rate": 2.5236892915490865e-05, - "loss": 0.096, + "epoch": 1.5877023615030446, + "grad_norm": 1.0959473848342896, + "learning_rate": 8.412297638496955e-06, + "loss": 0.0699, "step": 10690 }, { - "epoch": 0.7945937917718698, - "grad_norm": 1.123022198677063, - "learning_rate": 2.523243724936878e-05, - "loss": 0.0701, + "epoch": 1.58918758354374, + "grad_norm": 0.479476660490036, + "learning_rate": 8.410812416456261e-06, + "loss": 0.0676, "step": 10700 }, { - "epoch": 0.7953364027922174, - "grad_norm": 2.6108126640319824, - "learning_rate": 2.52279815832467e-05, - "loss": 0.0747, + "epoch": 1.5906728055844348, + "grad_norm": 1.0282557010650635, + "learning_rate": 8.409327194415566e-06, + "loss": 0.0993, "step": 10710 }, { - "epoch": 0.796079013812565, - "grad_norm": 0.9052862524986267, - "learning_rate": 2.522352591712461e-05, - "loss": 0.0864, + "epoch": 1.59215802762513, + "grad_norm": 0.6192666888237, + "learning_rate": 8.40784197237487e-06, + "loss": 0.0599, "step": 10720 }, { - "epoch": 0.7968216248329125, - "grad_norm": 1.4516713619232178, - "learning_rate": 2.5219070251002525e-05, - "loss": 0.0646, + "epoch": 1.593643249665825, + "grad_norm": 0.8534938097000122, + "learning_rate": 8.406356750334176e-06, + "loss": 0.0635, "step": 10730 }, { - "epoch": 0.7975642358532601, - "grad_norm": 4.304675579071045, - "learning_rate": 2.521461458488044e-05, - "loss": 0.0857, + "epoch": 1.5951284717065202, + "grad_norm": 1.2471647262573242, + "learning_rate": 8.40487152829348e-06, + "loss": 0.0793, "step": 10740 }, { - "epoch": 0.7983068468736076, - "grad_norm": 1.783659815788269, - "learning_rate": 2.5210158918758355e-05, - "loss": 0.0866, + "epoch": 1.5966136937472153, + "grad_norm": 0.9096415638923645, + "learning_rate": 8.403386306252785e-06, + "loss": 0.0877, "step": 10750 }, { - "epoch": 0.7990494578939551, - "grad_norm": 1.544155240058899, - "learning_rate": 2.520570325263627e-05, - "loss": 0.0547, + "epoch": 1.5980989157879102, + "grad_norm": 0.5800266861915588, + "learning_rate": 8.401901084212091e-06, + "loss": 0.0669, "step": 10760 }, { - "epoch": 0.7997920689143027, - "grad_norm": 3.2248337268829346, - "learning_rate": 2.5201247586514182e-05, - "loss": 0.0715, + "epoch": 1.5995841378286055, + "grad_norm": 1.3576295375823975, + "learning_rate": 8.400415862171396e-06, + "loss": 0.0812, "step": 10770 }, { - "epoch": 0.8005346799346502, - "grad_norm": 1.370150089263916, - "learning_rate": 2.51967919203921e-05, - "loss": 0.0913, + "epoch": 1.6010693598693004, + "grad_norm": 1.076351523399353, + "learning_rate": 8.3989306401307e-06, + "loss": 0.0692, "step": 10780 }, { - "epoch": 0.8012772909549978, - "grad_norm": 1.1197993755340576, - "learning_rate": 2.5192336254270015e-05, - "loss": 0.0771, + "epoch": 1.6025545819099956, + "grad_norm": 1.3364313840866089, + "learning_rate": 8.397445418090006e-06, + "loss": 0.0735, "step": 10790 }, { - "epoch": 0.8020199019753453, - "grad_norm": 1.1327694654464722, - "learning_rate": 2.5187880588147927e-05, - "loss": 0.0875, + "epoch": 1.6040398039506907, + "grad_norm": 0.7803967595100403, + "learning_rate": 8.395960196049309e-06, + "loss": 0.0639, "step": 10800 }, { - "epoch": 0.8027625129956929, - "grad_norm": 1.8613241910934448, - "learning_rate": 2.5183424922025845e-05, - "loss": 0.0681, + "epoch": 1.6055250259913856, + "grad_norm": 0.8426043391227722, + "learning_rate": 8.394474974008615e-06, + "loss": 0.0795, "step": 10810 }, { - "epoch": 0.8035051240160404, - "grad_norm": 2.5763204097747803, - "learning_rate": 2.517896925590376e-05, - "loss": 0.0884, + "epoch": 1.607010248032081, + "grad_norm": 0.7319867014884949, + "learning_rate": 8.392989751967921e-06, + "loss": 0.0739, "step": 10820 }, { - "epoch": 0.8042477350363879, - "grad_norm": 2.63012433052063, - "learning_rate": 2.5174513589781672e-05, - "loss": 0.0685, + "epoch": 1.6084954700727758, + "grad_norm": 0.9259944558143616, + "learning_rate": 8.391504529927224e-06, + "loss": 0.0585, "step": 10830 }, { - "epoch": 0.8049903460567355, - "grad_norm": 0.752113401889801, - "learning_rate": 2.5170057923659587e-05, - "loss": 0.1005, + "epoch": 1.609980692113471, + "grad_norm": 0.9031527042388916, + "learning_rate": 8.39001930788653e-06, + "loss": 0.0805, "step": 10840 }, { - "epoch": 0.805732957077083, - "grad_norm": 1.2157506942749023, - "learning_rate": 2.5165602257537502e-05, - "loss": 0.0859, + "epoch": 1.611465914154166, + "grad_norm": 0.9274294972419739, + "learning_rate": 8.388534085845835e-06, + "loss": 0.0867, "step": 10850 }, { - "epoch": 0.8064755680974306, - "grad_norm": 2.4420969486236572, - "learning_rate": 2.5161146591415417e-05, - "loss": 0.0972, + "epoch": 1.612951136194861, + "grad_norm": 1.1121625900268555, + "learning_rate": 8.387048863805139e-06, + "loss": 0.0728, "step": 10860 }, { - "epoch": 0.8072181791177782, - "grad_norm": 2.374080181121826, - "learning_rate": 2.5156690925293332e-05, - "loss": 0.0523, + "epoch": 1.6144363582355563, + "grad_norm": 1.631706714630127, + "learning_rate": 8.385563641764445e-06, + "loss": 0.0841, "step": 10870 }, { - "epoch": 0.8079607901381256, - "grad_norm": 0.5466364622116089, - "learning_rate": 2.5152235259171247e-05, - "loss": 0.0482, + "epoch": 1.6159215802762512, + "grad_norm": 0.4203210175037384, + "learning_rate": 8.38407841972375e-06, + "loss": 0.0599, "step": 10880 }, { - "epoch": 0.8087034011584732, - "grad_norm": 0.722277045249939, - "learning_rate": 2.5147779593049162e-05, - "loss": 0.0616, + "epoch": 1.6174068023169463, + "grad_norm": 0.3640812039375305, + "learning_rate": 8.382593197683054e-06, + "loss": 0.0574, "step": 10890 }, { - "epoch": 0.8094460121788207, - "grad_norm": 3.784972906112671, - "learning_rate": 2.5143323926927077e-05, - "loss": 0.0835, + "epoch": 1.6188920243576415, + "grad_norm": 1.4708698987960815, + "learning_rate": 8.38110797564236e-06, + "loss": 0.0679, "step": 10900 }, { - "epoch": 0.8101886231991683, - "grad_norm": 1.7221379280090332, - "learning_rate": 2.5138868260804992e-05, - "loss": 0.0891, + "epoch": 1.6203772463983366, + "grad_norm": 0.6763873100280762, + "learning_rate": 8.379622753601664e-06, + "loss": 0.0818, "step": 10910 }, { - "epoch": 0.8109312342195159, - "grad_norm": 1.2730120420455933, - "learning_rate": 2.5134412594682907e-05, - "loss": 0.078, + "epoch": 1.6218624684390317, + "grad_norm": 0.653501033782959, + "learning_rate": 8.378137531560969e-06, + "loss": 0.0709, "step": 10920 }, { - "epoch": 0.8116738452398634, - "grad_norm": 1.8988823890686035, - "learning_rate": 2.512995692856082e-05, - "loss": 0.0918, + "epoch": 1.6233476904797266, + "grad_norm": 1.1871000528335571, + "learning_rate": 8.376652309520273e-06, + "loss": 0.0776, "step": 10930 }, { - "epoch": 0.8124164562602109, - "grad_norm": 0.7136462926864624, - "learning_rate": 2.5125501262438733e-05, - "loss": 0.047, + "epoch": 1.624832912520422, + "grad_norm": 1.2294886112213135, + "learning_rate": 8.37516708747958e-06, + "loss": 0.0795, "step": 10940 }, { - "epoch": 0.8131590672805584, - "grad_norm": 1.3096719980239868, - "learning_rate": 2.5121045596316652e-05, - "loss": 0.09, + "epoch": 1.6263181345611168, + "grad_norm": 1.0966838598251343, + "learning_rate": 8.373681865438884e-06, + "loss": 0.0715, "step": 10950 }, { - "epoch": 0.813901678300906, - "grad_norm": 1.3436990976333618, - "learning_rate": 2.5116589930194563e-05, - "loss": 0.0795, + "epoch": 1.627803356601812, + "grad_norm": 0.4268195331096649, + "learning_rate": 8.372196643398188e-06, + "loss": 0.0572, "step": 10960 }, { - "epoch": 0.8146442893212535, - "grad_norm": 1.0467826128005981, - "learning_rate": 2.511213426407248e-05, - "loss": 0.0906, + "epoch": 1.629288578642507, + "grad_norm": 0.8819934129714966, + "learning_rate": 8.370711421357493e-06, + "loss": 0.0737, "step": 10970 }, { - "epoch": 0.8153869003416011, - "grad_norm": 0.924268364906311, - "learning_rate": 2.5107678597950397e-05, - "loss": 0.094, + "epoch": 1.630773800683202, + "grad_norm": 0.6157822012901306, + "learning_rate": 8.369226199316799e-06, + "loss": 0.0777, "step": 10980 }, { - "epoch": 0.8161295113619486, - "grad_norm": 1.8887720108032227, - "learning_rate": 2.510322293182831e-05, - "loss": 0.0878, + "epoch": 1.6322590227238973, + "grad_norm": 1.0847469568252563, + "learning_rate": 8.367740977276103e-06, + "loss": 0.0871, "step": 10990 }, { - "epoch": 0.8168721223822961, - "grad_norm": 3.255546808242798, - "learning_rate": 2.5098767265706223e-05, - "loss": 0.0729, + "epoch": 1.6337442447645922, + "grad_norm": 1.0257930755615234, + "learning_rate": 8.366255755235408e-06, + "loss": 0.0679, "step": 11000 }, { - "epoch": 0.8176147334026437, - "grad_norm": 1.3691035509109497, - "learning_rate": 2.509431159958414e-05, - "loss": 0.0854, + "epoch": 1.6352294668052874, + "grad_norm": 0.9728336334228516, + "learning_rate": 8.364770533194714e-06, + "loss": 0.0565, "step": 11010 }, { - "epoch": 0.8183573444229912, - "grad_norm": 0.6990775465965271, - "learning_rate": 2.5089855933462053e-05, - "loss": 0.046, + "epoch": 1.6367146888459825, + "grad_norm": 1.1196662187576294, + "learning_rate": 8.363285311154018e-06, + "loss": 0.0632, "step": 11020 }, { - "epoch": 0.8190999554433388, - "grad_norm": 2.0553324222564697, - "learning_rate": 2.508540026733997e-05, - "loss": 0.0902, + "epoch": 1.6381999108866776, + "grad_norm": 0.7816924452781677, + "learning_rate": 8.361800089113323e-06, + "loss": 0.0894, "step": 11030 }, { - "epoch": 0.8198425664636864, - "grad_norm": 2.257805109024048, - "learning_rate": 2.508094460121788e-05, - "loss": 0.0885, + "epoch": 1.6396851329273727, + "grad_norm": 1.277018427848816, + "learning_rate": 8.360314867072627e-06, + "loss": 0.0642, "step": 11040 }, { - "epoch": 0.8205851774840338, - "grad_norm": 1.704160451889038, - "learning_rate": 2.50764889350958e-05, - "loss": 0.0632, + "epoch": 1.6411703549680676, + "grad_norm": 0.7299898862838745, + "learning_rate": 8.358829645031933e-06, + "loss": 0.0779, "step": 11050 }, { - "epoch": 0.8213277885043814, - "grad_norm": 0.8274914622306824, - "learning_rate": 2.5072033268973713e-05, - "loss": 0.0925, + "epoch": 1.642655577008763, + "grad_norm": 1.1374343633651733, + "learning_rate": 8.357344422991238e-06, + "loss": 0.062, "step": 11060 }, { - "epoch": 0.8220703995247289, - "grad_norm": 0.8776381015777588, - "learning_rate": 2.5067577602851625e-05, - "loss": 0.0756, + "epoch": 1.6441407990494579, + "grad_norm": 1.098451018333435, + "learning_rate": 8.355859200950542e-06, + "loss": 0.074, "step": 11070 }, { - "epoch": 0.8228130105450765, - "grad_norm": 1.621468424797058, - "learning_rate": 2.5063121936729543e-05, - "loss": 0.0895, + "epoch": 1.645626021090153, + "grad_norm": 0.7910609245300293, + "learning_rate": 8.354373978909848e-06, + "loss": 0.0675, "step": 11080 }, { - "epoch": 0.8235556215654241, - "grad_norm": 0.5569895505905151, - "learning_rate": 2.5058666270607458e-05, - "loss": 0.0859, + "epoch": 1.6471112431308481, + "grad_norm": 0.676892876625061, + "learning_rate": 8.352888756869153e-06, + "loss": 0.0681, "step": 11090 }, { - "epoch": 0.8242982325857716, - "grad_norm": 1.4502453804016113, - "learning_rate": 2.505421060448537e-05, - "loss": 0.113, + "epoch": 1.648596465171543, + "grad_norm": 1.1646522283554077, + "learning_rate": 8.351403534828457e-06, + "loss": 0.0691, "step": 11100 }, { - "epoch": 0.8250408436061191, - "grad_norm": 2.805652141571045, - "learning_rate": 2.5049754938363285e-05, - "loss": 0.0803, + "epoch": 1.6500816872122384, + "grad_norm": 1.6074599027633667, + "learning_rate": 8.349918312787763e-06, + "loss": 0.0718, "step": 11110 }, { - "epoch": 0.8257834546264666, - "grad_norm": 1.2207911014556885, - "learning_rate": 2.5045299272241203e-05, - "loss": 0.0753, + "epoch": 1.6515669092529333, + "grad_norm": 0.8327904939651489, + "learning_rate": 8.348433090747068e-06, + "loss": 0.0817, "step": 11120 }, { - "epoch": 0.8265260656468142, - "grad_norm": 1.3321232795715332, - "learning_rate": 2.5040843606119115e-05, - "loss": 0.0789, + "epoch": 1.6530521312936284, + "grad_norm": 0.8040626645088196, + "learning_rate": 8.346947868706372e-06, + "loss": 0.0774, "step": 11130 }, { - "epoch": 0.8272686766671618, - "grad_norm": 0.8445536494255066, - "learning_rate": 2.503638793999703e-05, - "loss": 0.1171, + "epoch": 1.6545373533343235, + "grad_norm": 0.6327669620513916, + "learning_rate": 8.345462646665677e-06, + "loss": 0.0855, "step": 11140 }, { - "epoch": 0.8280112876875093, - "grad_norm": 1.156607985496521, - "learning_rate": 2.5031932273874945e-05, - "loss": 0.0651, + "epoch": 1.6560225753750184, + "grad_norm": 1.1430256366729736, + "learning_rate": 8.343977424624981e-06, + "loss": 0.0673, "step": 11150 }, { - "epoch": 0.8287538987078569, - "grad_norm": 2.5844602584838867, - "learning_rate": 2.502747660775286e-05, - "loss": 0.0992, + "epoch": 1.6575077974157137, + "grad_norm": 0.7978767156600952, + "learning_rate": 8.342492202584287e-06, + "loss": 0.0803, "step": 11160 }, { - "epoch": 0.8294965097282043, - "grad_norm": 2.682854413986206, - "learning_rate": 2.5023020941630775e-05, - "loss": 0.0615, + "epoch": 1.6589930194564086, + "grad_norm": 0.7723683714866638, + "learning_rate": 8.341006980543592e-06, + "loss": 0.0841, "step": 11170 }, { - "epoch": 0.8302391207485519, - "grad_norm": 1.1782902479171753, - "learning_rate": 2.5018565275508686e-05, - "loss": 0.075, + "epoch": 1.6604782414971038, + "grad_norm": 0.4924938380718231, + "learning_rate": 8.339521758502896e-06, + "loss": 0.0562, "step": 11180 }, { - "epoch": 0.8309817317688994, - "grad_norm": 3.394202709197998, - "learning_rate": 2.5014109609386605e-05, - "loss": 0.1351, + "epoch": 1.661963463537799, + "grad_norm": 0.5342621207237244, + "learning_rate": 8.338036536462202e-06, + "loss": 0.0738, "step": 11190 }, { - "epoch": 0.831724342789247, - "grad_norm": 2.0263335704803467, - "learning_rate": 2.500965394326452e-05, - "loss": 0.086, + "epoch": 1.663448685578494, + "grad_norm": 0.4038882851600647, + "learning_rate": 8.336551314421507e-06, + "loss": 0.0536, "step": 11200 }, { - "epoch": 0.8324669538095946, - "grad_norm": 2.133747100830078, - "learning_rate": 2.500519827714243e-05, - "loss": 0.0788, + "epoch": 1.6649339076191891, + "grad_norm": 1.0539206266403198, + "learning_rate": 8.335066092380811e-06, + "loss": 0.0806, "step": 11210 }, { - "epoch": 0.8332095648299421, - "grad_norm": 3.7382562160491943, - "learning_rate": 2.500074261102035e-05, - "loss": 0.067, + "epoch": 1.666419129659884, + "grad_norm": 0.8110040426254272, + "learning_rate": 8.333580870340117e-06, + "loss": 0.0718, "step": 11220 }, { - "epoch": 0.8339521758502896, - "grad_norm": 1.4864078760147095, - "learning_rate": 2.4996286944898265e-05, - "loss": 0.0822, + "epoch": 1.6679043517005794, + "grad_norm": 1.0522854328155518, + "learning_rate": 8.332095648299422e-06, + "loss": 0.0784, "step": 11230 }, { - "epoch": 0.8346947868706371, - "grad_norm": 2.0430474281311035, - "learning_rate": 2.4991831278776176e-05, - "loss": 0.0774, + "epoch": 1.6693895737412743, + "grad_norm": 1.0400513410568237, + "learning_rate": 8.330610426258726e-06, + "loss": 0.058, "step": 11240 }, { - "epoch": 0.8354373978909847, - "grad_norm": 3.536273956298828, - "learning_rate": 2.498737561265409e-05, - "loss": 0.1103, + "epoch": 1.6708747957819694, + "grad_norm": 0.8211175799369812, + "learning_rate": 8.329125204218032e-06, + "loss": 0.0707, "step": 11250 }, { - "epoch": 0.8361800089113323, - "grad_norm": 0.7639611959457397, - "learning_rate": 2.4982919946532006e-05, - "loss": 0.0826, + "epoch": 1.6723600178226645, + "grad_norm": 1.2590583562850952, + "learning_rate": 8.327639982177335e-06, + "loss": 0.0787, "step": 11260 }, { - "epoch": 0.8369226199316798, - "grad_norm": 1.1882314682006836, - "learning_rate": 2.497846428040992e-05, - "loss": 0.0952, + "epoch": 1.6738452398633594, + "grad_norm": 1.1944266557693481, + "learning_rate": 8.326154760136641e-06, + "loss": 0.0774, "step": 11270 }, { - "epoch": 0.8376652309520273, - "grad_norm": 1.9526349306106567, - "learning_rate": 2.4974008614287836e-05, - "loss": 0.0446, + "epoch": 1.6753304619040548, + "grad_norm": 0.900720477104187, + "learning_rate": 8.324669538095947e-06, + "loss": 0.0661, "step": 11280 }, { - "epoch": 0.8384078419723748, - "grad_norm": 2.2650139331817627, - "learning_rate": 2.496955294816575e-05, - "loss": 0.0896, + "epoch": 1.6768156839447497, + "grad_norm": 0.4314388036727905, + "learning_rate": 8.32318431605525e-06, + "loss": 0.0628, "step": 11290 }, { - "epoch": 0.8391504529927224, - "grad_norm": 1.7543269395828247, - "learning_rate": 2.4965097282043666e-05, - "loss": 0.0582, + "epoch": 1.6783009059854448, + "grad_norm": 0.7443414330482483, + "learning_rate": 8.321699094014556e-06, + "loss": 0.0806, "step": 11300 }, { - "epoch": 0.83989306401307, - "grad_norm": 1.1936362981796265, - "learning_rate": 2.496064161592158e-05, - "loss": 0.0939, + "epoch": 1.67978612802614, + "grad_norm": 0.8457760214805603, + "learning_rate": 8.32021387197386e-06, + "loss": 0.0639, "step": 11310 }, { - "epoch": 0.8406356750334175, - "grad_norm": 2.0941545963287354, - "learning_rate": 2.4956185949799496e-05, - "loss": 0.0691, + "epoch": 1.681271350066835, + "grad_norm": 1.0876080989837646, + "learning_rate": 8.318728649933165e-06, + "loss": 0.0857, "step": 11320 }, { - "epoch": 0.8413782860537651, - "grad_norm": 3.267097234725952, - "learning_rate": 2.495173028367741e-05, - "loss": 0.0826, + "epoch": 1.6827565721075302, + "grad_norm": 0.6526848077774048, + "learning_rate": 8.317243427892471e-06, + "loss": 0.0603, "step": 11330 }, { - "epoch": 0.8421208970741125, - "grad_norm": 2.769155263900757, - "learning_rate": 2.4947274617555326e-05, - "loss": 0.0572, + "epoch": 1.684241794148225, + "grad_norm": 1.3263145685195923, + "learning_rate": 8.315758205851775e-06, + "loss": 0.0819, "step": 11340 }, { - "epoch": 0.8428635080944601, - "grad_norm": 0.9428232312202454, - "learning_rate": 2.4942818951433238e-05, - "loss": 0.081, + "epoch": 1.6857270161889204, + "grad_norm": 0.781700611114502, + "learning_rate": 8.31427298381108e-06, + "loss": 0.0758, "step": 11350 }, { - "epoch": 0.8436061191148077, - "grad_norm": 1.1093528270721436, - "learning_rate": 2.4938363285311156e-05, - "loss": 0.0688, + "epoch": 1.6872122382296153, + "grad_norm": 0.9908173680305481, + "learning_rate": 8.312787761770386e-06, + "loss": 0.0681, "step": 11360 }, { - "epoch": 0.8443487301351552, - "grad_norm": 1.8220789432525635, - "learning_rate": 2.4933907619189068e-05, - "loss": 0.0744, + "epoch": 1.6886974602703104, + "grad_norm": 1.1831738948822021, + "learning_rate": 8.31130253972969e-06, + "loss": 0.0985, "step": 11370 }, { - "epoch": 0.8450913411555028, - "grad_norm": 3.5718438625335693, - "learning_rate": 2.4929451953066983e-05, - "loss": 0.0902, + "epoch": 1.6901826823110055, + "grad_norm": 0.9816341400146484, + "learning_rate": 8.309817317688995e-06, + "loss": 0.0777, "step": 11380 }, { - "epoch": 0.8458339521758503, - "grad_norm": 1.5545248985290527, - "learning_rate": 2.49249962869449e-05, - "loss": 0.1087, + "epoch": 1.6916679043517004, + "grad_norm": 0.4548829197883606, + "learning_rate": 8.308332095648301e-06, + "loss": 0.0643, "step": 11390 }, { - "epoch": 0.8465765631961978, - "grad_norm": 1.3270010948181152, - "learning_rate": 2.4920540620822813e-05, - "loss": 0.1128, + "epoch": 1.6931531263923958, + "grad_norm": 0.7465964555740356, + "learning_rate": 8.306846873607605e-06, + "loss": 0.0554, "step": 11400 }, { - "epoch": 0.8473191742165453, - "grad_norm": 1.9811359643936157, - "learning_rate": 2.4916084954700728e-05, - "loss": 0.0802, + "epoch": 1.6946383484330907, + "grad_norm": 0.7259760499000549, + "learning_rate": 8.30536165156691e-06, + "loss": 0.0694, "step": 11410 }, { - "epoch": 0.8480617852368929, - "grad_norm": 2.4535109996795654, - "learning_rate": 2.4911629288578643e-05, - "loss": 0.076, + "epoch": 1.6961235704737858, + "grad_norm": 1.0586191415786743, + "learning_rate": 8.303876429526216e-06, + "loss": 0.0796, "step": 11420 }, { - "epoch": 0.8488043962572405, - "grad_norm": 2.042264223098755, - "learning_rate": 2.4907173622456558e-05, - "loss": 0.0889, + "epoch": 1.697608792514481, + "grad_norm": 1.0120733976364136, + "learning_rate": 8.302391207485519e-06, + "loss": 0.0755, "step": 11430 }, { - "epoch": 0.849547007277588, - "grad_norm": 1.1611895561218262, - "learning_rate": 2.4902717956334473e-05, - "loss": 0.0782, + "epoch": 1.6990940145551758, + "grad_norm": 0.8327755928039551, + "learning_rate": 8.300905985444825e-06, + "loss": 0.0774, "step": 11440 }, { - "epoch": 0.8502896182979356, - "grad_norm": 1.567514181137085, - "learning_rate": 2.4898262290212385e-05, - "loss": 0.1065, + "epoch": 1.7005792365958712, + "grad_norm": 0.8092065453529358, + "learning_rate": 8.29942076340413e-06, + "loss": 0.0659, "step": 11450 }, { - "epoch": 0.851032229318283, - "grad_norm": 1.7414668798446655, - "learning_rate": 2.4893806624090303e-05, - "loss": 0.0752, + "epoch": 1.702064458636566, + "grad_norm": 0.67973792552948, + "learning_rate": 8.297935541363434e-06, + "loss": 0.081, "step": 11460 }, { - "epoch": 0.8517748403386306, - "grad_norm": 1.3344578742980957, - "learning_rate": 2.4889350957968218e-05, - "loss": 0.0794, + "epoch": 1.7035496806772612, + "grad_norm": 0.8815967440605164, + "learning_rate": 8.29645031932274e-06, + "loss": 0.0709, "step": 11470 }, { - "epoch": 0.8525174513589782, - "grad_norm": 1.340126395225525, - "learning_rate": 2.488489529184613e-05, - "loss": 0.0936, + "epoch": 1.7050349027179563, + "grad_norm": 0.5321938395500183, + "learning_rate": 8.294965097282044e-06, + "loss": 0.0701, "step": 11480 }, { - "epoch": 0.8532600623793257, - "grad_norm": 2.9865872859954834, - "learning_rate": 2.4880439625724048e-05, - "loss": 0.0619, + "epoch": 1.7065201247586514, + "grad_norm": 0.9086109399795532, + "learning_rate": 8.293479875241349e-06, + "loss": 0.0784, "step": 11490 }, { - "epoch": 0.8540026733996733, - "grad_norm": 2.3079800605773926, - "learning_rate": 2.4875983959601963e-05, - "loss": 0.0819, + "epoch": 1.7080053467993466, + "grad_norm": 0.6548341512680054, + "learning_rate": 8.291994653200655e-06, + "loss": 0.0796, "step": 11500 }, { - "epoch": 0.8547452844200208, - "grad_norm": 2.029001474380493, - "learning_rate": 2.4871528293479874e-05, - "loss": 0.0964, + "epoch": 1.7094905688400415, + "grad_norm": 0.8495252728462219, + "learning_rate": 8.29050943115996e-06, + "loss": 0.0722, "step": 11510 }, { - "epoch": 0.8554878954403683, - "grad_norm": 0.8514242768287659, - "learning_rate": 2.486707262735779e-05, - "loss": 0.1067, + "epoch": 1.7109757908807368, + "grad_norm": 0.9425396919250488, + "learning_rate": 8.289024209119264e-06, + "loss": 0.0788, "step": 11520 }, { - "epoch": 0.8562305064607159, - "grad_norm": 3.7588460445404053, - "learning_rate": 2.4862616961235708e-05, - "loss": 0.0691, + "epoch": 1.7124610129214317, + "grad_norm": 0.9021424055099487, + "learning_rate": 8.28753898707857e-06, + "loss": 0.0528, "step": 11530 }, { - "epoch": 0.8569731174810634, - "grad_norm": 1.4834811687469482, - "learning_rate": 2.485816129511362e-05, - "loss": 0.0713, + "epoch": 1.7139462349621268, + "grad_norm": 0.8903309106826782, + "learning_rate": 8.286053765037874e-06, + "loss": 0.089, "step": 11540 }, { - "epoch": 0.857715728501411, - "grad_norm": 1.4009684324264526, - "learning_rate": 2.4853705628991534e-05, - "loss": 0.1032, + "epoch": 1.715431457002822, + "grad_norm": 0.8740152716636658, + "learning_rate": 8.284568542997179e-06, + "loss": 0.0725, "step": 11550 }, { - "epoch": 0.8584583395217585, - "grad_norm": 4.322129249572754, - "learning_rate": 2.484924996286945e-05, - "loss": 0.1125, + "epoch": 1.7169166790435169, + "grad_norm": 0.9269596934318542, + "learning_rate": 8.283083320956483e-06, + "loss": 0.0617, "step": 11560 }, { - "epoch": 0.859200950542106, - "grad_norm": 2.336434841156006, - "learning_rate": 2.4844794296747364e-05, - "loss": 0.1049, + "epoch": 1.7184019010842122, + "grad_norm": 0.9568021893501282, + "learning_rate": 8.28159809891579e-06, + "loss": 0.079, "step": 11570 }, { - "epoch": 0.8599435615624536, - "grad_norm": 1.3329766988754272, - "learning_rate": 2.484033863062528e-05, - "loss": 0.0499, + "epoch": 1.719887123124907, + "grad_norm": 0.48596277832984924, + "learning_rate": 8.280112876875094e-06, + "loss": 0.0717, "step": 11580 }, { - "epoch": 0.8606861725828011, - "grad_norm": 2.4188973903656006, - "learning_rate": 2.483588296450319e-05, - "loss": 0.0634, + "epoch": 1.7213723451656022, + "grad_norm": 0.8595500588417053, + "learning_rate": 8.278627654834398e-06, + "loss": 0.0821, "step": 11590 }, { - "epoch": 0.8614287836031487, - "grad_norm": 0.7930353283882141, - "learning_rate": 2.483142729838111e-05, - "loss": 0.0704, + "epoch": 1.7228575672062973, + "grad_norm": 1.2430893182754517, + "learning_rate": 8.277142432793703e-06, + "loss": 0.0834, "step": 11600 }, { - "epoch": 0.8621713946234962, - "grad_norm": 1.0637152194976807, - "learning_rate": 2.4826971632259024e-05, - "loss": 0.0958, + "epoch": 1.7243427892469925, + "grad_norm": 1.033395528793335, + "learning_rate": 8.275657210753009e-06, + "loss": 0.0747, "step": 11610 }, { - "epoch": 0.8629140056438438, - "grad_norm": 1.6092619895935059, - "learning_rate": 2.4822515966136936e-05, - "loss": 0.0967, + "epoch": 1.7258280112876876, + "grad_norm": 1.0437674522399902, + "learning_rate": 8.274171988712313e-06, + "loss": 0.0724, "step": 11620 }, { - "epoch": 0.8636566166641912, - "grad_norm": 1.6927438974380493, - "learning_rate": 2.4818060300014854e-05, - "loss": 0.0814, + "epoch": 1.7273132333283825, + "grad_norm": 0.4802999496459961, + "learning_rate": 8.272686766671618e-06, + "loss": 0.0665, "step": 11630 }, { - "epoch": 0.8643992276845388, - "grad_norm": 2.1163792610168457, - "learning_rate": 2.481360463389277e-05, - "loss": 0.0845, + "epoch": 1.7287984553690778, + "grad_norm": 0.8245465159416199, + "learning_rate": 8.271201544630924e-06, + "loss": 0.0748, "step": 11640 }, { - "epoch": 0.8651418387048864, - "grad_norm": 3.7081539630889893, - "learning_rate": 2.480914896777068e-05, - "loss": 0.0802, + "epoch": 1.7302836774097727, + "grad_norm": 1.0788676738739014, + "learning_rate": 8.269716322590228e-06, + "loss": 0.0703, "step": 11650 }, { - "epoch": 0.8658844497252339, - "grad_norm": 0.5612799525260925, - "learning_rate": 2.48046933016486e-05, - "loss": 0.0845, + "epoch": 1.7317688994504679, + "grad_norm": 0.7828205227851868, + "learning_rate": 8.268231100549533e-06, + "loss": 0.087, "step": 11660 }, { - "epoch": 0.8666270607455815, - "grad_norm": 3.5866827964782715, - "learning_rate": 2.480023763552651e-05, - "loss": 0.0662, + "epoch": 1.733254121491163, + "grad_norm": 0.7612335085868835, + "learning_rate": 8.266745878508837e-06, + "loss": 0.0559, "step": 11670 }, { - "epoch": 0.867369671765929, - "grad_norm": 2.168499231338501, - "learning_rate": 2.4795781969404426e-05, - "loss": 0.077, + "epoch": 1.7347393435318579, + "grad_norm": 1.6535578966140747, + "learning_rate": 8.265260656468143e-06, + "loss": 0.098, "step": 11680 }, { - "epoch": 0.8681122827862765, - "grad_norm": 1.5439636707305908, - "learning_rate": 2.479132630328234e-05, - "loss": 0.0681, + "epoch": 1.7362245655725532, + "grad_norm": 1.2606977224349976, + "learning_rate": 8.263775434427448e-06, + "loss": 0.0772, "step": 11690 }, { - "epoch": 0.8688548938066241, - "grad_norm": 5.759429931640625, - "learning_rate": 2.4786870637160256e-05, - "loss": 0.0536, + "epoch": 1.7377097876132481, + "grad_norm": 0.5617197155952454, + "learning_rate": 8.262290212386752e-06, + "loss": 0.0788, "step": 11700 }, { - "epoch": 0.8695975048269716, - "grad_norm": 3.6019375324249268, - "learning_rate": 2.478241497103817e-05, - "loss": 0.0706, + "epoch": 1.7391950096539432, + "grad_norm": 0.8515493869781494, + "learning_rate": 8.260804990346058e-06, + "loss": 0.0857, "step": 11710 }, { - "epoch": 0.8703401158473192, - "grad_norm": 2.023331880569458, - "learning_rate": 2.4777959304916086e-05, - "loss": 0.1183, + "epoch": 1.7406802316946384, + "grad_norm": 0.7699450254440308, + "learning_rate": 8.259319768305363e-06, + "loss": 0.0731, "step": 11720 }, { - "epoch": 0.8710827268676667, - "grad_norm": 2.2907047271728516, - "learning_rate": 2.4773503638794e-05, - "loss": 0.1194, + "epoch": 1.7421654537353333, + "grad_norm": 0.876996636390686, + "learning_rate": 8.257834546264667e-06, + "loss": 0.072, "step": 11730 }, { - "epoch": 0.8718253378880143, - "grad_norm": 0.9772320985794067, - "learning_rate": 2.4769047972671916e-05, - "loss": 0.0775, + "epoch": 1.7436506757760286, + "grad_norm": 0.9236237406730652, + "learning_rate": 8.256349324223973e-06, + "loss": 0.0795, "step": 11740 }, { - "epoch": 0.8725679489083618, - "grad_norm": 2.4488956928253174, - "learning_rate": 2.476459230654983e-05, - "loss": 0.0775, + "epoch": 1.7451358978167235, + "grad_norm": 0.9495792388916016, + "learning_rate": 8.254864102183278e-06, + "loss": 0.0971, "step": 11750 }, { - "epoch": 0.8733105599287093, - "grad_norm": 1.9681178331375122, - "learning_rate": 2.4760136640427743e-05, - "loss": 0.0738, + "epoch": 1.7466211198574186, + "grad_norm": 1.4671180248260498, + "learning_rate": 8.253378880142582e-06, + "loss": 0.0752, "step": 11760 }, { - "epoch": 0.8740531709490569, - "grad_norm": 1.475229024887085, - "learning_rate": 2.475568097430566e-05, - "loss": 0.0754, + "epoch": 1.7481063418981138, + "grad_norm": 0.5850898027420044, + "learning_rate": 8.251893658101886e-06, + "loss": 0.0797, "step": 11770 }, { - "epoch": 0.8747957819694044, - "grad_norm": 2.42449951171875, - "learning_rate": 2.4751225308183573e-05, - "loss": 0.0802, + "epoch": 1.7495915639388089, + "grad_norm": 0.6802064180374146, + "learning_rate": 8.250408436061191e-06, + "loss": 0.0686, "step": 11780 }, { - "epoch": 0.875538392989752, - "grad_norm": 1.4891407489776611, - "learning_rate": 2.4746769642061488e-05, - "loss": 0.0627, + "epoch": 1.751076785979504, + "grad_norm": 1.377536654472351, + "learning_rate": 8.248923214020497e-06, + "loss": 0.0803, "step": 11790 }, { - "epoch": 0.8762810040100996, - "grad_norm": 2.7915236949920654, - "learning_rate": 2.4742313975939406e-05, - "loss": 0.0902, + "epoch": 1.752562008020199, + "grad_norm": 0.8861923217773438, + "learning_rate": 8.247437991979801e-06, + "loss": 0.0711, "step": 11800 }, { - "epoch": 0.877023615030447, - "grad_norm": 0.7196487188339233, - "learning_rate": 2.4737858309817318e-05, - "loss": 0.0803, + "epoch": 1.7540472300608942, + "grad_norm": 0.9996696710586548, + "learning_rate": 8.245952769939106e-06, + "loss": 0.0676, "step": 11810 }, { - "epoch": 0.8777662260507946, - "grad_norm": 2.8779609203338623, - "learning_rate": 2.4733402643695233e-05, - "loss": 0.0804, + "epoch": 1.7555324521015891, + "grad_norm": 0.7519062757492065, + "learning_rate": 8.244467547898412e-06, + "loss": 0.0931, "step": 11820 }, { - "epoch": 0.8785088370711421, - "grad_norm": 1.198697566986084, - "learning_rate": 2.4728946977573148e-05, - "loss": 0.0507, + "epoch": 1.7570176741422843, + "grad_norm": 0.7617473006248474, + "learning_rate": 8.242982325857716e-06, + "loss": 0.1047, "step": 11830 }, { - "epoch": 0.8792514480914897, - "grad_norm": 2.312344789505005, - "learning_rate": 2.4724491311451063e-05, - "loss": 0.0869, + "epoch": 1.7585028961829794, + "grad_norm": 0.5361869931221008, + "learning_rate": 8.24149710381702e-06, + "loss": 0.0704, "step": 11840 }, { - "epoch": 0.8799940591118373, - "grad_norm": 1.2055100202560425, - "learning_rate": 2.4720035645328978e-05, - "loss": 0.0791, + "epoch": 1.7599881182236743, + "grad_norm": 1.0671625137329102, + "learning_rate": 8.240011881776327e-06, + "loss": 0.0721, "step": 11850 }, { - "epoch": 0.8807366701321847, - "grad_norm": 1.9583430290222168, - "learning_rate": 2.4715579979206893e-05, - "loss": 0.0704, + "epoch": 1.7614733402643696, + "grad_norm": 1.3257787227630615, + "learning_rate": 8.238526659735631e-06, + "loss": 0.0787, "step": 11860 }, { - "epoch": 0.8814792811525323, - "grad_norm": 3.8078420162200928, - "learning_rate": 2.4711124313084808e-05, - "loss": 0.0853, + "epoch": 1.7629585623050645, + "grad_norm": 0.47311872243881226, + "learning_rate": 8.237041437694936e-06, + "loss": 0.0844, "step": 11870 }, { - "epoch": 0.8822218921728798, - "grad_norm": 1.3811652660369873, - "learning_rate": 2.4706668646962723e-05, - "loss": 0.0972, + "epoch": 1.7644437843457597, + "grad_norm": 0.9832594990730286, + "learning_rate": 8.235556215654242e-06, + "loss": 0.0589, "step": 11880 }, { - "epoch": 0.8829645031932274, - "grad_norm": 0.9326895475387573, - "learning_rate": 2.4702212980840634e-05, - "loss": 0.0777, + "epoch": 1.7659290063864548, + "grad_norm": 1.4535409212112427, + "learning_rate": 8.234070993613545e-06, + "loss": 0.0787, "step": 11890 }, { - "epoch": 0.883707114213575, - "grad_norm": 1.280218243598938, - "learning_rate": 2.4697757314718552e-05, - "loss": 0.0856, + "epoch": 1.76741422842715, + "grad_norm": 0.5096237063407898, + "learning_rate": 8.23258577157285e-06, + "loss": 0.0668, "step": 11900 }, { - "epoch": 0.8844497252339225, - "grad_norm": 3.6391515731811523, - "learning_rate": 2.4693301648596467e-05, - "loss": 0.0919, + "epoch": 1.768899450467845, + "grad_norm": 0.4844669699668884, + "learning_rate": 8.231100549532155e-06, + "loss": 0.064, "step": 11910 }, { - "epoch": 0.88519233625427, - "grad_norm": 1.1083297729492188, - "learning_rate": 2.468884598247438e-05, - "loss": 0.0576, + "epoch": 1.77038467250854, + "grad_norm": 0.989778995513916, + "learning_rate": 8.22961532749146e-06, + "loss": 0.0832, "step": 11920 }, { - "epoch": 0.8859349472746175, - "grad_norm": 1.3229732513427734, - "learning_rate": 2.4684390316352294e-05, - "loss": 0.0891, + "epoch": 1.7718698945492353, + "grad_norm": 0.5861250758171082, + "learning_rate": 8.228130105450766e-06, + "loss": 0.0792, "step": 11930 }, { - "epoch": 0.8866775582949651, - "grad_norm": 1.0628166198730469, - "learning_rate": 2.4679934650230212e-05, - "loss": 0.1007, + "epoch": 1.7733551165899302, + "grad_norm": 0.930970311164856, + "learning_rate": 8.22664488341007e-06, + "loss": 0.0809, "step": 11940 }, { - "epoch": 0.8874201693153126, - "grad_norm": 1.2441374063491821, - "learning_rate": 2.4675478984108124e-05, - "loss": 0.0989, + "epoch": 1.7748403386306253, + "grad_norm": 1.105394959449768, + "learning_rate": 8.225159661369375e-06, + "loss": 0.0817, "step": 11950 }, { - "epoch": 0.8881627803356602, - "grad_norm": 0.5451275110244751, - "learning_rate": 2.467102331798604e-05, - "loss": 0.0576, + "epoch": 1.7763255606713204, + "grad_norm": 0.5445694923400879, + "learning_rate": 8.22367443932868e-06, + "loss": 0.0833, "step": 11960 }, { - "epoch": 0.8889053913560078, - "grad_norm": 0.9148317575454712, - "learning_rate": 2.4666567651863954e-05, - "loss": 0.0591, + "epoch": 1.7778107827120153, + "grad_norm": 0.954166829586029, + "learning_rate": 8.222189217287985e-06, + "loss": 0.0633, "step": 11970 }, { - "epoch": 0.8896480023763552, - "grad_norm": 0.7988538146018982, - "learning_rate": 2.466211198574187e-05, - "loss": 0.0683, + "epoch": 1.7792960047527107, + "grad_norm": 1.1215667724609375, + "learning_rate": 8.22070399524729e-06, + "loss": 0.0576, "step": 11980 }, { - "epoch": 0.8903906133967028, - "grad_norm": 2.098226547241211, - "learning_rate": 2.4657656319619784e-05, - "loss": 0.0964, + "epoch": 1.7807812267934056, + "grad_norm": 1.0657153129577637, + "learning_rate": 8.219218773206596e-06, + "loss": 0.052, "step": 11990 }, { - "epoch": 0.8911332244170503, - "grad_norm": 2.0330681800842285, - "learning_rate": 2.4653200653497696e-05, - "loss": 0.0865, + "epoch": 1.7822664488341007, + "grad_norm": 0.8490333557128906, + "learning_rate": 8.2177335511659e-06, + "loss": 0.056, "step": 12000 }, { - "epoch": 0.8918758354373979, - "grad_norm": 2.2921535968780518, - "learning_rate": 2.4648744987375614e-05, - "loss": 0.077, + "epoch": 1.7837516708747958, + "grad_norm": 0.7806589007377625, + "learning_rate": 8.216248329125205e-06, + "loss": 0.0554, "step": 12010 }, { - "epoch": 0.8926184464577455, - "grad_norm": 3.7081544399261475, - "learning_rate": 2.464428932125353e-05, - "loss": 0.099, + "epoch": 1.7852368929154907, + "grad_norm": 0.9815047979354858, + "learning_rate": 8.21476310708451e-06, + "loss": 0.0868, "step": 12020 }, { - "epoch": 0.893361057478093, - "grad_norm": 0.6023477911949158, - "learning_rate": 2.463983365513144e-05, - "loss": 0.0599, + "epoch": 1.786722114956186, + "grad_norm": 1.7111990451812744, + "learning_rate": 8.213277885043815e-06, + "loss": 0.0767, "step": 12030 }, { - "epoch": 0.8941036684984405, - "grad_norm": 1.6796938180923462, - "learning_rate": 2.463537798900936e-05, - "loss": 0.0984, + "epoch": 1.788207336996881, + "grad_norm": 0.5546345710754395, + "learning_rate": 8.21179266300312e-06, + "loss": 0.074, "step": 12040 }, { - "epoch": 0.894846279518788, - "grad_norm": 4.132201194763184, - "learning_rate": 2.4630922322887274e-05, - "loss": 0.0771, + "epoch": 1.789692559037576, + "grad_norm": 0.6579710245132446, + "learning_rate": 8.210307440962426e-06, + "loss": 0.0711, "step": 12050 }, { - "epoch": 0.8955888905391356, - "grad_norm": 2.146115303039551, - "learning_rate": 2.4626466656765186e-05, - "loss": 0.0808, + "epoch": 1.7911777810782712, + "grad_norm": 0.24335908889770508, + "learning_rate": 8.208822218921729e-06, + "loss": 0.0714, "step": 12060 }, { - "epoch": 0.8963315015594832, - "grad_norm": 0.9783619046211243, - "learning_rate": 2.4622010990643104e-05, - "loss": 0.0676, + "epoch": 1.7926630031189663, + "grad_norm": 1.7280948162078857, + "learning_rate": 8.207336996881035e-06, + "loss": 0.062, "step": 12070 }, { - "epoch": 0.8970741125798307, - "grad_norm": 0.4546336829662323, - "learning_rate": 2.4617555324521016e-05, - "loss": 0.092, + "epoch": 1.7941482251596614, + "grad_norm": 0.5008382797241211, + "learning_rate": 8.205851774840339e-06, + "loss": 0.0704, "step": 12080 }, { - "epoch": 0.8978167236001783, - "grad_norm": 1.2638888359069824, - "learning_rate": 2.461309965839893e-05, - "loss": 0.0672, + "epoch": 1.7956334472003563, + "grad_norm": 1.4439283609390259, + "learning_rate": 8.204366552799644e-06, + "loss": 0.0912, "step": 12090 }, { - "epoch": 0.8985593346205257, - "grad_norm": 1.3266007900238037, - "learning_rate": 2.4608643992276846e-05, - "loss": 0.0567, + "epoch": 1.7971186692410517, + "grad_norm": 0.7069539427757263, + "learning_rate": 8.20288133075895e-06, + "loss": 0.0556, "step": 12100 }, { - "epoch": 0.8993019456408733, - "grad_norm": 2.1246678829193115, - "learning_rate": 2.460418832615476e-05, - "loss": 0.0795, + "epoch": 1.7986038912817466, + "grad_norm": 0.9885798692703247, + "learning_rate": 8.201396108718254e-06, + "loss": 0.0631, "step": 12110 }, { - "epoch": 0.9000445566612209, - "grad_norm": 2.1990578174591064, - "learning_rate": 2.4599732660032676e-05, - "loss": 0.0881, + "epoch": 1.8000891133224417, + "grad_norm": 0.8661073446273804, + "learning_rate": 8.199910886677559e-06, + "loss": 0.0687, "step": 12120 }, { - "epoch": 0.9007871676815684, - "grad_norm": 0.5446377992630005, - "learning_rate": 2.459527699391059e-05, - "loss": 0.0697, + "epoch": 1.8015743353631368, + "grad_norm": 1.235711693763733, + "learning_rate": 8.198425664636865e-06, + "loss": 0.0695, "step": 12130 }, { - "epoch": 0.901529778701916, - "grad_norm": 2.7443840503692627, - "learning_rate": 2.4590821327788506e-05, - "loss": 0.0912, + "epoch": 1.8030595574038317, + "grad_norm": 0.7834354639053345, + "learning_rate": 8.196940442596169e-06, + "loss": 0.0845, "step": 12140 }, { - "epoch": 0.9022723897222634, - "grad_norm": 1.4836909770965576, - "learning_rate": 2.458636566166642e-05, - "loss": 0.0904, + "epoch": 1.804544779444527, + "grad_norm": 0.5605319142341614, + "learning_rate": 8.195455220555474e-06, + "loss": 0.0648, "step": 12150 }, { - "epoch": 0.903015000742611, - "grad_norm": 0.3852311968803406, - "learning_rate": 2.4581909995544336e-05, - "loss": 0.0695, + "epoch": 1.806030001485222, + "grad_norm": 1.1360461711883545, + "learning_rate": 8.19396999851478e-06, + "loss": 0.0792, "step": 12160 }, { - "epoch": 0.9037576117629585, - "grad_norm": 1.652395248413086, - "learning_rate": 2.4577454329422247e-05, - "loss": 0.0836, + "epoch": 1.807515223525917, + "grad_norm": 0.9927794337272644, + "learning_rate": 8.192484776474084e-06, + "loss": 0.0811, "step": 12170 }, { - "epoch": 0.9045002227833061, - "grad_norm": 3.4490652084350586, - "learning_rate": 2.4572998663300166e-05, - "loss": 0.0974, + "epoch": 1.8090004455666122, + "grad_norm": 0.9647291898727417, + "learning_rate": 8.190999554433389e-06, + "loss": 0.0767, "step": 12180 }, { - "epoch": 0.9052428338036537, - "grad_norm": 1.1233237981796265, - "learning_rate": 2.4568542997178077e-05, - "loss": 0.0968, + "epoch": 1.8104856676073073, + "grad_norm": 0.6713846325874329, + "learning_rate": 8.189514332392693e-06, + "loss": 0.0601, "step": 12190 }, { - "epoch": 0.9059854448240012, - "grad_norm": 1.1226853132247925, - "learning_rate": 2.4564087331055992e-05, - "loss": 0.0837, + "epoch": 1.8119708896480025, + "grad_norm": 0.9489417672157288, + "learning_rate": 8.188029110351997e-06, + "loss": 0.0781, "step": 12200 }, { - "epoch": 0.9067280558443487, - "grad_norm": 6.846561908721924, - "learning_rate": 2.455963166493391e-05, - "loss": 0.0965, + "epoch": 1.8134561116886974, + "grad_norm": 0.9353389739990234, + "learning_rate": 8.186543888311304e-06, + "loss": 0.0701, "step": 12210 }, { - "epoch": 0.9074706668646962, - "grad_norm": 2.6397814750671387, - "learning_rate": 2.4555175998811822e-05, - "loss": 0.074, + "epoch": 1.8149413337293927, + "grad_norm": 1.8731766939163208, + "learning_rate": 8.185058666270608e-06, + "loss": 0.0798, "step": 12220 }, { - "epoch": 0.9082132778850438, - "grad_norm": 1.7175049781799316, - "learning_rate": 2.4550720332689737e-05, - "loss": 0.076, + "epoch": 1.8164265557700876, + "grad_norm": 0.8568295240402222, + "learning_rate": 8.183573444229912e-06, + "loss": 0.0682, "step": 12230 }, { - "epoch": 0.9089558889053914, - "grad_norm": 1.3105518817901611, - "learning_rate": 2.4546264666567652e-05, - "loss": 0.0952, + "epoch": 1.8179117778107827, + "grad_norm": 0.7391093373298645, + "learning_rate": 8.182088222189219e-06, + "loss": 0.0693, "step": 12240 }, { - "epoch": 0.9096984999257389, - "grad_norm": 3.014943838119507, - "learning_rate": 2.4541809000445567e-05, - "loss": 0.089, + "epoch": 1.8193969998514778, + "grad_norm": 0.7887309193611145, + "learning_rate": 8.180603000148523e-06, + "loss": 0.075, "step": 12250 }, { - "epoch": 0.9104411109460865, - "grad_norm": 1.1072237491607666, - "learning_rate": 2.4537353334323482e-05, - "loss": 0.0823, + "epoch": 1.8208822218921727, + "grad_norm": 0.4091874659061432, + "learning_rate": 8.179117778107827e-06, + "loss": 0.0619, "step": 12260 }, { - "epoch": 0.9111837219664339, - "grad_norm": 1.2074459791183472, - "learning_rate": 2.4532897668201397e-05, - "loss": 0.0787, + "epoch": 1.822367443932868, + "grad_norm": 0.642400860786438, + "learning_rate": 8.177632556067134e-06, + "loss": 0.0799, "step": 12270 }, { - "epoch": 0.9119263329867815, - "grad_norm": 0.8429141640663147, - "learning_rate": 2.4528442002079312e-05, - "loss": 0.1024, + "epoch": 1.823852665973563, + "grad_norm": 0.6852913498878479, + "learning_rate": 8.176147334026438e-06, + "loss": 0.0962, "step": 12280 }, { - "epoch": 0.9126689440071291, - "grad_norm": 2.6227517127990723, - "learning_rate": 2.4523986335957227e-05, - "loss": 0.0799, + "epoch": 1.8253378880142581, + "grad_norm": 0.8183869123458862, + "learning_rate": 8.174662111985742e-06, + "loss": 0.059, "step": 12290 }, { - "epoch": 0.9134115550274766, - "grad_norm": 0.7948519587516785, - "learning_rate": 2.451953066983514e-05, - "loss": 0.0722, + "epoch": 1.8268231100549532, + "grad_norm": 0.7981597781181335, + "learning_rate": 8.173176889945047e-06, + "loss": 0.0824, "step": 12300 }, { - "epoch": 0.9141541660478242, - "grad_norm": 1.5158371925354004, - "learning_rate": 2.4515075003713057e-05, - "loss": 0.0938, + "epoch": 1.8283083320956481, + "grad_norm": 0.4024955928325653, + "learning_rate": 8.171691667904353e-06, + "loss": 0.0895, "step": 12310 }, { - "epoch": 0.9148967770681717, - "grad_norm": 1.2404049634933472, - "learning_rate": 2.4510619337590972e-05, - "loss": 0.1068, + "epoch": 1.8297935541363435, + "grad_norm": 1.4834824800491333, + "learning_rate": 8.170206445863657e-06, + "loss": 0.0766, "step": 12320 }, { - "epoch": 0.9156393880885192, - "grad_norm": 1.7605699300765991, - "learning_rate": 2.4506163671468884e-05, - "loss": 0.0894, + "epoch": 1.8312787761770384, + "grad_norm": 1.0925889015197754, + "learning_rate": 8.168721223822962e-06, + "loss": 0.0823, "step": 12330 }, { - "epoch": 0.9163819991088668, - "grad_norm": 1.6435573101043701, - "learning_rate": 2.45017080053468e-05, - "loss": 0.0742, + "epoch": 1.8327639982177335, + "grad_norm": 0.5679433345794678, + "learning_rate": 8.167236001782268e-06, + "loss": 0.0712, "step": 12340 }, { - "epoch": 0.9171246101292143, - "grad_norm": 0.6193175911903381, - "learning_rate": 2.4497252339224717e-05, - "loss": 0.0551, + "epoch": 1.8342492202584286, + "grad_norm": 0.6262628436088562, + "learning_rate": 8.165750779741572e-06, + "loss": 0.0834, "step": 12350 }, { - "epoch": 0.9178672211495619, - "grad_norm": 2.4157028198242188, - "learning_rate": 2.449279667310263e-05, - "loss": 0.0889, + "epoch": 1.8357344422991237, + "grad_norm": 0.903304934501648, + "learning_rate": 8.164265557700877e-06, + "loss": 0.0757, "step": 12360 }, { - "epoch": 0.9186098321699094, - "grad_norm": 3.2014384269714355, - "learning_rate": 2.4488341006980544e-05, - "loss": 0.0777, + "epoch": 1.8372196643398189, + "grad_norm": 1.2422025203704834, + "learning_rate": 8.162780335660181e-06, + "loss": 0.0547, "step": 12370 }, { - "epoch": 0.919352443190257, - "grad_norm": 2.560277223587036, - "learning_rate": 2.448388534085846e-05, - "loss": 0.0892, + "epoch": 1.8387048863805138, + "grad_norm": 0.7186129093170166, + "learning_rate": 8.161295113619486e-06, + "loss": 0.0574, "step": 12380 }, { - "epoch": 0.9200950542106044, - "grad_norm": 1.1629691123962402, - "learning_rate": 2.4479429674736374e-05, - "loss": 0.0693, + "epoch": 1.8401901084212091, + "grad_norm": 0.728678286075592, + "learning_rate": 8.159809891578792e-06, + "loss": 0.0754, "step": 12390 }, { - "epoch": 0.920837665230952, - "grad_norm": 0.7739498019218445, - "learning_rate": 2.447497400861429e-05, - "loss": 0.0751, + "epoch": 1.841675330461904, + "grad_norm": 1.3359917402267456, + "learning_rate": 8.158324669538096e-06, + "loss": 0.0937, "step": 12400 }, { - "epoch": 0.9215802762512996, - "grad_norm": 3.0681796073913574, - "learning_rate": 2.44705183424922e-05, - "loss": 0.1, + "epoch": 1.8431605525025991, + "grad_norm": 0.9950870871543884, + "learning_rate": 8.1568394474974e-06, + "loss": 0.0708, "step": 12410 }, { - "epoch": 0.9223228872716471, - "grad_norm": 1.9392578601837158, - "learning_rate": 2.446606267637012e-05, - "loss": 0.0949, + "epoch": 1.8446457745432943, + "grad_norm": 0.5623972415924072, + "learning_rate": 8.155354225456707e-06, + "loss": 0.0796, "step": 12420 }, { - "epoch": 0.9230654982919947, - "grad_norm": 1.269616723060608, - "learning_rate": 2.4461607010248034e-05, - "loss": 0.0821, + "epoch": 1.8461309965839892, + "grad_norm": 0.8156291246414185, + "learning_rate": 8.153869003416011e-06, + "loss": 0.0818, "step": 12430 }, { - "epoch": 0.9238081093123421, - "grad_norm": 0.9152816534042358, - "learning_rate": 2.4457151344125945e-05, - "loss": 0.0905, + "epoch": 1.8476162186246845, + "grad_norm": 1.0178585052490234, + "learning_rate": 8.152383781375316e-06, + "loss": 0.0717, "step": 12440 }, { - "epoch": 0.9245507203326897, - "grad_norm": 1.6232317686080933, - "learning_rate": 2.4452695678003864e-05, - "loss": 0.0869, + "epoch": 1.8491014406653794, + "grad_norm": 0.8862835168838501, + "learning_rate": 8.150898559334622e-06, + "loss": 0.0639, "step": 12450 }, { - "epoch": 0.9252933313530373, - "grad_norm": 3.454188585281372, - "learning_rate": 2.444824001188178e-05, - "loss": 0.0908, + "epoch": 1.8505866627060745, + "grad_norm": 1.0448565483093262, + "learning_rate": 8.149413337293926e-06, + "loss": 0.0868, "step": 12460 }, { - "epoch": 0.9260359423733848, - "grad_norm": 1.880387783050537, - "learning_rate": 2.444378434575969e-05, - "loss": 0.0705, + "epoch": 1.8520718847467696, + "grad_norm": 1.3027526140213013, + "learning_rate": 8.14792811525323e-06, + "loss": 0.0671, "step": 12470 }, { - "epoch": 0.9267785533937324, - "grad_norm": 2.0626840591430664, - "learning_rate": 2.443932867963761e-05, - "loss": 0.0897, + "epoch": 1.8535571067874648, + "grad_norm": 0.7499740123748779, + "learning_rate": 8.146442893212537e-06, + "loss": 0.0749, "step": 12480 }, { - "epoch": 0.92752116441408, - "grad_norm": 1.5957422256469727, - "learning_rate": 2.443487301351552e-05, - "loss": 0.0902, + "epoch": 1.85504232882816, + "grad_norm": 1.153708577156067, + "learning_rate": 8.14495767117184e-06, + "loss": 0.0722, "step": 12490 }, { - "epoch": 0.9282637754344274, - "grad_norm": 3.2366816997528076, - "learning_rate": 2.4430417347393435e-05, - "loss": 0.0757, + "epoch": 1.8565275508688548, + "grad_norm": 0.627673864364624, + "learning_rate": 8.143472449131146e-06, + "loss": 0.0502, "step": 12500 }, { - "epoch": 0.929006386454775, - "grad_norm": 0.9479996562004089, - "learning_rate": 2.442596168127135e-05, - "loss": 0.0631, + "epoch": 1.8580127729095501, + "grad_norm": 1.2380660772323608, + "learning_rate": 8.141987227090452e-06, + "loss": 0.0751, "step": 12510 }, { - "epoch": 0.9297489974751225, - "grad_norm": 1.3636139631271362, - "learning_rate": 2.4421506015149265e-05, - "loss": 0.0845, + "epoch": 1.859497994950245, + "grad_norm": 0.9718501567840576, + "learning_rate": 8.140502005049755e-06, + "loss": 0.0813, "step": 12520 }, { - "epoch": 0.9304916084954701, - "grad_norm": 1.1171748638153076, - "learning_rate": 2.441705034902718e-05, - "loss": 0.0786, + "epoch": 1.8609832169909402, + "grad_norm": 0.7296931743621826, + "learning_rate": 8.13901678300906e-06, + "loss": 0.0771, "step": 12530 }, { - "epoch": 0.9312342195158176, - "grad_norm": 1.6914044618606567, - "learning_rate": 2.4412594682905095e-05, - "loss": 0.0919, + "epoch": 1.8624684390316353, + "grad_norm": 0.6836357712745667, + "learning_rate": 8.137531560968365e-06, + "loss": 0.0657, "step": 12540 }, { - "epoch": 0.9319768305361652, - "grad_norm": 1.3057868480682373, - "learning_rate": 2.440813901678301e-05, - "loss": 0.079, + "epoch": 1.8639536610723302, + "grad_norm": 0.8493332266807556, + "learning_rate": 8.13604633892767e-06, + "loss": 0.0699, "step": 12550 }, { - "epoch": 0.9327194415565127, - "grad_norm": 4.414134979248047, - "learning_rate": 2.4403683350660925e-05, - "loss": 0.0893, + "epoch": 1.8654388831130255, + "grad_norm": 0.5506488084793091, + "learning_rate": 8.134561116886976e-06, + "loss": 0.0656, "step": 12560 }, { - "epoch": 0.9334620525768602, - "grad_norm": 0.9300063848495483, - "learning_rate": 2.439922768453884e-05, - "loss": 0.1079, + "epoch": 1.8669241051537204, + "grad_norm": 1.1473393440246582, + "learning_rate": 8.13307589484628e-06, + "loss": 0.0688, "step": 12570 }, { - "epoch": 0.9342046635972078, - "grad_norm": 0.956235408782959, - "learning_rate": 2.4394772018416752e-05, - "loss": 0.1299, + "epoch": 1.8684093271944155, + "grad_norm": 0.6129547357559204, + "learning_rate": 8.131590672805584e-06, + "loss": 0.0615, "step": 12580 }, { - "epoch": 0.9349472746175553, - "grad_norm": 1.3673025369644165, - "learning_rate": 2.439031635229467e-05, - "loss": 0.0704, + "epoch": 1.8698945492351107, + "grad_norm": 0.6691162586212158, + "learning_rate": 8.13010545076489e-06, + "loss": 0.0637, "step": 12590 }, { - "epoch": 0.9356898856379029, - "grad_norm": 0.966641366481781, - "learning_rate": 2.4385860686172582e-05, - "loss": 0.0806, + "epoch": 1.8713797712758056, + "grad_norm": 1.2546964883804321, + "learning_rate": 8.128620228724195e-06, + "loss": 0.0796, "step": 12600 }, { - "epoch": 0.9364324966582505, - "grad_norm": 1.7886812686920166, - "learning_rate": 2.4381405020050497e-05, - "loss": 0.0621, + "epoch": 1.872864993316501, + "grad_norm": 0.7441532611846924, + "learning_rate": 8.1271350066835e-06, + "loss": 0.0818, "step": 12610 }, { - "epoch": 0.9371751076785979, - "grad_norm": 1.5795032978057861, - "learning_rate": 2.4376949353928415e-05, - "loss": 0.0645, + "epoch": 1.8743502153571958, + "grad_norm": 1.3013454675674438, + "learning_rate": 8.125649784642806e-06, + "loss": 0.0689, "step": 12620 }, { - "epoch": 0.9379177186989455, - "grad_norm": 0.8962666392326355, - "learning_rate": 2.4372493687806327e-05, - "loss": 0.0825, + "epoch": 1.875835437397891, + "grad_norm": 0.7669069170951843, + "learning_rate": 8.12416456260211e-06, + "loss": 0.0647, "step": 12630 }, { - "epoch": 0.938660329719293, - "grad_norm": 1.669140100479126, - "learning_rate": 2.4368038021684242e-05, - "loss": 0.1027, + "epoch": 1.877320659438586, + "grad_norm": 1.1804280281066895, + "learning_rate": 8.122679340561414e-06, + "loss": 0.0915, "step": 12640 }, { - "epoch": 0.9394029407396406, - "grad_norm": 0.6645317077636719, - "learning_rate": 2.4363582355562157e-05, - "loss": 0.0672, + "epoch": 1.8788058814792812, + "grad_norm": 1.0035256147384644, + "learning_rate": 8.12119411852072e-06, + "loss": 0.0844, "step": 12650 }, { - "epoch": 0.9401455517599882, - "grad_norm": 1.4275974035263062, - "learning_rate": 2.4359126689440072e-05, - "loss": 0.094, + "epoch": 1.8802911035199763, + "grad_norm": 0.7921715974807739, + "learning_rate": 8.119708896480023e-06, + "loss": 0.0752, "step": 12660 }, { - "epoch": 0.9408881627803357, - "grad_norm": 1.8758856058120728, - "learning_rate": 2.4354671023317987e-05, - "loss": 0.0911, + "epoch": 1.8817763255606712, + "grad_norm": 0.3802033066749573, + "learning_rate": 8.11822367443933e-06, + "loss": 0.0712, "step": 12670 }, { - "epoch": 0.9416307738006832, - "grad_norm": 1.4419819116592407, - "learning_rate": 2.4350215357195902e-05, - "loss": 0.0613, + "epoch": 1.8832615476013665, + "grad_norm": 0.45273879170417786, + "learning_rate": 8.116738452398636e-06, + "loss": 0.0694, "step": 12680 }, { - "epoch": 0.9423733848210307, - "grad_norm": 1.668791651725769, - "learning_rate": 2.4345759691073817e-05, - "loss": 0.067, + "epoch": 1.8847467696420614, + "grad_norm": 0.9187343716621399, + "learning_rate": 8.115253230357938e-06, + "loss": 0.0643, "step": 12690 }, { - "epoch": 0.9431159958413783, - "grad_norm": 1.9676769971847534, - "learning_rate": 2.4341304024951732e-05, - "loss": 0.0881, + "epoch": 1.8862319916827566, + "grad_norm": 0.7383594512939453, + "learning_rate": 8.113768008317244e-06, + "loss": 0.067, "step": 12700 }, { - "epoch": 0.9438586068617258, - "grad_norm": 0.5292758941650391, - "learning_rate": 2.4336848358829643e-05, - "loss": 0.0722, + "epoch": 1.8877172137234517, + "grad_norm": 1.0016905069351196, + "learning_rate": 8.112282786276549e-06, + "loss": 0.0725, "step": 12710 }, { - "epoch": 0.9446012178820734, - "grad_norm": 2.253980875015259, - "learning_rate": 2.433239269270756e-05, - "loss": 0.0689, + "epoch": 1.8892024357641466, + "grad_norm": 0.43434959650039673, + "learning_rate": 8.110797564235853e-06, + "loss": 0.0708, "step": 12720 }, { - "epoch": 0.9453438289024209, - "grad_norm": 2.0630314350128174, - "learning_rate": 2.4327937026585477e-05, - "loss": 0.0867, + "epoch": 1.890687657804842, + "grad_norm": 0.5876697897911072, + "learning_rate": 8.10931234219516e-06, + "loss": 0.0771, "step": 12730 }, { - "epoch": 0.9460864399227684, - "grad_norm": 0.9975630640983582, - "learning_rate": 2.4323481360463388e-05, - "loss": 0.1088, + "epoch": 1.8921728798455368, + "grad_norm": 0.7081173062324524, + "learning_rate": 8.107827120154464e-06, + "loss": 0.0798, "step": 12740 }, { - "epoch": 0.946829050943116, - "grad_norm": 1.6637675762176514, - "learning_rate": 2.4319025694341303e-05, - "loss": 0.103, + "epoch": 1.893658101886232, + "grad_norm": 0.8173850774765015, + "learning_rate": 8.106341898113768e-06, + "loss": 0.0733, "step": 12750 }, { - "epoch": 0.9475716619634635, - "grad_norm": 1.508355975151062, - "learning_rate": 2.431457002821922e-05, - "loss": 0.1022, + "epoch": 1.895143323926927, + "grad_norm": 1.0225803852081299, + "learning_rate": 8.104856676073074e-06, + "loss": 0.0657, "step": 12760 }, { - "epoch": 0.9483142729838111, - "grad_norm": 1.989896297454834, - "learning_rate": 2.4310114362097133e-05, - "loss": 0.0647, + "epoch": 1.8966285459676222, + "grad_norm": 0.45076829195022583, + "learning_rate": 8.103371454032379e-06, + "loss": 0.0643, "step": 12770 }, { - "epoch": 0.9490568840041587, - "grad_norm": 0.7862587571144104, - "learning_rate": 2.4305658695975048e-05, - "loss": 0.0754, + "epoch": 1.8981137680083173, + "grad_norm": 0.8104088306427002, + "learning_rate": 8.101886231991683e-06, + "loss": 0.069, "step": 12780 }, { - "epoch": 0.9497994950245061, - "grad_norm": 0.7664479613304138, - "learning_rate": 2.4301203029852967e-05, - "loss": 0.0759, + "epoch": 1.8995989900490122, + "grad_norm": 0.32960525155067444, + "learning_rate": 8.10040100995099e-06, + "loss": 0.0722, "step": 12790 }, { - "epoch": 0.9505421060448537, - "grad_norm": 1.1246778964996338, - "learning_rate": 2.4296747363730878e-05, - "loss": 0.0671, + "epoch": 1.9010842120897076, + "grad_norm": 0.983810544013977, + "learning_rate": 8.098915787910294e-06, + "loss": 0.0593, "step": 12800 }, { - "epoch": 0.9512847170652012, - "grad_norm": 1.6385598182678223, - "learning_rate": 2.4292291697608793e-05, - "loss": 0.1062, + "epoch": 1.9025694341304025, + "grad_norm": 0.5095059871673584, + "learning_rate": 8.097430565869598e-06, + "loss": 0.0692, "step": 12810 }, { - "epoch": 0.9520273280855488, - "grad_norm": 1.8573966026306152, - "learning_rate": 2.4287836031486705e-05, + "epoch": 1.9040546561710976, + "grad_norm": 0.8406579494476318, + "learning_rate": 8.095945343828903e-06, "loss": 0.0691, "step": 12820 }, { - "epoch": 0.9527699391058964, - "grad_norm": 1.2876501083374023, - "learning_rate": 2.4283380365364623e-05, - "loss": 0.0699, + "epoch": 1.9055398782117927, + "grad_norm": 0.5694898962974548, + "learning_rate": 8.094460121788207e-06, + "loss": 0.0822, "step": 12830 }, { - "epoch": 0.9535125501262439, - "grad_norm": 3.32975435256958, - "learning_rate": 2.4278924699242538e-05, - "loss": 0.0984, + "epoch": 1.9070251002524876, + "grad_norm": 0.5778987407684326, + "learning_rate": 8.092974899747513e-06, + "loss": 0.0726, "step": 12840 }, { - "epoch": 0.9542551611465914, - "grad_norm": 2.0247581005096436, - "learning_rate": 2.427446903312045e-05, - "loss": 0.0784, + "epoch": 1.908510322293183, + "grad_norm": 0.5937968492507935, + "learning_rate": 8.091489677706818e-06, + "loss": 0.0759, "step": 12850 }, { - "epoch": 0.9549977721669389, - "grad_norm": 3.556913375854492, - "learning_rate": 2.4270013366998368e-05, - "loss": 0.0756, + "epoch": 1.9099955443338779, + "grad_norm": 0.6318387389183044, + "learning_rate": 8.090004455666122e-06, + "loss": 0.0771, "step": 12860 }, { - "epoch": 0.9557403831872865, - "grad_norm": 1.9502661228179932, - "learning_rate": 2.4265557700876283e-05, - "loss": 0.1019, + "epoch": 1.911480766374573, + "grad_norm": 0.5258229374885559, + "learning_rate": 8.088519233625428e-06, + "loss": 0.0674, "step": 12870 }, { - "epoch": 0.956482994207634, - "grad_norm": 1.5548827648162842, - "learning_rate": 2.4261102034754195e-05, - "loss": 0.077, + "epoch": 1.912965988415268, + "grad_norm": 0.7556629776954651, + "learning_rate": 8.087034011584733e-06, + "loss": 0.074, "step": 12880 }, { - "epoch": 0.9572256052279816, - "grad_norm": 2.959385871887207, - "learning_rate": 2.4256646368632113e-05, - "loss": 0.0799, + "epoch": 1.914451210455963, + "grad_norm": 0.7217100262641907, + "learning_rate": 8.085548789544037e-06, + "loss": 0.0578, "step": 12890 }, { - "epoch": 0.9579682162483292, - "grad_norm": 0.9673056602478027, - "learning_rate": 2.4252190702510025e-05, - "loss": 0.0768, + "epoch": 1.9159364324966583, + "grad_norm": 0.6837291717529297, + "learning_rate": 8.084063567503342e-06, + "loss": 0.0592, "step": 12900 }, { - "epoch": 0.9587108272686766, - "grad_norm": 1.747621774673462, - "learning_rate": 2.424773503638794e-05, - "loss": 0.0604, + "epoch": 1.9174216545373532, + "grad_norm": 0.6772293448448181, + "learning_rate": 8.082578345462648e-06, + "loss": 0.0754, "step": 12910 }, { - "epoch": 0.9594534382890242, - "grad_norm": 1.6321135759353638, - "learning_rate": 2.4243279370265855e-05, - "loss": 0.0706, + "epoch": 1.9189068765780484, + "grad_norm": 0.7865071892738342, + "learning_rate": 8.081093123421952e-06, + "loss": 0.0777, "step": 12920 }, { - "epoch": 0.9601960493093717, - "grad_norm": 2.2727482318878174, - "learning_rate": 2.423882370414377e-05, - "loss": 0.0939, + "epoch": 1.9203920986187435, + "grad_norm": 0.5671284198760986, + "learning_rate": 8.079607901381257e-06, + "loss": 0.0815, "step": 12930 }, { - "epoch": 0.9609386603297193, - "grad_norm": 0.6536130309104919, - "learning_rate": 2.4234368038021685e-05, - "loss": 0.0881, + "epoch": 1.9218773206594386, + "grad_norm": 0.7648490071296692, + "learning_rate": 8.078122679340563e-06, + "loss": 0.0585, "step": 12940 }, { - "epoch": 0.9616812713500669, - "grad_norm": 2.0956132411956787, - "learning_rate": 2.42299123718996e-05, - "loss": 0.0837, + "epoch": 1.9233625427001337, + "grad_norm": 0.4957009255886078, + "learning_rate": 8.076637457299867e-06, + "loss": 0.0508, "step": 12950 }, { - "epoch": 0.9624238823704144, - "grad_norm": 1.1719980239868164, - "learning_rate": 2.4225456705777515e-05, - "loss": 0.0789, + "epoch": 1.9248477647408286, + "grad_norm": 0.8752880692481995, + "learning_rate": 8.075152235259172e-06, + "loss": 0.0883, "step": 12960 }, { - "epoch": 0.9631664933907619, - "grad_norm": 3.233799934387207, - "learning_rate": 2.422100103965543e-05, - "loss": 0.0597, + "epoch": 1.926332986781524, + "grad_norm": 0.690645158290863, + "learning_rate": 8.073667013218478e-06, + "loss": 0.0591, "step": 12970 }, { - "epoch": 0.9639091044111094, - "grad_norm": 3.5093204975128174, - "learning_rate": 2.4216545373533345e-05, - "loss": 0.1026, + "epoch": 1.9278182088222189, + "grad_norm": 0.7108989357948303, + "learning_rate": 8.072181791177782e-06, + "loss": 0.0527, "step": 12980 }, { - "epoch": 0.964651715431457, - "grad_norm": 1.597965121269226, - "learning_rate": 2.4212089707411256e-05, - "loss": 0.0852, + "epoch": 1.929303430862914, + "grad_norm": 0.6949102282524109, + "learning_rate": 8.070696569137087e-06, + "loss": 0.0705, "step": 12990 }, { - "epoch": 0.9653943264518046, - "grad_norm": 1.1015452146530151, - "learning_rate": 2.4207634041289175e-05, - "loss": 0.0651, + "epoch": 1.9307886529036091, + "grad_norm": 1.1313326358795166, + "learning_rate": 8.069211347096391e-06, + "loss": 0.0627, "step": 13000 }, { - "epoch": 0.9661369374721521, - "grad_norm": 0.7174215316772461, - "learning_rate": 2.4203178375167086e-05, - "loss": 0.0693, + "epoch": 1.932273874944304, + "grad_norm": 0.7514609098434448, + "learning_rate": 8.067726125055695e-06, + "loss": 0.0607, "step": 13010 }, { - "epoch": 0.9668795484924996, - "grad_norm": 1.493241786956787, - "learning_rate": 2.4198722709045e-05, - "loss": 0.0757, + "epoch": 1.9337590969849994, + "grad_norm": 0.749814510345459, + "learning_rate": 8.066240903015002e-06, + "loss": 0.0598, "step": 13020 }, { - "epoch": 0.9676221595128471, - "grad_norm": 2.9904990196228027, - "learning_rate": 2.419426704292292e-05, - "loss": 0.0782, + "epoch": 1.9352443190256943, + "grad_norm": 1.5375010967254639, + "learning_rate": 8.064755680974306e-06, + "loss": 0.0777, "step": 13030 }, { - "epoch": 0.9683647705331947, - "grad_norm": 1.8118109703063965, - "learning_rate": 2.418981137680083e-05, - "loss": 0.0718, + "epoch": 1.9367295410663894, + "grad_norm": 1.0587717294692993, + "learning_rate": 8.06327045893361e-06, + "loss": 0.0808, "step": 13040 }, { - "epoch": 0.9691073815535423, - "grad_norm": 3.275655508041382, - "learning_rate": 2.4185355710678746e-05, - "loss": 0.0977, + "epoch": 1.9382147631070845, + "grad_norm": 0.8014340400695801, + "learning_rate": 8.061785236892917e-06, + "loss": 0.0889, "step": 13050 }, { - "epoch": 0.9698499925738898, - "grad_norm": 1.704103708267212, - "learning_rate": 2.4180900044556665e-05, - "loss": 0.0578, + "epoch": 1.9396999851477796, + "grad_norm": 0.5191594958305359, + "learning_rate": 8.060300014852221e-06, + "loss": 0.076, "step": 13060 }, { - "epoch": 0.9705926035942374, - "grad_norm": 1.3468433618545532, - "learning_rate": 2.4176444378434576e-05, - "loss": 0.0595, + "epoch": 1.9411852071884748, + "grad_norm": 0.6541637778282166, + "learning_rate": 8.058814792811525e-06, + "loss": 0.0701, "step": 13070 }, { - "epoch": 0.9713352146145848, - "grad_norm": 0.6979770064353943, - "learning_rate": 2.417198871231249e-05, - "loss": 0.0875, + "epoch": 1.9426704292291697, + "grad_norm": 0.8594491481781006, + "learning_rate": 8.057329570770832e-06, + "loss": 0.0696, "step": 13080 }, { - "epoch": 0.9720778256349324, - "grad_norm": 3.1347808837890625, - "learning_rate": 2.4167533046190406e-05, - "loss": 0.1095, + "epoch": 1.944155651269865, + "grad_norm": 0.7585026025772095, + "learning_rate": 8.055844348730136e-06, + "loss": 0.0777, "step": 13090 }, { - "epoch": 0.97282043665528, - "grad_norm": 2.341815710067749, - "learning_rate": 2.416307738006832e-05, - "loss": 0.0909, + "epoch": 1.94564087331056, + "grad_norm": 0.4566737711429596, + "learning_rate": 8.05435912668944e-06, + "loss": 0.0722, "step": 13100 }, { - "epoch": 0.9735630476756275, - "grad_norm": 1.4380031824111938, - "learning_rate": 2.4158621713946236e-05, - "loss": 0.0855, + "epoch": 1.947126095351255, + "grad_norm": 1.1220885515213013, + "learning_rate": 8.052873904648747e-06, + "loss": 0.0868, "step": 13110 }, { - "epoch": 0.9743056586959751, - "grad_norm": 2.384162425994873, - "learning_rate": 2.4154166047824148e-05, - "loss": 0.0851, + "epoch": 1.9486113173919501, + "grad_norm": 0.7873674631118774, + "learning_rate": 8.05138868260805e-06, + "loss": 0.0651, "step": 13120 }, { - "epoch": 0.9750482697163226, - "grad_norm": 3.7416675090789795, - "learning_rate": 2.4149710381702066e-05, - "loss": 0.0855, + "epoch": 1.950096539432645, + "grad_norm": 0.9283306002616882, + "learning_rate": 8.049903460567355e-06, + "loss": 0.0732, "step": 13130 }, { - "epoch": 0.9757908807366701, - "grad_norm": 1.3692728281021118, - "learning_rate": 2.414525471557998e-05, - "loss": 0.0492, + "epoch": 1.9515817614733404, + "grad_norm": 1.4717298746109009, + "learning_rate": 8.04841823852666e-06, + "loss": 0.0703, "step": 13140 }, { - "epoch": 0.9765334917570176, - "grad_norm": 2.30066180229187, - "learning_rate": 2.4140799049457893e-05, - "loss": 0.0818, + "epoch": 1.9530669835140353, + "grad_norm": 1.004787564277649, + "learning_rate": 8.046933016485964e-06, + "loss": 0.0723, "step": 13150 }, { - "epoch": 0.9772761027773652, - "grad_norm": 0.9965130686759949, - "learning_rate": 2.4136343383335808e-05, - "loss": 0.08, + "epoch": 1.9545522055547304, + "grad_norm": 0.5175033211708069, + "learning_rate": 8.04544779444527e-06, + "loss": 0.081, "step": 13160 }, { - "epoch": 0.9780187137977128, - "grad_norm": 3.0066161155700684, - "learning_rate": 2.4131887717213726e-05, - "loss": 0.0932, + "epoch": 1.9560374275954255, + "grad_norm": 0.9765888452529907, + "learning_rate": 8.043962572404575e-06, + "loss": 0.0685, "step": 13170 }, { - "epoch": 0.9787613248180603, - "grad_norm": 2.1586639881134033, - "learning_rate": 2.4127432051091638e-05, - "loss": 0.0582, + "epoch": 1.9575226496361204, + "grad_norm": 1.0930464267730713, + "learning_rate": 8.04247735036388e-06, + "loss": 0.0801, "step": 13180 }, { - "epoch": 0.9795039358384079, - "grad_norm": 1.0595694780349731, - "learning_rate": 2.4122976384969553e-05, - "loss": 0.0731, + "epoch": 1.9590078716768158, + "grad_norm": 0.6813475489616394, + "learning_rate": 8.040992128323185e-06, + "loss": 0.061, "step": 13190 }, { - "epoch": 0.9802465468587553, - "grad_norm": 1.56610906124115, - "learning_rate": 2.411852071884747e-05, - "loss": 0.054, + "epoch": 1.9604930937175107, + "grad_norm": 0.9491771459579468, + "learning_rate": 8.03950690628249e-06, + "loss": 0.0682, "step": 13200 }, { - "epoch": 0.9809891578791029, - "grad_norm": 0.9875765442848206, - "learning_rate": 2.4114065052725383e-05, - "loss": 0.0748, + "epoch": 1.9619783157582058, + "grad_norm": 0.9751015305519104, + "learning_rate": 8.038021684241794e-06, + "loss": 0.07, "step": 13210 }, { - "epoch": 0.9817317688994505, - "grad_norm": 0.9391055703163147, - "learning_rate": 2.4109609386603298e-05, - "loss": 0.0635, + "epoch": 1.963463537798901, + "grad_norm": 0.6187069416046143, + "learning_rate": 8.0365364622011e-06, + "loss": 0.067, "step": 13220 }, { - "epoch": 0.982474379919798, - "grad_norm": 2.4511687755584717, - "learning_rate": 2.410515372048121e-05, - "loss": 0.0709, + "epoch": 1.964948759839596, + "grad_norm": 0.6812853217124939, + "learning_rate": 8.035051240160405e-06, + "loss": 0.0613, "step": 13230 }, { - "epoch": 0.9832169909401456, - "grad_norm": 1.185548186302185, - "learning_rate": 2.4100698054359128e-05, - "loss": 0.0477, + "epoch": 1.9664339818802912, + "grad_norm": 0.4832938611507416, + "learning_rate": 8.03356601811971e-06, + "loss": 0.0661, "step": 13240 }, { - "epoch": 0.9839596019604931, - "grad_norm": 2.511913299560547, - "learning_rate": 2.4096242388237043e-05, - "loss": 0.055, + "epoch": 1.967919203920986, + "grad_norm": 0.44773080945014954, + "learning_rate": 8.032080796079015e-06, + "loss": 0.0617, "step": 13250 }, { - "epoch": 0.9847022129808406, - "grad_norm": 3.263899326324463, - "learning_rate": 2.4091786722114954e-05, - "loss": 0.0808, + "epoch": 1.9694044259616814, + "grad_norm": 0.8043562769889832, + "learning_rate": 8.03059557403832e-06, + "loss": 0.064, "step": 13260 }, { - "epoch": 0.9854448240011882, - "grad_norm": 1.0159684419631958, - "learning_rate": 2.4087331055992873e-05, - "loss": 0.0897, + "epoch": 1.9708896480023763, + "grad_norm": 0.5656445622444153, + "learning_rate": 8.029110351997624e-06, + "loss": 0.0678, "step": 13270 }, { - "epoch": 0.9861874350215357, - "grad_norm": 1.7687329053878784, - "learning_rate": 2.4082875389870788e-05, - "loss": 0.0789, + "epoch": 1.9723748700430714, + "grad_norm": 0.43990078568458557, + "learning_rate": 8.02762512995693e-06, + "loss": 0.0619, "step": 13280 }, { - "epoch": 0.9869300460418833, - "grad_norm": 3.7545151710510254, - "learning_rate": 2.40784197237487e-05, - "loss": 0.0785, + "epoch": 1.9738600920837666, + "grad_norm": 1.2974399328231812, + "learning_rate": 8.026139907916233e-06, + "loss": 0.076, "step": 13290 }, { - "epoch": 0.9876726570622308, - "grad_norm": 4.160385608673096, - "learning_rate": 2.4073964057626618e-05, - "loss": 0.0784, + "epoch": 1.9753453141244615, + "grad_norm": 0.7306672930717468, + "learning_rate": 8.02465468587554e-06, + "loss": 0.0965, "step": 13300 }, { - "epoch": 0.9884152680825783, - "grad_norm": 2.456456184387207, - "learning_rate": 2.4069508391504533e-05, - "loss": 0.0863, + "epoch": 1.9768305361651568, + "grad_norm": 0.657429575920105, + "learning_rate": 8.023169463834844e-06, + "loss": 0.0689, "step": 13310 }, { - "epoch": 0.9891578791029259, - "grad_norm": 1.3430705070495605, - "learning_rate": 2.4065052725382444e-05, - "loss": 0.0961, + "epoch": 1.9783157582058517, + "grad_norm": 0.7042214870452881, + "learning_rate": 8.021684241794148e-06, + "loss": 0.0562, "step": 13320 }, { - "epoch": 0.9899004901232734, - "grad_norm": 1.3270246982574463, - "learning_rate": 2.406059705926036e-05, - "loss": 0.0721, + "epoch": 1.9798009802465468, + "grad_norm": 0.7432844638824463, + "learning_rate": 8.020199019753454e-06, + "loss": 0.0756, "step": 13330 }, { - "epoch": 0.990643101143621, - "grad_norm": 1.4000052213668823, - "learning_rate": 2.4056141393138274e-05, - "loss": 0.0679, + "epoch": 1.981286202287242, + "grad_norm": 0.6771445870399475, + "learning_rate": 8.018713797712759e-06, + "loss": 0.0811, "step": 13340 }, { - "epoch": 0.9913857121639685, - "grad_norm": 3.377154588699341, - "learning_rate": 2.405168572701619e-05, - "loss": 0.0651, + "epoch": 1.982771424327937, + "grad_norm": 0.629199743270874, + "learning_rate": 8.017228575672063e-06, + "loss": 0.0528, "step": 13350 }, { - "epoch": 0.9921283231843161, - "grad_norm": 1.1014114618301392, - "learning_rate": 2.4047230060894104e-05, - "loss": 0.1156, + "epoch": 1.9842566463686322, + "grad_norm": 0.8837576508522034, + "learning_rate": 8.01574335363137e-06, + "loss": 0.0744, "step": 13360 }, { - "epoch": 0.9928709342046635, - "grad_norm": 2.2198503017425537, - "learning_rate": 2.404277439477202e-05, - "loss": 0.0821, + "epoch": 1.985741868409327, + "grad_norm": 0.8143694996833801, + "learning_rate": 8.014258131590674e-06, + "loss": 0.0597, "step": 13370 }, { - "epoch": 0.9936135452250111, - "grad_norm": 2.7557029724121094, - "learning_rate": 2.4038318728649934e-05, - "loss": 0.0833, + "epoch": 1.9872270904500224, + "grad_norm": 0.993070662021637, + "learning_rate": 8.012772909549978e-06, + "loss": 0.0787, "step": 13380 }, { - "epoch": 0.9943561562453587, - "grad_norm": 3.052049160003662, - "learning_rate": 2.403386306252785e-05, - "loss": 0.0832, + "epoch": 1.9887123124907173, + "grad_norm": 1.5197769403457642, + "learning_rate": 8.011287687509284e-06, + "loss": 0.0722, "step": 13390 }, { - "epoch": 0.9950987672657062, - "grad_norm": 1.0992413759231567, - "learning_rate": 2.402940739640576e-05, - "loss": 0.1037, + "epoch": 1.9901975345314125, + "grad_norm": 1.2251149415969849, + "learning_rate": 8.009802465468589e-06, + "loss": 0.0734, "step": 13400 }, { - "epoch": 0.9958413782860538, - "grad_norm": 0.9476717114448547, - "learning_rate": 2.402495173028368e-05, - "loss": 0.0732, + "epoch": 1.9916827565721076, + "grad_norm": 0.6797492504119873, + "learning_rate": 8.008317243427893e-06, + "loss": 0.0755, "step": 13410 }, { - "epoch": 0.9965839893064014, - "grad_norm": 0.403255820274353, - "learning_rate": 2.402049606416159e-05, - "loss": 0.0565, + "epoch": 1.9931679786128025, + "grad_norm": 0.8411869406700134, + "learning_rate": 8.006832021387198e-06, + "loss": 0.083, "step": 13420 }, { - "epoch": 0.9973266003267488, - "grad_norm": 0.33830782771110535, - "learning_rate": 2.4016040398039506e-05, - "loss": 0.0614, + "epoch": 1.9946532006534978, + "grad_norm": 0.501990556716919, + "learning_rate": 8.005346799346502e-06, + "loss": 0.0728, "step": 13430 }, { - "epoch": 0.9980692113470964, - "grad_norm": 1.7131567001342773, - "learning_rate": 2.4011584731917424e-05, - "loss": 0.1047, + "epoch": 1.9961384226941927, + "grad_norm": 0.4618406891822815, + "learning_rate": 8.003861577305808e-06, + "loss": 0.0841, "step": 13440 }, { - "epoch": 0.9988118223674439, - "grad_norm": 1.484653115272522, - "learning_rate": 2.4007129065795336e-05, - "loss": 0.089, + "epoch": 1.9976236447348878, + "grad_norm": 1.0440362691879272, + "learning_rate": 8.002376355265113e-06, + "loss": 0.0647, "step": 13450 }, { - "epoch": 0.9995544333877915, - "grad_norm": 1.4303562641143799, - "learning_rate": 2.400267339967325e-05, - "loss": 0.0672, + "epoch": 1.999108866775583, + "grad_norm": 1.502364993095398, + "learning_rate": 8.000891133224417e-06, + "loss": 0.0729, "step": 13460 }, { - "epoch": 1.0, - "eval_f1": 0.0, - "eval_loss": 0.06958512961864471, - "eval_runtime": 835.1475, - "eval_samples_per_second": 45.524, - "eval_steps_per_second": 2.846, + "epoch": 2.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.06133544072508812, + "eval_runtime": 210.8496, + "eval_samples_per_second": 180.313, + "eval_steps_per_second": 5.639, "step": 13466 }, { - "epoch": 1.000297044408139, - "grad_norm": 1.3870664834976196, - "learning_rate": 2.399821773355117e-05, - "loss": 0.0639, + "epoch": 2.000594088816278, + "grad_norm": 0.5821937918663025, + "learning_rate": 7.999405911183723e-06, + "loss": 0.0607, "step": 13470 }, { - "epoch": 1.0010396554284866, - "grad_norm": 1.3017977476119995, - "learning_rate": 2.399376206742908e-05, - "loss": 0.0541, + "epoch": 2.002079310856973, + "grad_norm": 0.681476354598999, + "learning_rate": 7.997920689143028e-06, + "loss": 0.053, "step": 13480 }, { - "epoch": 1.001782266448834, - "grad_norm": 3.119060516357422, - "learning_rate": 2.3989306401306996e-05, - "loss": 0.0774, + "epoch": 2.003564532897668, + "grad_norm": 0.7646535634994507, + "learning_rate": 7.996435467102332e-06, + "loss": 0.0786, "step": 13490 }, { - "epoch": 1.0025248774691817, - "grad_norm": 1.463212013244629, - "learning_rate": 2.398485073518491e-05, - "loss": 0.0869, + "epoch": 2.0050497549383635, + "grad_norm": 1.3491325378417969, + "learning_rate": 7.994950245061638e-06, + "loss": 0.0735, "step": 13500 }, { - "epoch": 1.0032674884895292, - "grad_norm": 1.4256367683410645, - "learning_rate": 2.3980395069062826e-05, - "loss": 0.0856, + "epoch": 2.0065349769790584, + "grad_norm": 0.9875779747962952, + "learning_rate": 7.993465023020943e-06, + "loss": 0.0746, "step": 13510 }, { - "epoch": 1.0040100995098766, - "grad_norm": 2.062420606613159, - "learning_rate": 2.397593940294074e-05, - "loss": 0.0575, + "epoch": 2.0080201990197533, + "grad_norm": 0.7491360902786255, + "learning_rate": 7.991979800980247e-06, + "loss": 0.0784, "step": 13520 }, { - "epoch": 1.0047527105302243, - "grad_norm": 1.5311766862869263, - "learning_rate": 2.3971483736818653e-05, - "loss": 0.1091, + "epoch": 2.0095054210604486, + "grad_norm": 1.0167654752731323, + "learning_rate": 7.990494578939551e-06, + "loss": 0.0704, "step": 13530 }, { - "epoch": 1.0054953215505718, - "grad_norm": 1.6785446405410767, - "learning_rate": 2.396702807069657e-05, - "loss": 0.0978, + "epoch": 2.0109906431011435, + "grad_norm": 0.5021530985832214, + "learning_rate": 7.989009356898858e-06, + "loss": 0.07, "step": 13540 }, { - "epoch": 1.0062379325709194, - "grad_norm": 2.021958827972412, - "learning_rate": 2.3962572404574486e-05, - "loss": 0.1013, + "epoch": 2.012475865141839, + "grad_norm": 1.1179542541503906, + "learning_rate": 7.987524134858162e-06, + "loss": 0.0716, "step": 13550 }, { - "epoch": 1.0069805435912669, - "grad_norm": 1.236924409866333, - "learning_rate": 2.3958116738452397e-05, - "loss": 0.0621, + "epoch": 2.0139610871825337, + "grad_norm": 1.1701716184616089, + "learning_rate": 7.986038912817466e-06, + "loss": 0.0778, "step": 13560 }, { - "epoch": 1.0077231546116145, - "grad_norm": 0.7201011180877686, - "learning_rate": 2.3953661072330312e-05, - "loss": 0.0832, + "epoch": 2.015446309223229, + "grad_norm": 0.6975985169410706, + "learning_rate": 7.984553690776773e-06, + "loss": 0.0661, "step": 13570 }, { - "epoch": 1.008465765631962, - "grad_norm": 2.1270737648010254, - "learning_rate": 2.394920540620823e-05, - "loss": 0.0553, + "epoch": 2.016931531263924, + "grad_norm": 0.7828264832496643, + "learning_rate": 7.983068468736077e-06, + "loss": 0.069, "step": 13580 }, { - "epoch": 1.0092083766523094, - "grad_norm": 2.2044267654418945, - "learning_rate": 2.3944749740086142e-05, - "loss": 0.091, + "epoch": 2.018416753304619, + "grad_norm": 1.1737070083618164, + "learning_rate": 7.981583246695381e-06, + "loss": 0.0913, "step": 13590 }, { - "epoch": 1.0099509876726571, - "grad_norm": 1.7166577577590942, - "learning_rate": 2.3940294073964057e-05, - "loss": 0.0774, + "epoch": 2.0199019753453142, + "grad_norm": 0.706625759601593, + "learning_rate": 7.980098024654686e-06, + "loss": 0.0716, "step": 13600 }, { - "epoch": 1.0106935986930046, - "grad_norm": 1.7393256425857544, - "learning_rate": 2.3935838407841976e-05, - "loss": 0.0649, + "epoch": 2.021387197386009, + "grad_norm": 0.4071919023990631, + "learning_rate": 7.978612802613992e-06, + "loss": 0.0659, "step": 13610 }, { - "epoch": 1.0114362097133522, - "grad_norm": 1.1891884803771973, - "learning_rate": 2.3931382741719887e-05, - "loss": 0.0873, + "epoch": 2.0228724194267045, + "grad_norm": 1.00408935546875, + "learning_rate": 7.977127580573296e-06, + "loss": 0.0731, "step": 13620 }, { - "epoch": 1.0121788207336997, - "grad_norm": 0.8433098793029785, - "learning_rate": 2.3926927075597802e-05, - "loss": 0.0742, + "epoch": 2.0243576414673994, + "grad_norm": 1.0669114589691162, + "learning_rate": 7.9756423585326e-06, + "loss": 0.08, "step": 13630 }, { - "epoch": 1.0129214317540471, - "grad_norm": 1.3263208866119385, - "learning_rate": 2.3922471409475717e-05, - "loss": 0.0663, + "epoch": 2.0258428635080943, + "grad_norm": 0.363745778799057, + "learning_rate": 7.974157136491905e-06, + "loss": 0.0467, "step": 13640 }, { - "epoch": 1.0136640427743948, - "grad_norm": 2.3335938453674316, - "learning_rate": 2.3918015743353632e-05, - "loss": 0.0824, + "epoch": 2.0273280855487896, + "grad_norm": 1.2333308458328247, + "learning_rate": 7.972671914451211e-06, + "loss": 0.0555, "step": 13650 }, { - "epoch": 1.0144066537947423, - "grad_norm": 0.9380444884300232, - "learning_rate": 2.3913560077231547e-05, - "loss": 0.0787, + "epoch": 2.0288133075894845, + "grad_norm": 1.272576093673706, + "learning_rate": 7.971186692410516e-06, + "loss": 0.0807, "step": 13660 }, { - "epoch": 1.01514926481509, - "grad_norm": 1.832839846611023, - "learning_rate": 2.390910441110946e-05, - "loss": 0.0656, + "epoch": 2.03029852963018, + "grad_norm": 0.647402822971344, + "learning_rate": 7.96970147036982e-06, + "loss": 0.0724, "step": 13670 }, { - "epoch": 1.0158918758354374, - "grad_norm": 1.5540494918823242, - "learning_rate": 2.3904648744987377e-05, - "loss": 0.0403, + "epoch": 2.0317837516708748, + "grad_norm": 0.6416436433792114, + "learning_rate": 7.968216248329126e-06, + "loss": 0.0573, "step": 13680 }, { - "epoch": 1.0166344868557848, - "grad_norm": 3.9998695850372314, - "learning_rate": 2.3900193078865292e-05, - "loss": 0.1071, + "epoch": 2.0332689737115697, + "grad_norm": 0.5371004343032837, + "learning_rate": 7.96673102628843e-06, + "loss": 0.0574, "step": 13690 }, { - "epoch": 1.0173770978761325, - "grad_norm": 2.738377571105957, - "learning_rate": 2.3895737412743204e-05, - "loss": 0.0826, + "epoch": 2.034754195752265, + "grad_norm": 0.7714745998382568, + "learning_rate": 7.965245804247735e-06, + "loss": 0.0824, "step": 13700 }, { - "epoch": 1.01811970889648, - "grad_norm": 5.063292503356934, - "learning_rate": 2.3891281746621122e-05, - "loss": 0.0653, + "epoch": 2.03623941779296, + "grad_norm": 1.1034531593322754, + "learning_rate": 7.963760582207041e-06, + "loss": 0.0733, "step": 13710 }, { - "epoch": 1.0188623199168276, - "grad_norm": 0.8923290967941284, - "learning_rate": 2.3886826080499037e-05, - "loss": 0.089, + "epoch": 2.0377246398336553, + "grad_norm": 1.0166898965835571, + "learning_rate": 7.962275360166346e-06, + "loss": 0.0738, "step": 13720 }, { - "epoch": 1.019604930937175, - "grad_norm": 0.7146623134613037, - "learning_rate": 2.388237041437695e-05, - "loss": 0.0731, + "epoch": 2.03920986187435, + "grad_norm": 1.3070622682571411, + "learning_rate": 7.96079013812565e-06, + "loss": 0.0818, "step": 13730 }, { - "epoch": 1.0203475419575228, - "grad_norm": 5.034801006317139, - "learning_rate": 2.3877914748254864e-05, - "loss": 0.0837, + "epoch": 2.0406950839150455, + "grad_norm": 0.7084892392158508, + "learning_rate": 7.959304916084956e-06, + "loss": 0.0538, "step": 13740 }, { - "epoch": 1.0210901529778702, - "grad_norm": 2.207108736038208, - "learning_rate": 2.387345908213278e-05, - "loss": 0.0966, + "epoch": 2.0421803059557404, + "grad_norm": 0.5742867588996887, + "learning_rate": 7.957819694044259e-06, + "loss": 0.0875, "step": 13750 }, { - "epoch": 1.0218327639982177, - "grad_norm": 4.187524795532227, - "learning_rate": 2.3869003416010694e-05, - "loss": 0.0873, + "epoch": 2.0436655279964353, + "grad_norm": 0.7937983870506287, + "learning_rate": 7.956334472003565e-06, + "loss": 0.0639, "step": 13760 }, { - "epoch": 1.0225753750185653, - "grad_norm": 2.0011796951293945, - "learning_rate": 2.386454774988861e-05, - "loss": 0.1067, + "epoch": 2.0451507500371306, + "grad_norm": 1.001183271408081, + "learning_rate": 7.95484924996287e-06, + "loss": 0.0621, "step": 13770 }, { - "epoch": 1.0233179860389128, - "grad_norm": 1.419082760810852, - "learning_rate": 2.3860092083766524e-05, - "loss": 0.1013, + "epoch": 2.0466359720778255, + "grad_norm": 0.7804184556007385, + "learning_rate": 7.953364027922174e-06, + "loss": 0.0748, "step": 13780 }, { - "epoch": 1.0240605970592604, - "grad_norm": 1.378090500831604, - "learning_rate": 2.385563641764444e-05, - "loss": 0.0844, + "epoch": 2.048121194118521, + "grad_norm": 0.8443603515625, + "learning_rate": 7.95187880588148e-06, + "loss": 0.0661, "step": 13790 }, { - "epoch": 1.024803208079608, - "grad_norm": 1.8442440032958984, - "learning_rate": 2.3851180751522354e-05, - "loss": 0.0922, + "epoch": 2.049606416159216, + "grad_norm": 0.716850996017456, + "learning_rate": 7.950393583840785e-06, + "loss": 0.0754, "step": 13800 }, { - "epoch": 1.0255458190999553, - "grad_norm": 1.9411593675613403, - "learning_rate": 2.3846725085400266e-05, - "loss": 0.0699, + "epoch": 2.0510916381999107, + "grad_norm": 1.1926591396331787, + "learning_rate": 7.948908361800089e-06, + "loss": 0.0601, "step": 13810 }, { - "epoch": 1.026288430120303, - "grad_norm": 1.1153783798217773, - "learning_rate": 2.3842269419278184e-05, - "loss": 0.0986, + "epoch": 2.052576860240606, + "grad_norm": 1.0652192831039429, + "learning_rate": 7.947423139759395e-06, + "loss": 0.076, "step": 13820 }, { - "epoch": 1.0270310411406505, - "grad_norm": 2.119503974914551, - "learning_rate": 2.3837813753156096e-05, - "loss": 0.0851, + "epoch": 2.054062082281301, + "grad_norm": 1.3740670680999756, + "learning_rate": 7.9459379177187e-06, + "loss": 0.0739, "step": 13830 }, { - "epoch": 1.0277736521609981, - "grad_norm": 2.0890700817108154, - "learning_rate": 2.383335808703401e-05, - "loss": 0.0687, + "epoch": 2.0555473043219963, + "grad_norm": 1.0437966585159302, + "learning_rate": 7.944452695678004e-06, + "loss": 0.0842, "step": 13840 }, { - "epoch": 1.0285162631813456, - "grad_norm": 3.1581473350524902, - "learning_rate": 2.382890242091193e-05, - "loss": 0.0774, + "epoch": 2.057032526362691, + "grad_norm": 1.145579218864441, + "learning_rate": 7.94296747363731e-06, + "loss": 0.0754, "step": 13850 }, { - "epoch": 1.0292588742016933, - "grad_norm": 1.0169066190719604, - "learning_rate": 2.382444675478984e-05, - "loss": 0.0636, + "epoch": 2.0585177484033865, + "grad_norm": 0.6400442123413086, + "learning_rate": 7.941482251596615e-06, + "loss": 0.071, "step": 13860 }, { - "epoch": 1.0300014852220407, - "grad_norm": 1.5723227262496948, - "learning_rate": 2.3819991088667756e-05, - "loss": 0.0878, + "epoch": 2.0600029704440814, + "grad_norm": 0.7294432520866394, + "learning_rate": 7.939997029555919e-06, + "loss": 0.0712, "step": 13870 }, { - "epoch": 1.0307440962423882, - "grad_norm": 1.652784824371338, - "learning_rate": 2.3815535422545674e-05, - "loss": 0.0862, + "epoch": 2.0614881924847763, + "grad_norm": 1.1582225561141968, + "learning_rate": 7.938511807515225e-06, + "loss": 0.0717, "step": 13880 }, { - "epoch": 1.0314867072627358, - "grad_norm": 1.3980462551116943, - "learning_rate": 2.3811079756423586e-05, - "loss": 0.0937, + "epoch": 2.0629734145254717, + "grad_norm": 1.0745748281478882, + "learning_rate": 7.937026585474528e-06, + "loss": 0.0678, "step": 13890 }, { - "epoch": 1.0322293182830833, - "grad_norm": 0.9966709613800049, - "learning_rate": 2.38066240903015e-05, - "loss": 0.0551, + "epoch": 2.0644586365661666, + "grad_norm": 0.6209663152694702, + "learning_rate": 7.935541363433834e-06, + "loss": 0.0594, "step": 13900 }, { - "epoch": 1.032971929303431, - "grad_norm": 1.0643346309661865, - "learning_rate": 2.3802168424179416e-05, - "loss": 0.0647, + "epoch": 2.065943858606862, + "grad_norm": 0.42464518547058105, + "learning_rate": 7.93405614139314e-06, + "loss": 0.0653, "step": 13910 }, { - "epoch": 1.0337145403237784, - "grad_norm": 1.1754989624023438, - "learning_rate": 2.379771275805733e-05, - "loss": 0.0859, + "epoch": 2.067429080647557, + "grad_norm": 1.677648663520813, + "learning_rate": 7.932570919352443e-06, + "loss": 0.0769, "step": 13920 }, { - "epoch": 1.0344571513441259, - "grad_norm": 1.513095736503601, - "learning_rate": 2.3793257091935245e-05, - "loss": 0.1229, + "epoch": 2.0689143026882517, + "grad_norm": 0.6147602796554565, + "learning_rate": 7.931085697311749e-06, + "loss": 0.0738, "step": 13930 }, { - "epoch": 1.0351997623644735, - "grad_norm": 2.310269832611084, - "learning_rate": 2.3788801425813157e-05, - "loss": 0.0641, + "epoch": 2.070399524728947, + "grad_norm": 0.7974849939346313, + "learning_rate": 7.929600475271053e-06, + "loss": 0.0844, "step": 13940 }, { - "epoch": 1.035942373384821, - "grad_norm": 1.0776907205581665, - "learning_rate": 2.3784345759691075e-05, - "loss": 0.0738, + "epoch": 2.071884746769642, + "grad_norm": 1.607678771018982, + "learning_rate": 7.928115253230358e-06, + "loss": 0.0669, "step": 13950 }, { - "epoch": 1.0366849844051687, - "grad_norm": 1.2684451341629028, - "learning_rate": 2.377989009356899e-05, - "loss": 0.1101, + "epoch": 2.0733699688103373, + "grad_norm": 0.8933297991752625, + "learning_rate": 7.926630031189664e-06, + "loss": 0.0798, "step": 13960 }, { - "epoch": 1.037427595425516, - "grad_norm": 2.617946147918701, - "learning_rate": 2.3775434427446902e-05, - "loss": 0.0603, + "epoch": 2.074855190851032, + "grad_norm": 0.7862673997879028, + "learning_rate": 7.925144809148968e-06, + "loss": 0.071, "step": 13970 }, { - "epoch": 1.0381702064458636, - "grad_norm": 2.604550361633301, - "learning_rate": 2.3770978761324817e-05, - "loss": 0.1068, + "epoch": 2.076340412891727, + "grad_norm": 0.622954249382019, + "learning_rate": 7.923659587108273e-06, + "loss": 0.079, "step": 13980 }, { - "epoch": 1.0389128174662112, - "grad_norm": 2.7444822788238525, - "learning_rate": 2.3766523095202735e-05, - "loss": 0.081, + "epoch": 2.0778256349324224, + "grad_norm": 0.48314040899276733, + "learning_rate": 7.922174365067579e-06, + "loss": 0.052, "step": 13990 }, { - "epoch": 1.0396554284865587, - "grad_norm": 2.1153526306152344, - "learning_rate": 2.3762067429080647e-05, - "loss": 0.0752, + "epoch": 2.0793108569731173, + "grad_norm": 0.7916228175163269, + "learning_rate": 7.920689143026883e-06, + "loss": 0.0904, "step": 14000 }, { - "epoch": 1.0403980395069063, - "grad_norm": 2.4944326877593994, - "learning_rate": 2.3757611762958562e-05, - "loss": 0.1094, + "epoch": 2.0807960790138127, + "grad_norm": 0.45953261852264404, + "learning_rate": 7.919203920986188e-06, + "loss": 0.0764, "step": 14010 }, { - "epoch": 1.0411406505272538, - "grad_norm": 2.197497606277466, - "learning_rate": 2.375315609683648e-05, - "loss": 0.0758, + "epoch": 2.0822813010545076, + "grad_norm": 0.6490451693534851, + "learning_rate": 7.917718698945494e-06, + "loss": 0.0581, "step": 14020 }, { - "epoch": 1.0418832615476015, - "grad_norm": 1.1447632312774658, - "learning_rate": 2.3748700430714392e-05, - "loss": 0.0637, + "epoch": 2.083766523095203, + "grad_norm": 1.2762033939361572, + "learning_rate": 7.916233476904798e-06, + "loss": 0.0871, "step": 14030 }, { - "epoch": 1.042625872567949, - "grad_norm": 2.18037486076355, - "learning_rate": 2.3744244764592307e-05, - "loss": 0.0899, + "epoch": 2.085251745135898, + "grad_norm": 0.9953591227531433, + "learning_rate": 7.914748254864103e-06, + "loss": 0.0629, "step": 14040 }, { - "epoch": 1.0433684835882964, - "grad_norm": 1.8311865329742432, - "learning_rate": 2.3739789098470222e-05, - "loss": 0.0924, + "epoch": 2.0867369671765927, + "grad_norm": 0.6095679402351379, + "learning_rate": 7.913263032823407e-06, + "loss": 0.0748, "step": 14050 }, { - "epoch": 1.044111094608644, - "grad_norm": 1.3843854665756226, - "learning_rate": 2.3735333432348137e-05, - "loss": 0.0737, + "epoch": 2.088222189217288, + "grad_norm": 1.2095739841461182, + "learning_rate": 7.911777810782712e-06, + "loss": 0.073, "step": 14060 }, { - "epoch": 1.0448537056289915, - "grad_norm": 2.358302116394043, - "learning_rate": 2.3730877766226052e-05, - "loss": 0.1003, + "epoch": 2.089707411257983, + "grad_norm": 0.7403334379196167, + "learning_rate": 7.910292588742018e-06, + "loss": 0.0924, "step": 14070 }, { - "epoch": 1.0455963166493392, - "grad_norm": 0.5193414688110352, - "learning_rate": 2.3726422100103964e-05, - "loss": 0.0753, + "epoch": 2.0911926332986783, + "grad_norm": 0.6129160523414612, + "learning_rate": 7.908807366701322e-06, + "loss": 0.0688, "step": 14080 }, { - "epoch": 1.0463389276696866, - "grad_norm": 2.2045769691467285, - "learning_rate": 2.3721966433981882e-05, - "loss": 0.0484, + "epoch": 2.0926778553393732, + "grad_norm": 0.6304017305374146, + "learning_rate": 7.907322144660627e-06, + "loss": 0.0662, "step": 14090 }, { - "epoch": 1.047081538690034, - "grad_norm": 1.636837124824524, - "learning_rate": 2.3717510767859797e-05, - "loss": 0.0846, + "epoch": 2.094163077380068, + "grad_norm": 0.8260060548782349, + "learning_rate": 7.905836922619933e-06, + "loss": 0.0894, "step": 14100 }, { - "epoch": 1.0478241497103817, - "grad_norm": 2.898538112640381, - "learning_rate": 2.371305510173771e-05, - "loss": 0.1106, + "epoch": 2.0956482994207635, + "grad_norm": 1.0576426982879639, + "learning_rate": 7.904351700579237e-06, + "loss": 0.0831, "step": 14110 }, { - "epoch": 1.0485667607307292, - "grad_norm": 1.6339201927185059, - "learning_rate": 2.3708599435615627e-05, - "loss": 0.0969, + "epoch": 2.0971335214614584, + "grad_norm": 0.8142070174217224, + "learning_rate": 7.902866478538542e-06, + "loss": 0.0718, "step": 14120 }, { - "epoch": 1.0493093717510769, - "grad_norm": 4.053697109222412, - "learning_rate": 2.3704143769493542e-05, - "loss": 0.0898, + "epoch": 2.0986187435021537, + "grad_norm": 0.44303345680236816, + "learning_rate": 7.901381256497848e-06, + "loss": 0.0687, "step": 14130 }, { - "epoch": 1.0500519827714243, - "grad_norm": 2.793971300125122, - "learning_rate": 2.3699688103371454e-05, - "loss": 0.0996, + "epoch": 2.1001039655428486, + "grad_norm": 1.0897902250289917, + "learning_rate": 7.899896034457152e-06, + "loss": 0.0796, "step": 14140 }, { - "epoch": 1.050794593791772, - "grad_norm": 1.560320258140564, - "learning_rate": 2.369523243724937e-05, - "loss": 0.1084, + "epoch": 2.101589187583544, + "grad_norm": 1.709612488746643, + "learning_rate": 7.898410812416457e-06, + "loss": 0.0658, "step": 14150 }, { - "epoch": 1.0515372048121194, - "grad_norm": 2.7155508995056152, - "learning_rate": 2.3690776771127284e-05, - "loss": 0.0503, + "epoch": 2.103074409624239, + "grad_norm": 1.4813929796218872, + "learning_rate": 7.896925590375761e-06, + "loss": 0.0909, "step": 14160 }, { - "epoch": 1.0522798158324669, - "grad_norm": 1.7181273698806763, - "learning_rate": 2.36863211050052e-05, - "loss": 0.0793, + "epoch": 2.1045596316649338, + "grad_norm": 1.0697726011276245, + "learning_rate": 7.895440368335067e-06, + "loss": 0.0604, "step": 14170 }, { - "epoch": 1.0530224268528146, - "grad_norm": 3.6145498752593994, - "learning_rate": 2.3681865438883114e-05, - "loss": 0.0727, + "epoch": 2.106044853705629, + "grad_norm": 0.8308620452880859, + "learning_rate": 7.893955146294372e-06, + "loss": 0.0819, "step": 14180 }, { - "epoch": 1.053765037873162, - "grad_norm": 1.1024489402770996, - "learning_rate": 2.367740977276103e-05, - "loss": 0.0675, + "epoch": 2.107530075746324, + "grad_norm": 0.589131236076355, + "learning_rate": 7.892469924253676e-06, + "loss": 0.0403, "step": 14190 }, { - "epoch": 1.0545076488935097, - "grad_norm": 1.619287371635437, - "learning_rate": 2.3672954106638944e-05, - "loss": 0.0636, + "epoch": 2.1090152977870193, + "grad_norm": 1.19193696975708, + "learning_rate": 7.890984702212982e-06, + "loss": 0.0693, "step": 14200 }, { - "epoch": 1.0552502599138571, - "grad_norm": 2.327834367752075, - "learning_rate": 2.366849844051686e-05, - "loss": 0.0938, + "epoch": 2.1105005198277142, + "grad_norm": 0.6469309329986572, + "learning_rate": 7.889499480172287e-06, + "loss": 0.0679, "step": 14210 }, { - "epoch": 1.0559928709342046, - "grad_norm": 2.3670992851257324, - "learning_rate": 2.366404277439477e-05, - "loss": 0.0786, + "epoch": 2.111985741868409, + "grad_norm": 0.9464824795722961, + "learning_rate": 7.888014258131591e-06, + "loss": 0.0708, "step": 14220 }, { - "epoch": 1.0567354819545522, - "grad_norm": 2.3187897205352783, - "learning_rate": 2.365958710827269e-05, - "loss": 0.1029, + "epoch": 2.1134709639091045, + "grad_norm": 0.47763773798942566, + "learning_rate": 7.886529036090896e-06, + "loss": 0.0622, "step": 14230 }, { - "epoch": 1.0574780929748997, - "grad_norm": 2.0554943084716797, - "learning_rate": 2.3655131442150604e-05, - "loss": 0.0771, + "epoch": 2.1149561859497994, + "grad_norm": 0.7277671694755554, + "learning_rate": 7.885043814050202e-06, + "loss": 0.0795, "step": 14240 }, { - "epoch": 1.0582207039952474, - "grad_norm": 1.3860465288162231, - "learning_rate": 2.3650675776028515e-05, - "loss": 0.0645, + "epoch": 2.1164414079904947, + "grad_norm": 1.1749573945999146, + "learning_rate": 7.883558592009506e-06, + "loss": 0.0734, "step": 14250 }, { - "epoch": 1.0589633150155948, - "grad_norm": 0.33183351159095764, - "learning_rate": 2.3646220109906434e-05, - "loss": 0.0551, + "epoch": 2.1179266300311896, + "grad_norm": 0.9396737813949585, + "learning_rate": 7.88207336996881e-06, + "loss": 0.0711, "step": 14260 }, { - "epoch": 1.0597059260359423, - "grad_norm": 0.7407335042953491, - "learning_rate": 2.3641764443784345e-05, - "loss": 0.1163, + "epoch": 2.1194118520718845, + "grad_norm": 1.0463459491729736, + "learning_rate": 7.880588147928115e-06, + "loss": 0.0726, "step": 14270 }, { - "epoch": 1.06044853705629, - "grad_norm": 1.9329426288604736, - "learning_rate": 2.363730877766226e-05, - "loss": 0.0972, + "epoch": 2.12089707411258, + "grad_norm": 0.6679229140281677, + "learning_rate": 7.879102925887421e-06, + "loss": 0.0535, "step": 14280 }, { - "epoch": 1.0611911480766374, - "grad_norm": 0.46645310521125793, - "learning_rate": 2.363285311154018e-05, - "loss": 0.0547, + "epoch": 2.122382296153275, + "grad_norm": 0.7594313621520996, + "learning_rate": 7.877617703846726e-06, + "loss": 0.0616, "step": 14290 }, { - "epoch": 1.061933759096985, - "grad_norm": 0.9803817272186279, - "learning_rate": 2.362839744541809e-05, - "loss": 0.0556, + "epoch": 2.12386751819397, + "grad_norm": 0.7679316997528076, + "learning_rate": 7.87613248180603e-06, + "loss": 0.0522, "step": 14300 }, { - "epoch": 1.0626763701173325, - "grad_norm": 2.7999866008758545, - "learning_rate": 2.3623941779296005e-05, - "loss": 0.0965, + "epoch": 2.125352740234665, + "grad_norm": 0.5544955134391785, + "learning_rate": 7.874647259765336e-06, + "loss": 0.0646, "step": 14310 }, { - "epoch": 1.0634189811376802, - "grad_norm": 0.8801766037940979, - "learning_rate": 2.361948611317392e-05, - "loss": 0.0768, + "epoch": 2.1268379622753604, + "grad_norm": 0.9552668929100037, + "learning_rate": 7.87316203772464e-06, + "loss": 0.0577, "step": 14320 }, { - "epoch": 1.0641615921580276, - "grad_norm": 1.0280200242996216, - "learning_rate": 2.3615030447051835e-05, - "loss": 0.0472, + "epoch": 2.1283231843160553, + "grad_norm": 1.1328357458114624, + "learning_rate": 7.871676815683945e-06, + "loss": 0.0647, "step": 14330 }, { - "epoch": 1.064904203178375, - "grad_norm": 1.479634165763855, - "learning_rate": 2.361057478092975e-05, - "loss": 0.0671, + "epoch": 2.12980840635675, + "grad_norm": 1.2579344511032104, + "learning_rate": 7.870191593643251e-06, + "loss": 0.0701, "step": 14340 }, { - "epoch": 1.0656468141987228, - "grad_norm": 0.7711525559425354, - "learning_rate": 2.3606119114807662e-05, - "loss": 0.0725, + "epoch": 2.1312936283974455, + "grad_norm": 0.7890368700027466, + "learning_rate": 7.868706371602554e-06, + "loss": 0.0574, "step": 14350 }, { - "epoch": 1.0663894252190702, - "grad_norm": 2.1043448448181152, - "learning_rate": 2.360166344868558e-05, - "loss": 0.0788, + "epoch": 2.1327788504381404, + "grad_norm": 0.752335786819458, + "learning_rate": 7.86722114956186e-06, + "loss": 0.0522, "step": 14360 }, { - "epoch": 1.0671320362394179, - "grad_norm": 1.8969895839691162, - "learning_rate": 2.3597207782563495e-05, - "loss": 0.0519, + "epoch": 2.1342640724788358, + "grad_norm": 0.8492644429206848, + "learning_rate": 7.865735927521166e-06, + "loss": 0.07, "step": 14370 }, { - "epoch": 1.0678746472597653, - "grad_norm": 3.172367811203003, - "learning_rate": 2.3592752116441407e-05, - "loss": 0.1093, + "epoch": 2.1357492945195307, + "grad_norm": 0.9102494120597839, + "learning_rate": 7.864250705480469e-06, + "loss": 0.0544, "step": 14380 }, { - "epoch": 1.0686172582801128, - "grad_norm": 2.328660726547241, - "learning_rate": 2.358829645031932e-05, - "loss": 0.0743, + "epoch": 2.1372345165602256, + "grad_norm": 0.9324311017990112, + "learning_rate": 7.862765483439775e-06, + "loss": 0.059, "step": 14390 }, { - "epoch": 1.0693598693004605, - "grad_norm": 1.0215742588043213, - "learning_rate": 2.358384078419724e-05, - "loss": 0.0577, + "epoch": 2.138719738600921, + "grad_norm": 1.1309044361114502, + "learning_rate": 7.86128026139908e-06, + "loss": 0.0547, "step": 14400 }, { - "epoch": 1.070102480320808, - "grad_norm": 2.7807462215423584, - "learning_rate": 2.357938511807515e-05, - "loss": 0.0538, + "epoch": 2.140204960641616, + "grad_norm": 1.2628252506256104, + "learning_rate": 7.859795039358384e-06, + "loss": 0.0823, "step": 14410 }, { - "epoch": 1.0708450913411556, - "grad_norm": 2.544025421142578, - "learning_rate": 2.3574929451953067e-05, - "loss": 0.0984, + "epoch": 2.141690182682311, + "grad_norm": 1.4814199209213257, + "learning_rate": 7.85830981731769e-06, + "loss": 0.0833, "step": 14420 }, { - "epoch": 1.071587702361503, - "grad_norm": 1.8840546607971191, - "learning_rate": 2.3570473785830985e-05, - "loss": 0.0891, + "epoch": 2.143175404723006, + "grad_norm": 0.6828455328941345, + "learning_rate": 7.856824595276994e-06, + "loss": 0.0692, "step": 14430 }, { - "epoch": 1.0723303133818507, - "grad_norm": 1.2693723440170288, - "learning_rate": 2.3566018119708897e-05, - "loss": 0.098, + "epoch": 2.1446606267637014, + "grad_norm": 1.0615395307540894, + "learning_rate": 7.855339373236299e-06, + "loss": 0.0823, "step": 14440 }, { - "epoch": 1.0730729244021981, - "grad_norm": 2.465930938720703, - "learning_rate": 2.356156245358681e-05, - "loss": 0.1154, + "epoch": 2.1461458488043963, + "grad_norm": 1.5419204235076904, + "learning_rate": 7.853854151195605e-06, + "loss": 0.0617, "step": 14450 }, { - "epoch": 1.0738155354225456, - "grad_norm": 3.5287487506866455, - "learning_rate": 2.3557106787464727e-05, - "loss": 0.0794, + "epoch": 2.147631070845091, + "grad_norm": 0.6285436749458313, + "learning_rate": 7.85236892915491e-06, + "loss": 0.075, "step": 14460 }, { - "epoch": 1.0745581464428933, - "grad_norm": 2.1311392784118652, - "learning_rate": 2.355265112134264e-05, - "loss": 0.0911, + "epoch": 2.1491162928857865, + "grad_norm": 0.6257336139678955, + "learning_rate": 7.850883707114214e-06, + "loss": 0.0792, "step": 14470 }, { - "epoch": 1.0753007574632407, - "grad_norm": 1.7194443941116333, - "learning_rate": 2.3548195455220557e-05, - "loss": 0.0689, + "epoch": 2.1506015149264814, + "grad_norm": 1.2522633075714111, + "learning_rate": 7.84939848507352e-06, + "loss": 0.0864, "step": 14480 }, { - "epoch": 1.0760433684835884, - "grad_norm": 1.9258701801300049, - "learning_rate": 2.3543739789098468e-05, - "loss": 0.0488, + "epoch": 2.152086736967177, + "grad_norm": 1.2310116291046143, + "learning_rate": 7.847913263032824e-06, + "loss": 0.0731, "step": 14490 }, { - "epoch": 1.0767859795039358, - "grad_norm": 1.2407386302947998, - "learning_rate": 2.3539284122976387e-05, - "loss": 0.066, + "epoch": 2.1535719590078717, + "grad_norm": 0.6669400930404663, + "learning_rate": 7.846428040992129e-06, + "loss": 0.0697, "step": 14500 }, { - "epoch": 1.0775285905242833, - "grad_norm": 0.2325424998998642, - "learning_rate": 2.35348284568543e-05, - "loss": 0.0667, + "epoch": 2.1550571810485666, + "grad_norm": 1.038515567779541, + "learning_rate": 7.844942818951435e-06, + "loss": 0.0625, "step": 14510 }, { - "epoch": 1.078271201544631, - "grad_norm": 0.7023411989212036, - "learning_rate": 2.3530372790732213e-05, - "loss": 0.0774, + "epoch": 2.156542403089262, + "grad_norm": 0.7487426400184631, + "learning_rate": 7.843457596910738e-06, + "loss": 0.0748, "step": 14520 }, { - "epoch": 1.0790138125649784, - "grad_norm": 0.42877697944641113, - "learning_rate": 2.352591712461013e-05, - "loss": 0.0441, + "epoch": 2.158027625129957, + "grad_norm": 0.6837989687919617, + "learning_rate": 7.841972374870044e-06, + "loss": 0.0585, "step": 14530 }, { - "epoch": 1.079756423585326, - "grad_norm": 0.8632937669754028, - "learning_rate": 2.3521461458488047e-05, - "loss": 0.0564, + "epoch": 2.159512847170652, + "grad_norm": 1.1205534934997559, + "learning_rate": 7.840487152829348e-06, + "loss": 0.0982, "step": 14540 }, { - "epoch": 1.0804990346056735, - "grad_norm": 7.092894077301025, - "learning_rate": 2.3517005792365958e-05, - "loss": 0.0802, + "epoch": 2.160998069211347, + "grad_norm": 0.7028797268867493, + "learning_rate": 7.839001930788653e-06, + "loss": 0.0544, "step": 14550 }, { - "epoch": 1.081241645626021, - "grad_norm": 2.007236957550049, - "learning_rate": 2.3512550126243873e-05, - "loss": 0.1011, + "epoch": 2.162483291252042, + "grad_norm": 0.5611459016799927, + "learning_rate": 7.837516708747959e-06, + "loss": 0.0509, "step": 14560 }, { - "epoch": 1.0819842566463687, - "grad_norm": 2.4065308570861816, - "learning_rate": 2.3508094460121788e-05, - "loss": 0.0985, + "epoch": 2.1639685132927373, + "grad_norm": 0.8358970880508423, + "learning_rate": 7.836031486707263e-06, + "loss": 0.0815, "step": 14570 }, { - "epoch": 1.082726867666716, - "grad_norm": 4.827093601226807, - "learning_rate": 2.3503638793999703e-05, - "loss": 0.1154, + "epoch": 2.165453735333432, + "grad_norm": 0.9127787351608276, + "learning_rate": 7.834546264666568e-06, + "loss": 0.0774, "step": 14580 }, { - "epoch": 1.0834694786870638, - "grad_norm": 2.402787208557129, - "learning_rate": 2.3499183127877618e-05, - "loss": 0.0893, + "epoch": 2.1669389573741276, + "grad_norm": 1.102961778640747, + "learning_rate": 7.833061042625874e-06, + "loss": 0.0639, "step": 14590 }, { - "epoch": 1.0842120897074112, - "grad_norm": 1.0034596920013428, - "learning_rate": 2.3494727461755533e-05, - "loss": 0.1013, + "epoch": 2.1684241794148225, + "grad_norm": 1.123892068862915, + "learning_rate": 7.831575820585178e-06, + "loss": 0.066, "step": 14600 }, { - "epoch": 1.084954700727759, - "grad_norm": 1.7055171728134155, - "learning_rate": 2.3490271795633448e-05, - "loss": 0.0918, + "epoch": 2.169909401455518, + "grad_norm": 1.0453194379806519, + "learning_rate": 7.830090598544483e-06, + "loss": 0.0584, "step": 14610 }, { - "epoch": 1.0856973117481064, - "grad_norm": 2.0638813972473145, - "learning_rate": 2.3485816129511363e-05, - "loss": 0.0888, + "epoch": 2.1713946234962127, + "grad_norm": 0.4781140983104706, + "learning_rate": 7.828605376503789e-06, + "loss": 0.0727, "step": 14620 }, { - "epoch": 1.0864399227684538, - "grad_norm": 1.2625740766525269, - "learning_rate": 2.3481360463389275e-05, - "loss": 0.0901, + "epoch": 2.1728798455369076, + "grad_norm": 0.7792844772338867, + "learning_rate": 7.827120154463093e-06, + "loss": 0.0669, "step": 14630 }, { - "epoch": 1.0871825337888015, - "grad_norm": 1.9347448348999023, - "learning_rate": 2.3476904797267193e-05, - "loss": 0.0508, + "epoch": 2.174365067577603, + "grad_norm": 1.3519136905670166, + "learning_rate": 7.825634932422398e-06, + "loss": 0.0652, "step": 14640 }, { - "epoch": 1.087925144809149, - "grad_norm": 1.2374624013900757, - "learning_rate": 2.3472449131145108e-05, - "loss": 0.0544, + "epoch": 2.175850289618298, + "grad_norm": 1.1446763277053833, + "learning_rate": 7.824149710381704e-06, + "loss": 0.0702, "step": 14650 }, { - "epoch": 1.0886677558294966, - "grad_norm": 1.0130029916763306, - "learning_rate": 2.346799346502302e-05, - "loss": 0.0371, + "epoch": 2.177335511658993, + "grad_norm": 0.6634364128112793, + "learning_rate": 7.822664488341008e-06, + "loss": 0.0548, "step": 14660 }, { - "epoch": 1.089410366849844, - "grad_norm": 2.272545576095581, - "learning_rate": 2.3463537798900938e-05, - "loss": 0.0668, + "epoch": 2.178820733699688, + "grad_norm": 0.37931519746780396, + "learning_rate": 7.821179266300313e-06, + "loss": 0.0889, "step": 14670 }, { - "epoch": 1.0901529778701915, - "grad_norm": 1.1492127180099487, - "learning_rate": 2.345908213277885e-05, - "loss": 0.0909, + "epoch": 2.180305955740383, + "grad_norm": 0.9678369164466858, + "learning_rate": 7.819694044259617e-06, + "loss": 0.081, "step": 14680 }, { - "epoch": 1.0908955888905392, - "grad_norm": 0.4936734735965729, - "learning_rate": 2.3454626466656765e-05, - "loss": 0.0579, + "epoch": 2.1817911777810783, + "grad_norm": 0.8365264534950256, + "learning_rate": 7.818208822218922e-06, + "loss": 0.0622, "step": 14690 }, { - "epoch": 1.0916381999108866, - "grad_norm": 1.2032221555709839, - "learning_rate": 2.3450170800534683e-05, - "loss": 0.0953, + "epoch": 2.1832763998217732, + "grad_norm": 0.43657487630844116, + "learning_rate": 7.816723600178228e-06, + "loss": 0.0578, "step": 14700 }, { - "epoch": 1.0923808109312343, - "grad_norm": 1.325255036354065, - "learning_rate": 2.3445715134412595e-05, - "loss": 0.0653, + "epoch": 2.1847616218624686, + "grad_norm": 0.68485426902771, + "learning_rate": 7.815238378137532e-06, + "loss": 0.0579, "step": 14710 }, { - "epoch": 1.0931234219515817, - "grad_norm": 2.2962052822113037, - "learning_rate": 2.344125946829051e-05, - "loss": 0.0618, + "epoch": 2.1862468439031635, + "grad_norm": 0.9559560418128967, + "learning_rate": 7.813753156096837e-06, + "loss": 0.0736, "step": 14720 }, { - "epoch": 1.0938660329719294, - "grad_norm": 0.9786393642425537, - "learning_rate": 2.3436803802168425e-05, - "loss": 0.0784, + "epoch": 2.187732065943859, + "grad_norm": 1.7984938621520996, + "learning_rate": 7.812267934056143e-06, + "loss": 0.0719, "step": 14730 }, { - "epoch": 1.0946086439922769, - "grad_norm": 1.7134709358215332, - "learning_rate": 2.343234813604634e-05, - "loss": 0.0592, + "epoch": 2.1892172879845537, + "grad_norm": 1.2785571813583374, + "learning_rate": 7.810782712015447e-06, + "loss": 0.0755, "step": 14740 }, { - "epoch": 1.0953512550126243, - "grad_norm": 0.9471766948699951, - "learning_rate": 2.3427892469924255e-05, - "loss": 0.112, + "epoch": 2.1907025100252486, + "grad_norm": 0.6600275039672852, + "learning_rate": 7.809297489974752e-06, + "loss": 0.0782, "step": 14750 }, { - "epoch": 1.096093866032972, - "grad_norm": 0.9687842130661011, - "learning_rate": 2.342343680380217e-05, - "loss": 0.0799, + "epoch": 2.192187732065944, + "grad_norm": 0.6349477171897888, + "learning_rate": 7.807812267934058e-06, + "loss": 0.0792, "step": 14760 }, { - "epoch": 1.0968364770533194, - "grad_norm": 1.6770538091659546, - "learning_rate": 2.3418981137680085e-05, - "loss": 0.0894, + "epoch": 2.193672954106639, + "grad_norm": 0.6333335041999817, + "learning_rate": 7.806327045893362e-06, + "loss": 0.0767, "step": 14770 }, { - "epoch": 1.097579088073667, - "grad_norm": 2.252725124359131, - "learning_rate": 2.3414525471558e-05, - "loss": 0.0903, + "epoch": 2.195158176147334, + "grad_norm": 0.6242454051971436, + "learning_rate": 7.804841823852667e-06, + "loss": 0.0714, "step": 14780 }, { - "epoch": 1.0983216990940146, - "grad_norm": 1.531714677810669, - "learning_rate": 2.341006980543591e-05, - "loss": 0.0644, + "epoch": 2.196643398188029, + "grad_norm": 0.8708824515342712, + "learning_rate": 7.803356601811971e-06, + "loss": 0.0789, "step": 14790 }, { - "epoch": 1.099064310114362, - "grad_norm": 2.1551620960235596, - "learning_rate": 2.3405614139313826e-05, - "loss": 0.073, + "epoch": 2.198128620228724, + "grad_norm": 0.5887396931648254, + "learning_rate": 7.801871379771277e-06, + "loss": 0.0534, "step": 14800 }, { - "epoch": 1.0998069211347097, - "grad_norm": 1.436985969543457, - "learning_rate": 2.3401158473191745e-05, - "loss": 0.0695, + "epoch": 2.1996138422694194, + "grad_norm": 0.9256582260131836, + "learning_rate": 7.800386157730582e-06, + "loss": 0.0624, "step": 14810 }, { - "epoch": 1.1005495321550571, - "grad_norm": 2.5473999977111816, - "learning_rate": 2.3396702807069656e-05, - "loss": 0.037, + "epoch": 2.2010990643101143, + "grad_norm": 1.3295891284942627, + "learning_rate": 7.798900935689886e-06, + "loss": 0.0695, "step": 14820 }, { - "epoch": 1.1012921431754048, - "grad_norm": 2.2128050327301025, - "learning_rate": 2.339224714094757e-05, - "loss": 0.0767, + "epoch": 2.2025842863508096, + "grad_norm": 1.114598035812378, + "learning_rate": 7.79741571364919e-06, + "loss": 0.0633, "step": 14830 }, { - "epoch": 1.1020347541957523, - "grad_norm": 0.8083871006965637, - "learning_rate": 2.338779147482549e-05, - "loss": 0.0557, + "epoch": 2.2040695083915045, + "grad_norm": 0.6609292030334473, + "learning_rate": 7.795930491608497e-06, + "loss": 0.0658, "step": 14840 }, { - "epoch": 1.1027773652160997, - "grad_norm": 2.2617440223693848, - "learning_rate": 2.33833358087034e-05, - "loss": 0.0922, + "epoch": 2.2055547304321994, + "grad_norm": 2.0195000171661377, + "learning_rate": 7.794445269567801e-06, + "loss": 0.0824, "step": 14850 }, { - "epoch": 1.1035199762364474, - "grad_norm": 0.7747202515602112, - "learning_rate": 2.3378880142581316e-05, - "loss": 0.0625, + "epoch": 2.2070399524728948, + "grad_norm": 0.7190861105918884, + "learning_rate": 7.792960047527105e-06, + "loss": 0.0574, "step": 14860 }, { - "epoch": 1.1042625872567948, - "grad_norm": 1.1605490446090698, - "learning_rate": 2.337442447645923e-05, - "loss": 0.0792, + "epoch": 2.2085251745135897, + "grad_norm": 0.8353859782218933, + "learning_rate": 7.79147482548641e-06, + "loss": 0.0959, "step": 14870 }, { - "epoch": 1.1050051982771425, - "grad_norm": 1.8753223419189453, - "learning_rate": 2.3369968810337146e-05, - "loss": 0.0943, + "epoch": 2.210010396554285, + "grad_norm": 0.5273255109786987, + "learning_rate": 7.789989603445716e-06, + "loss": 0.057, "step": 14880 }, { - "epoch": 1.10574780929749, - "grad_norm": 0.9175904393196106, - "learning_rate": 2.336551314421506e-05, - "loss": 0.0921, + "epoch": 2.21149561859498, + "grad_norm": 1.0599677562713623, + "learning_rate": 7.78850438140502e-06, + "loss": 0.0713, "step": 14890 }, { - "epoch": 1.1064904203178376, - "grad_norm": 0.9853323101997375, - "learning_rate": 2.3361057478092973e-05, - "loss": 0.0689, + "epoch": 2.2129808406356752, + "grad_norm": 1.4016988277435303, + "learning_rate": 7.787019159364325e-06, + "loss": 0.074, "step": 14900 }, { - "epoch": 1.107233031338185, - "grad_norm": 1.734784722328186, - "learning_rate": 2.335660181197089e-05, - "loss": 0.0748, + "epoch": 2.21446606267637, + "grad_norm": 0.2593357264995575, + "learning_rate": 7.785533937323631e-06, + "loss": 0.0642, "step": 14910 }, { - "epoch": 1.1079756423585325, - "grad_norm": 3.9313881397247314, - "learning_rate": 2.3352146145848806e-05, - "loss": 0.0829, + "epoch": 2.215951284717065, + "grad_norm": 1.8210548162460327, + "learning_rate": 7.784048715282935e-06, + "loss": 0.0722, "step": 14920 }, { - "epoch": 1.1087182533788802, - "grad_norm": 0.6658304929733276, - "learning_rate": 2.3347690479726718e-05, - "loss": 0.0597, + "epoch": 2.2174365067577604, + "grad_norm": 0.4837227761745453, + "learning_rate": 7.78256349324224e-06, + "loss": 0.0755, "step": 14930 }, { - "epoch": 1.1094608643992276, - "grad_norm": 1.8570702075958252, - "learning_rate": 2.3343234813604636e-05, - "loss": 0.0969, + "epoch": 2.2189217287984553, + "grad_norm": 0.8653165102005005, + "learning_rate": 7.781078271201546e-06, + "loss": 0.0689, "step": 14940 }, { - "epoch": 1.1102034754195753, - "grad_norm": 2.4052164554595947, - "learning_rate": 2.333877914748255e-05, - "loss": 0.0937, + "epoch": 2.2204069508391506, + "grad_norm": 0.6423892378807068, + "learning_rate": 7.77959304916085e-06, + "loss": 0.0693, "step": 14950 }, { - "epoch": 1.1109460864399228, - "grad_norm": 2.2822628021240234, - "learning_rate": 2.3334323481360463e-05, - "loss": 0.086, + "epoch": 2.2218921728798455, + "grad_norm": 1.4939004182815552, + "learning_rate": 7.778107827120155e-06, + "loss": 0.0878, "step": 14960 }, { - "epoch": 1.1116886974602702, - "grad_norm": 4.604571342468262, - "learning_rate": 2.3329867815238378e-05, - "loss": 0.1085, + "epoch": 2.2233773949205404, + "grad_norm": 0.8596535921096802, + "learning_rate": 7.776622605079461e-06, + "loss": 0.0774, "step": 14970 }, { - "epoch": 1.1124313084806179, - "grad_norm": 2.1120212078094482, - "learning_rate": 2.3325412149116293e-05, - "loss": 0.0738, + "epoch": 2.2248626169612358, + "grad_norm": 0.9801391363143921, + "learning_rate": 7.775137383038764e-06, + "loss": 0.0602, "step": 14980 }, { - "epoch": 1.1131739195009653, - "grad_norm": 2.652463912963867, - "learning_rate": 2.3320956482994208e-05, - "loss": 0.0612, + "epoch": 2.2263478390019307, + "grad_norm": 0.5129373669624329, + "learning_rate": 7.77365216099807e-06, + "loss": 0.0402, "step": 14990 }, { - "epoch": 1.113916530521313, - "grad_norm": 1.0508460998535156, - "learning_rate": 2.3316500816872123e-05, - "loss": 0.0624, + "epoch": 2.227833061042626, + "grad_norm": 1.0586576461791992, + "learning_rate": 7.772166938957374e-06, + "loss": 0.0696, "step": 15000 }, { - "epoch": 1.1146591415416605, - "grad_norm": 2.2255547046661377, - "learning_rate": 2.3312045150750038e-05, - "loss": 0.0836, + "epoch": 2.229318283083321, + "grad_norm": 0.7149041891098022, + "learning_rate": 7.770681716916679e-06, + "loss": 0.0877, "step": 15010 }, { - "epoch": 1.1154017525620081, - "grad_norm": 1.7754733562469482, - "learning_rate": 2.3307589484627953e-05, - "loss": 0.1167, + "epoch": 2.2308035051240163, + "grad_norm": 1.1977014541625977, + "learning_rate": 7.769196494875985e-06, + "loss": 0.0783, "step": 15020 }, { - "epoch": 1.1161443635823556, - "grad_norm": 1.857144832611084, - "learning_rate": 2.3303133818505868e-05, - "loss": 0.0596, + "epoch": 2.232288727164711, + "grad_norm": 0.6709617376327515, + "learning_rate": 7.76771127283529e-06, + "loss": 0.0624, "step": 15030 }, { - "epoch": 1.116886974602703, - "grad_norm": 3.7554049491882324, - "learning_rate": 2.3298678152383783e-05, - "loss": 0.0647, + "epoch": 2.233773949205406, + "grad_norm": 0.6660168766975403, + "learning_rate": 7.766226050794594e-06, + "loss": 0.0642, "step": 15040 }, { - "epoch": 1.1176295856230507, - "grad_norm": 3.861762762069702, - "learning_rate": 2.3294222486261698e-05, - "loss": 0.083, + "epoch": 2.2352591712461014, + "grad_norm": 0.5352195501327515, + "learning_rate": 7.7647408287539e-06, + "loss": 0.0554, "step": 15050 }, { - "epoch": 1.1183721966433982, - "grad_norm": 0.333187997341156, - "learning_rate": 2.3289766820139613e-05, - "loss": 0.0567, + "epoch": 2.2367443932867963, + "grad_norm": 1.1467076539993286, + "learning_rate": 7.763255606713204e-06, + "loss": 0.072, "step": 15060 }, { - "epoch": 1.1191148076637458, - "grad_norm": 3.2010586261749268, - "learning_rate": 2.3285311154017524e-05, - "loss": 0.0925, + "epoch": 2.2382296153274917, + "grad_norm": 0.9584192037582397, + "learning_rate": 7.761770384672509e-06, + "loss": 0.0721, "step": 15070 }, { - "epoch": 1.1198574186840933, - "grad_norm": 3.66748046875, - "learning_rate": 2.3280855487895443e-05, - "loss": 0.0884, + "epoch": 2.2397148373681866, + "grad_norm": 1.2189011573791504, + "learning_rate": 7.760285162631815e-06, + "loss": 0.0661, "step": 15080 }, { - "epoch": 1.1206000297044407, - "grad_norm": 0.8364987969398499, - "learning_rate": 2.3276399821773354e-05, - "loss": 0.0644, + "epoch": 2.2412000594088815, + "grad_norm": 1.0347647666931152, + "learning_rate": 7.75879994059112e-06, + "loss": 0.089, "step": 15090 }, { - "epoch": 1.1213426407247884, - "grad_norm": 2.9807636737823486, - "learning_rate": 2.327194415565127e-05, - "loss": 0.0687, + "epoch": 2.242685281449577, + "grad_norm": 1.1839691400527954, + "learning_rate": 7.757314718550424e-06, + "loss": 0.0834, "step": 15100 }, { - "epoch": 1.1220852517451358, - "grad_norm": 0.977165937423706, - "learning_rate": 2.3267488489529188e-05, - "loss": 0.0714, + "epoch": 2.2441705034902717, + "grad_norm": 0.8672871589660645, + "learning_rate": 7.75582949650973e-06, + "loss": 0.0763, "step": 15110 }, { - "epoch": 1.1228278627654835, - "grad_norm": 1.4593790769577026, - "learning_rate": 2.32630328234071e-05, - "loss": 0.0698, + "epoch": 2.245655725530967, + "grad_norm": 0.44413626194000244, + "learning_rate": 7.754344274469033e-06, + "loss": 0.0745, "step": 15120 }, { - "epoch": 1.123570473785831, - "grad_norm": 0.9217209219932556, - "learning_rate": 2.3258577157285014e-05, - "loss": 0.0764, + "epoch": 2.247140947571662, + "grad_norm": 1.441617488861084, + "learning_rate": 7.752859052428339e-06, + "loss": 0.0943, "step": 15130 }, { - "epoch": 1.1243130848061784, - "grad_norm": 1.4398505687713623, - "learning_rate": 2.325412149116293e-05, - "loss": 0.1199, + "epoch": 2.248626169612357, + "grad_norm": 1.0939092636108398, + "learning_rate": 7.751373830387645e-06, + "loss": 0.0822, "step": 15140 }, { - "epoch": 1.125055695826526, - "grad_norm": 1.7291533946990967, - "learning_rate": 2.3249665825040844e-05, - "loss": 0.0899, + "epoch": 2.250111391653052, + "grad_norm": 0.6537975668907166, + "learning_rate": 7.749888608346948e-06, + "loss": 0.0677, "step": 15150 }, { - "epoch": 1.1257983068468735, - "grad_norm": 2.176523208618164, - "learning_rate": 2.324521015891876e-05, - "loss": 0.0746, + "epoch": 2.251596613693747, + "grad_norm": 1.4885673522949219, + "learning_rate": 7.748403386306254e-06, + "loss": 0.0616, "step": 15160 }, { - "epoch": 1.1265409178672212, - "grad_norm": 2.276003122329712, - "learning_rate": 2.3240754492796674e-05, - "loss": 0.1026, + "epoch": 2.2530818357344424, + "grad_norm": 0.9456076622009277, + "learning_rate": 7.746918164265558e-06, + "loss": 0.0715, "step": 15170 }, { - "epoch": 1.1272835288875687, - "grad_norm": 2.0524089336395264, - "learning_rate": 2.323629882667459e-05, - "loss": 0.0692, + "epoch": 2.2545670577751373, + "grad_norm": 0.8747375011444092, + "learning_rate": 7.745432942224863e-06, + "loss": 0.0965, "step": 15180 }, { - "epoch": 1.1280261399079161, - "grad_norm": 0.9038380980491638, - "learning_rate": 2.3231843160552504e-05, - "loss": 0.0775, + "epoch": 2.2560522798158322, + "grad_norm": 0.8469992876052856, + "learning_rate": 7.743947720184169e-06, + "loss": 0.0631, "step": 15190 }, { - "epoch": 1.1287687509282638, - "grad_norm": 2.3695902824401855, - "learning_rate": 2.3227387494430416e-05, - "loss": 0.1008, + "epoch": 2.2575375018565276, + "grad_norm": 1.1663897037506104, + "learning_rate": 7.742462498143473e-06, + "loss": 0.0689, "step": 15200 }, { - "epoch": 1.1295113619486112, - "grad_norm": 1.6643588542938232, - "learning_rate": 2.322293182830833e-05, - "loss": 0.0637, + "epoch": 2.2590227238972225, + "grad_norm": 1.6151713132858276, + "learning_rate": 7.740977276102778e-06, + "loss": 0.0616, "step": 15210 }, { - "epoch": 1.130253972968959, - "grad_norm": 0.4091331660747528, - "learning_rate": 2.321847616218625e-05, - "loss": 0.0768, + "epoch": 2.260507945937918, + "grad_norm": 0.8520883917808533, + "learning_rate": 7.739492054062084e-06, + "loss": 0.0769, "step": 15220 }, { - "epoch": 1.1309965839893064, - "grad_norm": 2.0745925903320312, - "learning_rate": 2.321402049606416e-05, - "loss": 0.082, + "epoch": 2.2619931679786127, + "grad_norm": 0.6212167143821716, + "learning_rate": 7.738006832021388e-06, + "loss": 0.076, "step": 15230 }, { - "epoch": 1.131739195009654, - "grad_norm": 3.2917673587799072, - "learning_rate": 2.3209564829942076e-05, - "loss": 0.1074, + "epoch": 2.263478390019308, + "grad_norm": 0.7191680073738098, + "learning_rate": 7.736521609980693e-06, + "loss": 0.0513, "step": 15240 }, { - "epoch": 1.1324818060300015, - "grad_norm": 2.7250659465789795, - "learning_rate": 2.3205109163819994e-05, - "loss": 0.0742, + "epoch": 2.264963612060003, + "grad_norm": 1.244286298751831, + "learning_rate": 7.735036387939999e-06, + "loss": 0.0731, "step": 15250 }, { - "epoch": 1.133224417050349, - "grad_norm": 0.4812146723270416, - "learning_rate": 2.3200653497697906e-05, - "loss": 0.0644, + "epoch": 2.266448834100698, + "grad_norm": 0.6261264681816101, + "learning_rate": 7.733551165899303e-06, + "loss": 0.0614, "step": 15260 }, { - "epoch": 1.1339670280706966, - "grad_norm": 1.7859218120574951, - "learning_rate": 2.319619783157582e-05, - "loss": 0.1025, + "epoch": 2.267934056141393, + "grad_norm": 0.5021324753761292, + "learning_rate": 7.732065943858608e-06, + "loss": 0.0877, "step": 15270 }, { - "epoch": 1.134709639091044, - "grad_norm": 1.6448826789855957, - "learning_rate": 2.3191742165453736e-05, - "loss": 0.0694, + "epoch": 2.269419278182088, + "grad_norm": 1.0325325727462769, + "learning_rate": 7.730580721817912e-06, + "loss": 0.0662, "step": 15280 }, { - "epoch": 1.1354522501113917, - "grad_norm": 1.5270838737487793, - "learning_rate": 2.318728649933165e-05, - "loss": 0.084, + "epoch": 2.2709045002227835, + "grad_norm": 1.0051850080490112, + "learning_rate": 7.729095499777216e-06, + "loss": 0.0866, "step": 15290 }, { - "epoch": 1.1361948611317392, - "grad_norm": 2.1386685371398926, - "learning_rate": 2.3182830833209566e-05, - "loss": 0.0864, + "epoch": 2.2723897222634784, + "grad_norm": 1.184901475906372, + "learning_rate": 7.727610277736523e-06, + "loss": 0.0565, "step": 15300 }, { - "epoch": 1.1369374721520868, - "grad_norm": 2.1466033458709717, - "learning_rate": 2.3178375167087477e-05, - "loss": 0.0792, + "epoch": 2.2738749443041737, + "grad_norm": 1.2180163860321045, + "learning_rate": 7.726125055695827e-06, + "loss": 0.0576, "step": 15310 }, { - "epoch": 1.1376800831724343, - "grad_norm": 1.733211874961853, - "learning_rate": 2.3173919500965396e-05, - "loss": 0.0966, + "epoch": 2.2753601663448686, + "grad_norm": 0.8974994421005249, + "learning_rate": 7.724639833655131e-06, + "loss": 0.0876, "step": 15320 }, { - "epoch": 1.1384226941927817, - "grad_norm": 0.9286133646965027, - "learning_rate": 2.316946383484331e-05, - "loss": 0.0736, + "epoch": 2.2768453883855635, + "grad_norm": 0.9203564524650574, + "learning_rate": 7.723154611614438e-06, + "loss": 0.0548, "step": 15330 }, { - "epoch": 1.1391653052131294, - "grad_norm": 3.003708839416504, - "learning_rate": 2.3165008168721222e-05, - "loss": 0.1012, + "epoch": 2.278330610426259, + "grad_norm": 1.1554508209228516, + "learning_rate": 7.721669389573742e-06, + "loss": 0.0725, "step": 15340 }, { - "epoch": 1.1399079162334769, - "grad_norm": 0.6814678311347961, - "learning_rate": 2.316055250259914e-05, - "loss": 0.0611, + "epoch": 2.2798158324669537, + "grad_norm": 1.1028167009353638, + "learning_rate": 7.720184167533046e-06, + "loss": 0.0729, "step": 15350 }, { - "epoch": 1.1406505272538245, - "grad_norm": 1.6721028089523315, - "learning_rate": 2.3156096836477056e-05, - "loss": 0.0972, + "epoch": 2.281301054507649, + "grad_norm": 0.9877077341079712, + "learning_rate": 7.718698945492352e-06, + "loss": 0.0657, "step": 15360 }, { - "epoch": 1.141393138274172, - "grad_norm": 1.2787104845046997, - "learning_rate": 2.3151641170354967e-05, - "loss": 0.0652, + "epoch": 2.282786276548344, + "grad_norm": 1.2431800365447998, + "learning_rate": 7.717213723451657e-06, + "loss": 0.0884, "step": 15370 }, { - "epoch": 1.1421357492945194, - "grad_norm": 1.4805560111999512, - "learning_rate": 2.3147185504232882e-05, - "loss": 0.0678, + "epoch": 2.284271498589039, + "grad_norm": 0.6929932832717896, + "learning_rate": 7.715728501410961e-06, + "loss": 0.0765, "step": 15380 }, { - "epoch": 1.1428783603148671, - "grad_norm": 0.9559769034385681, - "learning_rate": 2.3142729838110797e-05, - "loss": 0.0718, + "epoch": 2.2857567206297342, + "grad_norm": 0.5198222398757935, + "learning_rate": 7.714243279370266e-06, + "loss": 0.0527, "step": 15390 }, { - "epoch": 1.1436209713352146, - "grad_norm": 2.3336753845214844, - "learning_rate": 2.3138274171988712e-05, - "loss": 0.0716, + "epoch": 2.287241942670429, + "grad_norm": 0.39518117904663086, + "learning_rate": 7.712758057329572e-06, + "loss": 0.0703, "step": 15400 }, { - "epoch": 1.1443635823555622, - "grad_norm": 1.7206687927246094, - "learning_rate": 2.3133818505866627e-05, - "loss": 0.0532, + "epoch": 2.2887271647111245, + "grad_norm": 1.2519538402557373, + "learning_rate": 7.711272835288876e-06, + "loss": 0.0589, "step": 15410 }, { - "epoch": 1.1451061933759097, - "grad_norm": 4.165546894073486, - "learning_rate": 2.3129362839744542e-05, - "loss": 0.0606, + "epoch": 2.2902123867518194, + "grad_norm": 0.6768942475318909, + "learning_rate": 7.70978761324818e-06, + "loss": 0.0507, "step": 15420 }, { - "epoch": 1.1458488043962571, - "grad_norm": 3.124039888381958, - "learning_rate": 2.3124907173622457e-05, - "loss": 0.111, + "epoch": 2.2916976087925143, + "grad_norm": 0.732701301574707, + "learning_rate": 7.708302391207487e-06, + "loss": 0.0686, "step": 15430 }, { - "epoch": 1.1465914154166048, - "grad_norm": 0.9052489995956421, - "learning_rate": 2.3120451507500372e-05, - "loss": 0.0805, + "epoch": 2.2931828308332096, + "grad_norm": 0.7347680330276489, + "learning_rate": 7.706817169166791e-06, + "loss": 0.0767, "step": 15440 }, { - "epoch": 1.1473340264369523, - "grad_norm": 1.5409538745880127, - "learning_rate": 2.3115995841378287e-05, - "loss": 0.0552, + "epoch": 2.2946680528739045, + "grad_norm": 0.6387335658073425, + "learning_rate": 7.705331947126096e-06, + "loss": 0.0657, "step": 15450 }, { - "epoch": 1.1480766374573, - "grad_norm": 0.6143955588340759, - "learning_rate": 2.3111540175256202e-05, - "loss": 0.0527, + "epoch": 2.2961532749146, + "grad_norm": 0.5078271627426147, + "learning_rate": 7.7038467250854e-06, + "loss": 0.0766, "step": 15460 }, { - "epoch": 1.1488192484776474, - "grad_norm": 2.081989288330078, - "learning_rate": 2.3107084509134117e-05, - "loss": 0.1173, + "epoch": 2.2976384969552948, + "grad_norm": 0.5774367451667786, + "learning_rate": 7.702361503044706e-06, + "loss": 0.0578, "step": 15470 }, { - "epoch": 1.1495618594979948, - "grad_norm": 1.4942225217819214, - "learning_rate": 2.310262884301203e-05, - "loss": 0.0951, + "epoch": 2.2991237189959897, + "grad_norm": 0.8975863456726074, + "learning_rate": 7.70087628100401e-06, + "loss": 0.0626, "step": 15480 }, { - "epoch": 1.1503044705183425, - "grad_norm": 3.5311174392700195, - "learning_rate": 2.3098173176889947e-05, - "loss": 0.1277, + "epoch": 2.300608941036685, + "grad_norm": 0.54920893907547, + "learning_rate": 7.699391058963315e-06, + "loss": 0.0573, "step": 15490 }, { - "epoch": 1.15104708153869, - "grad_norm": 1.772064447402954, - "learning_rate": 2.309371751076786e-05, - "loss": 0.0568, + "epoch": 2.30209416307738, + "grad_norm": 0.9207307696342468, + "learning_rate": 7.69790583692262e-06, + "loss": 0.08, "step": 15500 }, { - "epoch": 1.1517896925590376, - "grad_norm": 1.9447821378707886, - "learning_rate": 2.3089261844645774e-05, - "loss": 0.0542, + "epoch": 2.3035793851180753, + "grad_norm": 0.7573016881942749, + "learning_rate": 7.696420614881926e-06, + "loss": 0.0672, "step": 15510 }, { - "epoch": 1.152532303579385, - "grad_norm": 2.5192413330078125, - "learning_rate": 2.3084806178523692e-05, - "loss": 0.0636, + "epoch": 2.30506460715877, + "grad_norm": 0.47608405351638794, + "learning_rate": 7.69493539284123e-06, + "loss": 0.0739, "step": 15520 }, { - "epoch": 1.1532749145997327, - "grad_norm": 0.6667538285255432, - "learning_rate": 2.3080350512401604e-05, - "loss": 0.0897, + "epoch": 2.3065498291994655, + "grad_norm": 1.493303894996643, + "learning_rate": 7.693450170800535e-06, + "loss": 0.0737, "step": 15530 }, { - "epoch": 1.1540175256200802, - "grad_norm": 1.988601803779602, - "learning_rate": 2.307589484627952e-05, - "loss": 0.1259, + "epoch": 2.3080350512401604, + "grad_norm": 0.50611811876297, + "learning_rate": 7.69196494875984e-06, + "loss": 0.0538, "step": 15540 }, { - "epoch": 1.1547601366404276, - "grad_norm": 1.6338027715682983, - "learning_rate": 2.3071439180157434e-05, - "loss": 0.0578, + "epoch": 2.3095202732808553, + "grad_norm": 0.39653122425079346, + "learning_rate": 7.690479726719145e-06, + "loss": 0.0699, "step": 15550 }, { - "epoch": 1.1555027476607753, - "grad_norm": 3.52496600151062, - "learning_rate": 2.306698351403535e-05, - "loss": 0.074, + "epoch": 2.3110054953215506, + "grad_norm": 1.0106645822525024, + "learning_rate": 7.68899450467845e-06, + "loss": 0.0725, "step": 15560 }, { - "epoch": 1.1562453586811228, - "grad_norm": 1.9696495532989502, - "learning_rate": 2.3062527847913264e-05, - "loss": 0.1064, + "epoch": 2.3124907173622455, + "grad_norm": 0.8386173844337463, + "learning_rate": 7.687509282637756e-06, + "loss": 0.0695, "step": 15570 }, { - "epoch": 1.1569879697014704, - "grad_norm": 1.104413390159607, - "learning_rate": 2.305807218179118e-05, - "loss": 0.0936, + "epoch": 2.313975939402941, + "grad_norm": 1.1915806531906128, + "learning_rate": 7.68602406059706e-06, + "loss": 0.0769, "step": 15580 }, { - "epoch": 1.157730580721818, - "grad_norm": 2.5523598194122314, - "learning_rate": 2.3053616515669094e-05, - "loss": 0.0714, + "epoch": 2.315461161443636, + "grad_norm": 0.8047814965248108, + "learning_rate": 7.684538838556365e-06, + "loss": 0.0791, "step": 15590 }, { - "epoch": 1.1584731917421656, - "grad_norm": 1.7481622695922852, - "learning_rate": 2.304916084954701e-05, - "loss": 0.1121, + "epoch": 2.316946383484331, + "grad_norm": 0.912143349647522, + "learning_rate": 7.68305361651567e-06, + "loss": 0.0802, "step": 15600 }, { - "epoch": 1.159215802762513, - "grad_norm": 3.4101874828338623, - "learning_rate": 2.304470518342492e-05, - "loss": 0.0916, + "epoch": 2.318431605525026, + "grad_norm": 0.9745832681655884, + "learning_rate": 7.681568394474973e-06, + "loss": 0.0716, "step": 15610 }, { - "epoch": 1.1599584137828605, - "grad_norm": 1.432702660560608, - "learning_rate": 2.3040249517302835e-05, - "loss": 0.1003, + "epoch": 2.319916827565721, + "grad_norm": 0.41111069917678833, + "learning_rate": 7.68008317243428e-06, + "loss": 0.0728, "step": 15620 }, { - "epoch": 1.1607010248032081, - "grad_norm": 0.6182481646537781, - "learning_rate": 2.3035793851180754e-05, - "loss": 0.0504, + "epoch": 2.3214020496064163, + "grad_norm": 0.8632513284683228, + "learning_rate": 7.678597950393584e-06, + "loss": 0.0795, "step": 15630 }, { - "epoch": 1.1614436358235556, - "grad_norm": 2.7727530002593994, - "learning_rate": 2.3031338185058665e-05, - "loss": 0.0671, + "epoch": 2.322887271647111, + "grad_norm": 0.6391085982322693, + "learning_rate": 7.677112728352888e-06, + "loss": 0.0597, "step": 15640 }, { - "epoch": 1.1621862468439033, - "grad_norm": 0.7968599200248718, - "learning_rate": 2.302688251893658e-05, - "loss": 0.0742, + "epoch": 2.3243724936878065, + "grad_norm": 0.5207847952842712, + "learning_rate": 7.675627506312195e-06, + "loss": 0.0817, "step": 15650 }, { - "epoch": 1.1629288578642507, - "grad_norm": 1.9094624519348145, - "learning_rate": 2.30224268528145e-05, - "loss": 0.0778, + "epoch": 2.3258577157285014, + "grad_norm": 0.7792304754257202, + "learning_rate": 7.674142284271499e-06, + "loss": 0.0807, "step": 15660 }, { - "epoch": 1.1636714688845982, - "grad_norm": 2.032755136489868, - "learning_rate": 2.301797118669241e-05, - "loss": 0.0674, + "epoch": 2.3273429377691963, + "grad_norm": 0.8187819123268127, + "learning_rate": 7.672657062230803e-06, + "loss": 0.079, "step": 15670 }, { - "epoch": 1.1644140799049458, - "grad_norm": 2.188685417175293, - "learning_rate": 2.3013515520570325e-05, - "loss": 0.1031, + "epoch": 2.3288281598098917, + "grad_norm": 0.4100897014141083, + "learning_rate": 7.67117184019011e-06, + "loss": 0.073, "step": 15680 }, { - "epoch": 1.1651566909252933, - "grad_norm": 0.8258926868438721, - "learning_rate": 2.3009059854448244e-05, - "loss": 0.0641, + "epoch": 2.3303133818505866, + "grad_norm": 0.7977427244186401, + "learning_rate": 7.669686618149414e-06, + "loss": 0.0787, "step": 15690 }, { - "epoch": 1.165899301945641, - "grad_norm": 0.9573965668678284, - "learning_rate": 2.3004604188326155e-05, - "loss": 0.0601, + "epoch": 2.331798603891282, + "grad_norm": 0.5222535729408264, + "learning_rate": 7.668201396108718e-06, + "loss": 0.075, "step": 15700 }, { - "epoch": 1.1666419129659884, - "grad_norm": 1.2031244039535522, - "learning_rate": 2.300014852220407e-05, - "loss": 0.0518, + "epoch": 2.333283825931977, + "grad_norm": 0.7441068887710571, + "learning_rate": 7.666716174068025e-06, + "loss": 0.0697, "step": 15710 }, { - "epoch": 1.1673845239863359, - "grad_norm": 3.0771892070770264, - "learning_rate": 2.2995692856081982e-05, - "loss": 0.1124, + "epoch": 2.3347690479726717, + "grad_norm": 0.9779987931251526, + "learning_rate": 7.665230952027329e-06, + "loss": 0.0756, "step": 15720 }, { - "epoch": 1.1681271350066835, - "grad_norm": 1.2991482019424438, - "learning_rate": 2.29912371899599e-05, - "loss": 0.0508, + "epoch": 2.336254270013367, + "grad_norm": 1.1214604377746582, + "learning_rate": 7.663745729986633e-06, + "loss": 0.0663, "step": 15730 }, { - "epoch": 1.168869746027031, - "grad_norm": 0.9079421162605286, - "learning_rate": 2.2986781523837815e-05, - "loss": 0.0638, + "epoch": 2.337739492054062, + "grad_norm": 0.6864479184150696, + "learning_rate": 7.66226050794594e-06, + "loss": 0.0506, "step": 15740 }, { - "epoch": 1.1696123570473786, - "grad_norm": 1.197899341583252, - "learning_rate": 2.2982325857715727e-05, - "loss": 0.0828, + "epoch": 2.3392247140947573, + "grad_norm": 0.6545161008834839, + "learning_rate": 7.660775285905242e-06, + "loss": 0.0716, "step": 15750 }, { - "epoch": 1.170354968067726, - "grad_norm": 1.4822005033493042, - "learning_rate": 2.2977870191593645e-05, - "loss": 0.0511, + "epoch": 2.340709936135452, + "grad_norm": 0.6110475659370422, + "learning_rate": 7.659290063864548e-06, + "loss": 0.0807, "step": 15760 }, { - "epoch": 1.1710975790880735, - "grad_norm": 2.059238910675049, - "learning_rate": 2.297341452547156e-05, - "loss": 0.0762, + "epoch": 2.342195158176147, + "grad_norm": 0.7456690073013306, + "learning_rate": 7.657804841823855e-06, + "loss": 0.0717, "step": 15770 }, { - "epoch": 1.1718401901084212, - "grad_norm": 2.548032283782959, - "learning_rate": 2.2968958859349472e-05, - "loss": 0.0711, + "epoch": 2.3436803802168424, + "grad_norm": 1.7456918954849243, + "learning_rate": 7.656319619783157e-06, + "loss": 0.0838, "step": 15780 }, { - "epoch": 1.1725828011287687, - "grad_norm": 2.6228041648864746, - "learning_rate": 2.2964503193227387e-05, - "loss": 0.0709, + "epoch": 2.3451656022575373, + "grad_norm": 0.856558620929718, + "learning_rate": 7.654834397742463e-06, + "loss": 0.0662, "step": 15790 }, { - "epoch": 1.1733254121491163, - "grad_norm": 1.9563509225845337, - "learning_rate": 2.2960047527105302e-05, - "loss": 0.0589, + "epoch": 2.3466508242982327, + "grad_norm": 0.6788325309753418, + "learning_rate": 7.653349175701768e-06, + "loss": 0.0605, "step": 15800 }, { - "epoch": 1.1740680231694638, - "grad_norm": 1.4019722938537598, - "learning_rate": 2.2955591860983217e-05, - "loss": 0.0733, + "epoch": 2.3481360463389276, + "grad_norm": 0.6518844366073608, + "learning_rate": 7.651863953661072e-06, + "loss": 0.0653, "step": 15810 }, { - "epoch": 1.1748106341898115, - "grad_norm": 1.6653647422790527, - "learning_rate": 2.2951136194861132e-05, - "loss": 0.0505, + "epoch": 2.349621268379623, + "grad_norm": 0.9282351732254028, + "learning_rate": 7.650378731620378e-06, + "loss": 0.0545, "step": 15820 }, { - "epoch": 1.175553245210159, - "grad_norm": 2.455420732498169, - "learning_rate": 2.2946680528739047e-05, - "loss": 0.1232, + "epoch": 2.351106490420318, + "grad_norm": 1.422558307647705, + "learning_rate": 7.648893509579683e-06, + "loss": 0.0685, "step": 15830 }, { - "epoch": 1.1762958562305064, - "grad_norm": 1.1181570291519165, - "learning_rate": 2.2942224862616962e-05, - "loss": 0.0626, + "epoch": 2.3525917124610127, + "grad_norm": 1.116840124130249, + "learning_rate": 7.647408287538987e-06, + "loss": 0.0733, "step": 15840 }, { - "epoch": 1.177038467250854, - "grad_norm": 1.8775357007980347, - "learning_rate": 2.2937769196494877e-05, - "loss": 0.0971, + "epoch": 2.354076934501708, + "grad_norm": 0.8625660538673401, + "learning_rate": 7.645923065498293e-06, + "loss": 0.0631, "step": 15850 }, { - "epoch": 1.1777810782712015, - "grad_norm": 1.8063764572143555, - "learning_rate": 2.2933313530372792e-05, - "loss": 0.12, + "epoch": 2.355562156542403, + "grad_norm": 0.8200535774230957, + "learning_rate": 7.644437843457598e-06, + "loss": 0.0517, "step": 15860 }, { - "epoch": 1.1785236892915492, - "grad_norm": 0.9645183682441711, - "learning_rate": 2.2928857864250707e-05, - "loss": 0.0783, + "epoch": 2.3570473785830983, + "grad_norm": 0.6217565536499023, + "learning_rate": 7.642952621416902e-06, + "loss": 0.0644, "step": 15870 }, { - "epoch": 1.1792663003118966, - "grad_norm": 2.029613971710205, - "learning_rate": 2.2924402198128622e-05, - "loss": 0.1101, + "epoch": 2.358532600623793, + "grad_norm": 0.6351733207702637, + "learning_rate": 7.641467399376208e-06, + "loss": 0.0651, "step": 15880 }, { - "epoch": 1.1800089113322443, - "grad_norm": 0.7178744673728943, - "learning_rate": 2.2919946532006534e-05, - "loss": 0.0689, + "epoch": 2.3600178226644886, + "grad_norm": 1.270407795906067, + "learning_rate": 7.639982177335513e-06, + "loss": 0.0438, "step": 15890 }, { - "epoch": 1.1807515223525917, - "grad_norm": 1.271041750907898, - "learning_rate": 2.2915490865884452e-05, - "loss": 0.0662, + "epoch": 2.3615030447051835, + "grad_norm": 1.0128871202468872, + "learning_rate": 7.638496955294817e-06, + "loss": 0.0696, "step": 15900 }, { - "epoch": 1.1814941333729392, - "grad_norm": 2.572619915008545, - "learning_rate": 2.2911035199762364e-05, - "loss": 0.0677, + "epoch": 2.3629882667458784, + "grad_norm": 0.6585198044776917, + "learning_rate": 7.637011733254122e-06, + "loss": 0.0741, "step": 15910 }, { - "epoch": 1.1822367443932869, - "grad_norm": 0.5422751307487488, - "learning_rate": 2.290657953364028e-05, - "loss": 0.0662, + "epoch": 2.3644734887865737, + "grad_norm": 1.2549078464508057, + "learning_rate": 7.635526511213426e-06, + "loss": 0.0828, "step": 15920 }, { - "epoch": 1.1829793554136343, - "grad_norm": 2.011805295944214, - "learning_rate": 2.2902123867518197e-05, - "loss": 0.078, + "epoch": 2.3659587108272686, + "grad_norm": 0.5107712745666504, + "learning_rate": 7.634041289172732e-06, + "loss": 0.0643, "step": 15930 }, { - "epoch": 1.183721966433982, - "grad_norm": 1.7894953489303589, - "learning_rate": 2.289766820139611e-05, - "loss": 0.0817, + "epoch": 2.367443932867964, + "grad_norm": 1.306120753288269, + "learning_rate": 7.632556067132037e-06, + "loss": 0.0641, "step": 15940 }, { - "epoch": 1.1844645774543294, - "grad_norm": 0.9145591855049133, - "learning_rate": 2.2893212535274024e-05, - "loss": 0.0867, + "epoch": 2.368929154908659, + "grad_norm": 0.6712403297424316, + "learning_rate": 7.631070845091341e-06, + "loss": 0.0667, "step": 15950 }, { - "epoch": 1.1852071884746769, - "grad_norm": 2.027958631515503, - "learning_rate": 2.288875686915194e-05, - "loss": 0.0579, + "epoch": 2.3704143769493538, + "grad_norm": 0.6917909979820251, + "learning_rate": 7.629585623050647e-06, + "loss": 0.0668, "step": 15960 }, { - "epoch": 1.1859497994950245, - "grad_norm": 0.9415296316146851, - "learning_rate": 2.2884301203029853e-05, - "loss": 0.0854, + "epoch": 2.371899598990049, + "grad_norm": 1.065975308418274, + "learning_rate": 7.628100401009951e-06, + "loss": 0.0779, "step": 15970 }, { - "epoch": 1.186692410515372, - "grad_norm": 2.021057367324829, - "learning_rate": 2.287984553690777e-05, - "loss": 0.073, + "epoch": 2.373384821030744, + "grad_norm": 0.6773597598075867, + "learning_rate": 7.626615178969257e-06, + "loss": 0.0585, "step": 15980 }, { - "epoch": 1.1874350215357197, - "grad_norm": 1.6900122165679932, - "learning_rate": 2.2875389870785683e-05, - "loss": 0.0772, + "epoch": 2.3748700430714393, + "grad_norm": 0.946287989616394, + "learning_rate": 7.625129956928562e-06, + "loss": 0.0549, "step": 15990 }, { - "epoch": 1.1881776325560671, - "grad_norm": 0.6172839999198914, - "learning_rate": 2.28709342046636e-05, - "loss": 0.0522, + "epoch": 2.3763552651121342, + "grad_norm": 1.1909871101379395, + "learning_rate": 7.623644734887866e-06, + "loss": 0.0898, "step": 16000 }, { - "epoch": 1.1889202435764146, - "grad_norm": 2.918687105178833, - "learning_rate": 2.2866478538541513e-05, - "loss": 0.0566, + "epoch": 2.377840487152829, + "grad_norm": 0.9652341604232788, + "learning_rate": 7.622159512847171e-06, + "loss": 0.0686, "step": 16010 }, { - "epoch": 1.1896628545967622, - "grad_norm": 1.5707764625549316, - "learning_rate": 2.2862022872419425e-05, - "loss": 0.0721, + "epoch": 2.3793257091935245, + "grad_norm": 0.5234491229057312, + "learning_rate": 7.620674290806476e-06, + "loss": 0.0668, "step": 16020 }, { - "epoch": 1.1904054656171097, - "grad_norm": 1.0280346870422363, - "learning_rate": 2.285756720629734e-05, - "loss": 0.0646, + "epoch": 2.3808109312342194, + "grad_norm": 0.6725775599479675, + "learning_rate": 7.619189068765781e-06, + "loss": 0.0673, "step": 16030 }, { - "epoch": 1.1911480766374574, - "grad_norm": 1.4963786602020264, - "learning_rate": 2.285311154017526e-05, - "loss": 0.0555, + "epoch": 2.3822961532749147, + "grad_norm": 0.7732347249984741, + "learning_rate": 7.617703846725086e-06, + "loss": 0.0698, "step": 16040 }, { - "epoch": 1.1918906876578048, - "grad_norm": 0.9982985258102417, - "learning_rate": 2.284865587405317e-05, - "loss": 0.0647, + "epoch": 2.3837813753156096, + "grad_norm": 1.0386955738067627, + "learning_rate": 7.616218624684391e-06, + "loss": 0.0679, "step": 16050 }, { - "epoch": 1.1926332986781523, - "grad_norm": 1.1375582218170166, - "learning_rate": 2.2844200207931085e-05, - "loss": 0.0728, + "epoch": 2.3852665973563045, + "grad_norm": 0.6354694366455078, + "learning_rate": 7.614733402643696e-06, + "loss": 0.0798, "step": 16060 }, { - "epoch": 1.1933759096985, - "grad_norm": 4.054408073425293, - "learning_rate": 2.2839744541809003e-05, - "loss": 0.0874, + "epoch": 2.386751819397, + "grad_norm": 0.8849825263023376, + "learning_rate": 7.613248180603001e-06, + "loss": 0.063, "step": 16070 }, { - "epoch": 1.1941185207188474, - "grad_norm": 3.6856000423431396, - "learning_rate": 2.2835288875686915e-05, - "loss": 0.1011, + "epoch": 2.3882370414376948, + "grad_norm": 1.465538501739502, + "learning_rate": 7.611762958562306e-06, + "loss": 0.0867, "step": 16080 }, { - "epoch": 1.194861131739195, - "grad_norm": 3.7098820209503174, - "learning_rate": 2.283083320956483e-05, - "loss": 0.0638, + "epoch": 2.38972226347839, + "grad_norm": 1.1470627784729004, + "learning_rate": 7.610277736521611e-06, + "loss": 0.0654, "step": 16090 }, { - "epoch": 1.1956037427595425, - "grad_norm": 0.7960993647575378, - "learning_rate": 2.282637754344275e-05, - "loss": 0.0832, + "epoch": 2.391207485519085, + "grad_norm": 0.7879219055175781, + "learning_rate": 7.608792514480916e-06, + "loss": 0.0732, "step": 16100 }, { - "epoch": 1.1963463537798902, - "grad_norm": 1.0602184534072876, - "learning_rate": 2.282192187732066e-05, - "loss": 0.0824, + "epoch": 2.3926927075597804, + "grad_norm": 1.3938816785812378, + "learning_rate": 7.6073072924402206e-06, + "loss": 0.0872, "step": 16110 }, { - "epoch": 1.1970889648002376, - "grad_norm": 1.5129131078720093, - "learning_rate": 2.2817466211198575e-05, - "loss": 0.0734, + "epoch": 2.3941779296004753, + "grad_norm": 0.9284458160400391, + "learning_rate": 7.605822070399526e-06, + "loss": 0.0724, "step": 16120 }, { - "epoch": 1.197831575820585, - "grad_norm": 2.3577966690063477, - "learning_rate": 2.2813010545076487e-05, - "loss": 0.0931, + "epoch": 2.39566315164117, + "grad_norm": 0.7141979932785034, + "learning_rate": 7.6043368483588294e-06, + "loss": 0.0826, "step": 16130 }, { - "epoch": 1.1985741868409328, - "grad_norm": 2.6945302486419678, - "learning_rate": 2.2808554878954405e-05, - "loss": 0.1088, + "epoch": 2.3971483736818655, + "grad_norm": 0.3661617934703827, + "learning_rate": 7.602851626318135e-06, + "loss": 0.0683, "step": 16140 }, { - "epoch": 1.1993167978612802, - "grad_norm": 1.0213791131973267, - "learning_rate": 2.280409921283232e-05, - "loss": 0.0683, + "epoch": 2.3986335957225604, + "grad_norm": 1.011782169342041, + "learning_rate": 7.601366404277441e-06, + "loss": 0.0769, "step": 16150 }, { - "epoch": 1.2000594088816279, - "grad_norm": 0.7909874320030212, - "learning_rate": 2.279964354671023e-05, - "loss": 0.0453, + "epoch": 2.4001188177632558, + "grad_norm": 0.7773118615150452, + "learning_rate": 7.5998811822367444e-06, + "loss": 0.0718, "step": 16160 }, { - "epoch": 1.2008020199019753, - "grad_norm": 1.2978131771087646, - "learning_rate": 2.279518788058815e-05, - "loss": 0.0787, + "epoch": 2.4016040398039507, + "grad_norm": 1.1565062999725342, + "learning_rate": 7.59839596019605e-06, + "loss": 0.0524, "step": 16170 }, { - "epoch": 1.201544630922323, - "grad_norm": 0.5896059274673462, - "learning_rate": 2.2790732214466065e-05, - "loss": 0.0719, + "epoch": 2.403089261844646, + "grad_norm": 0.8525496125221252, + "learning_rate": 7.596910738155355e-06, + "loss": 0.0888, "step": 16180 }, { - "epoch": 1.2022872419426704, - "grad_norm": 2.9565205574035645, - "learning_rate": 2.2786276548343977e-05, - "loss": 0.1046, + "epoch": 2.404574483885341, + "grad_norm": 0.6374387741088867, + "learning_rate": 7.5954255161146594e-06, + "loss": 0.0855, "step": 16190 }, { - "epoch": 1.203029852963018, - "grad_norm": 4.079509258270264, - "learning_rate": 2.278182088222189e-05, - "loss": 0.0686, + "epoch": 2.406059705926036, + "grad_norm": 0.8507137298583984, + "learning_rate": 7.593940294073965e-06, + "loss": 0.0696, "step": 16200 }, { - "epoch": 1.2037724639833656, - "grad_norm": 2.044127941131592, - "learning_rate": 2.277736521609981e-05, - "loss": 0.0618, + "epoch": 2.407544927966731, + "grad_norm": 0.7530370950698853, + "learning_rate": 7.59245507203327e-06, + "loss": 0.0742, "step": 16210 }, { - "epoch": 1.204515075003713, - "grad_norm": 2.2022972106933594, - "learning_rate": 2.277290954997772e-05, - "loss": 0.0973, + "epoch": 2.409030150007426, + "grad_norm": 0.3654564917087555, + "learning_rate": 7.590969849992574e-06, + "loss": 0.0675, "step": 16220 }, { - "epoch": 1.2052576860240607, - "grad_norm": 1.957979440689087, - "learning_rate": 2.2768453883855637e-05, - "loss": 0.0765, + "epoch": 2.4105153720481214, + "grad_norm": 0.8282201290130615, + "learning_rate": 7.58948462795188e-06, + "loss": 0.0599, "step": 16230 }, { - "epoch": 1.2060002970444081, - "grad_norm": 0.7121636271476746, - "learning_rate": 2.276399821773355e-05, - "loss": 0.0732, + "epoch": 2.4120005940888163, + "grad_norm": 0.7527429461479187, + "learning_rate": 7.587999405911184e-06, + "loss": 0.0676, "step": 16240 }, { - "epoch": 1.2067429080647556, - "grad_norm": 2.143155574798584, - "learning_rate": 2.2759542551611467e-05, - "loss": 0.0564, + "epoch": 2.413485816129511, + "grad_norm": 0.8395132422447205, + "learning_rate": 7.586514183870489e-06, + "loss": 0.0757, "step": 16250 }, { - "epoch": 1.2074855190851033, - "grad_norm": 2.7049849033355713, - "learning_rate": 2.275508688548938e-05, - "loss": 0.0806, + "epoch": 2.4149710381702065, + "grad_norm": 1.281912088394165, + "learning_rate": 7.585028961829795e-06, + "loss": 0.0783, "step": 16260 }, { - "epoch": 1.2082281301054507, - "grad_norm": 2.114739418029785, - "learning_rate": 2.2750631219367297e-05, - "loss": 0.0569, + "epoch": 2.4164562602109014, + "grad_norm": 1.0508184432983398, + "learning_rate": 7.583543739789099e-06, + "loss": 0.0864, "step": 16270 }, { - "epoch": 1.2089707411257984, - "grad_norm": 2.1519935131073, - "learning_rate": 2.274617555324521e-05, - "loss": 0.0727, + "epoch": 2.4179414822515968, + "grad_norm": 0.9719825983047485, + "learning_rate": 7.582058517748404e-06, + "loss": 0.0636, "step": 16280 }, { - "epoch": 1.2097133521461458, - "grad_norm": 0.7403703927993774, - "learning_rate": 2.2741719887123127e-05, - "loss": 0.0468, + "epoch": 2.4194267042922917, + "grad_norm": 0.9227995276451111, + "learning_rate": 7.58057329570771e-06, + "loss": 0.0874, "step": 16290 }, { - "epoch": 1.2104559631664933, - "grad_norm": 1.2450178861618042, - "learning_rate": 2.2737264221001038e-05, - "loss": 0.09, + "epoch": 2.4209119263329866, + "grad_norm": 0.854083776473999, + "learning_rate": 7.579088073667013e-06, + "loss": 0.0755, "step": 16300 }, { - "epoch": 1.211198574186841, - "grad_norm": 2.1269819736480713, - "learning_rate": 2.2732808554878957e-05, - "loss": 0.0532, + "epoch": 2.422397148373682, + "grad_norm": 0.48843663930892944, + "learning_rate": 7.5776028516263186e-06, + "loss": 0.0621, "step": 16310 }, { - "epoch": 1.2119411852071884, - "grad_norm": 1.752208948135376, - "learning_rate": 2.2728352888756868e-05, - "loss": 0.0908, + "epoch": 2.423882370414377, + "grad_norm": 0.956684410572052, + "learning_rate": 7.576117629585623e-06, + "loss": 0.0575, "step": 16320 }, { - "epoch": 1.212683796227536, - "grad_norm": 2.0299859046936035, - "learning_rate": 2.2723897222634783e-05, - "loss": 0.1128, + "epoch": 2.425367592455072, + "grad_norm": 1.1044747829437256, + "learning_rate": 7.574632407544928e-06, + "loss": 0.0726, "step": 16330 }, { - "epoch": 1.2134264072478835, - "grad_norm": 0.9291142225265503, - "learning_rate": 2.27194415565127e-05, - "loss": 0.07, + "epoch": 2.426852814495767, + "grad_norm": 0.8783396482467651, + "learning_rate": 7.5731471855042336e-06, + "loss": 0.0753, "step": 16340 }, { - "epoch": 1.214169018268231, - "grad_norm": 1.3614659309387207, - "learning_rate": 2.2714985890390613e-05, - "loss": 0.1037, + "epoch": 2.428338036536462, + "grad_norm": 0.7214937210083008, + "learning_rate": 7.571661963463538e-06, + "loss": 0.0471, "step": 16350 }, { - "epoch": 1.2149116292885787, - "grad_norm": 0.6833984851837158, - "learning_rate": 2.2710530224268528e-05, - "loss": 0.0574, + "epoch": 2.4298232585771573, + "grad_norm": 1.0365811586380005, + "learning_rate": 7.570176741422843e-06, + "loss": 0.0741, "step": 16360 }, { - "epoch": 1.215654240308926, - "grad_norm": 1.6602541208267212, - "learning_rate": 2.2706074558146443e-05, - "loss": 0.0677, + "epoch": 2.431308480617852, + "grad_norm": 0.936764657497406, + "learning_rate": 7.5686915193821485e-06, + "loss": 0.0779, "step": 16370 }, { - "epoch": 1.2163968513292738, - "grad_norm": 3.005326509475708, - "learning_rate": 2.2701618892024358e-05, - "loss": 0.0806, + "epoch": 2.4327937026585476, + "grad_norm": 0.803390622138977, + "learning_rate": 7.567206297341453e-06, + "loss": 0.0761, "step": 16380 }, { - "epoch": 1.2171394623496212, - "grad_norm": 0.7319986820220947, - "learning_rate": 2.2697163225902273e-05, - "loss": 0.0621, + "epoch": 2.4342789246992425, + "grad_norm": 0.5410571098327637, + "learning_rate": 7.565721075300758e-06, + "loss": 0.0751, "step": 16390 }, { - "epoch": 1.217882073369969, - "grad_norm": 1.5393048524856567, - "learning_rate": 2.2692707559780188e-05, - "loss": 0.0795, + "epoch": 2.435764146739938, + "grad_norm": 0.7092260122299194, + "learning_rate": 7.5642358532600635e-06, + "loss": 0.0671, "step": 16400 }, { - "epoch": 1.2186246843903163, - "grad_norm": 1.8766040802001953, - "learning_rate": 2.2688251893658103e-05, - "loss": 0.0525, + "epoch": 2.4372493687806327, + "grad_norm": 1.1456022262573242, + "learning_rate": 7.562750631219368e-06, + "loss": 0.0702, "step": 16410 }, { - "epoch": 1.2193672954106638, - "grad_norm": 0.824567437171936, - "learning_rate": 2.2683796227536018e-05, - "loss": 0.0746, + "epoch": 2.4387345908213276, + "grad_norm": 1.1234076023101807, + "learning_rate": 7.561265409178673e-06, + "loss": 0.0696, "step": 16420 }, { - "epoch": 1.2201099064310115, - "grad_norm": 0.9083713293075562, - "learning_rate": 2.267934056141393e-05, - "loss": 0.0791, + "epoch": 2.440219812862023, + "grad_norm": 1.503846526145935, + "learning_rate": 7.559780187137977e-06, + "loss": 0.0768, "step": 16430 }, { - "epoch": 1.220852517451359, - "grad_norm": 1.9115943908691406, - "learning_rate": 2.2674884895291848e-05, - "loss": 0.076, + "epoch": 2.441705034902718, + "grad_norm": 0.7656980752944946, + "learning_rate": 7.558294965097283e-06, + "loss": 0.0741, "step": 16440 }, { - "epoch": 1.2215951284717066, - "grad_norm": 1.7898435592651367, - "learning_rate": 2.2670429229169763e-05, - "loss": 0.0917, + "epoch": 2.443190256943413, + "grad_norm": 0.432203084230423, + "learning_rate": 7.556809743056588e-06, + "loss": 0.0712, "step": 16450 }, { - "epoch": 1.222337739492054, - "grad_norm": 3.3459744453430176, - "learning_rate": 2.2665973563047675e-05, - "loss": 0.0693, + "epoch": 2.444675478984108, + "grad_norm": 0.9114720225334167, + "learning_rate": 7.555324521015892e-06, + "loss": 0.071, "step": 16460 }, { - "epoch": 1.2230803505124017, - "grad_norm": 2.533830404281616, - "learning_rate": 2.266151789692559e-05, - "loss": 0.0948, + "epoch": 2.4461607010248034, + "grad_norm": 0.6950905323028564, + "learning_rate": 7.553839298975197e-06, + "loss": 0.0632, "step": 16470 }, { - "epoch": 1.2238229615327492, - "grad_norm": 0.8340369462966919, - "learning_rate": 2.2657062230803508e-05, - "loss": 0.0518, + "epoch": 2.4476459230654983, + "grad_norm": 0.7350353598594666, + "learning_rate": 7.552354076934502e-06, + "loss": 0.0614, "step": 16480 }, { - "epoch": 1.2245655725530966, - "grad_norm": 0.5460755825042725, - "learning_rate": 2.265260656468142e-05, - "loss": 0.0776, + "epoch": 2.4491311451061932, + "grad_norm": 0.4233296811580658, + "learning_rate": 7.550868854893807e-06, + "loss": 0.0779, "step": 16490 }, { - "epoch": 1.2253081835734443, - "grad_norm": 0.6815189719200134, - "learning_rate": 2.2648150898559335e-05, - "loss": 0.0595, + "epoch": 2.4506163671468886, + "grad_norm": 1.4698457717895508, + "learning_rate": 7.549383632853112e-06, + "loss": 0.0786, "step": 16500 }, { - "epoch": 1.2260507945937917, - "grad_norm": 1.1513220071792603, - "learning_rate": 2.2643695232437253e-05, - "loss": 0.146, + "epoch": 2.4521015891875835, + "grad_norm": 0.9390078783035278, + "learning_rate": 7.547898410812417e-06, + "loss": 0.0748, "step": 16510 }, { - "epoch": 1.2267934056141394, - "grad_norm": 1.9079151153564453, - "learning_rate": 2.2639239566315165e-05, - "loss": 0.0827, + "epoch": 2.453586811228279, + "grad_norm": 0.9275726675987244, + "learning_rate": 7.546413188771722e-06, + "loss": 0.0713, "step": 16520 }, { - "epoch": 1.2275360166344869, - "grad_norm": 1.8923051357269287, - "learning_rate": 2.263478390019308e-05, - "loss": 0.1135, + "epoch": 2.4550720332689737, + "grad_norm": 1.1759885549545288, + "learning_rate": 7.544927966731027e-06, + "loss": 0.0776, "step": 16530 }, { - "epoch": 1.2282786276548343, - "grad_norm": 1.8979130983352661, - "learning_rate": 2.263032823407099e-05, - "loss": 0.0895, + "epoch": 2.4565572553096686, + "grad_norm": 0.8219086527824402, + "learning_rate": 7.5434427446903315e-06, + "loss": 0.066, "step": 16540 }, { - "epoch": 1.229021238675182, - "grad_norm": 1.6301295757293701, - "learning_rate": 2.262587256794891e-05, - "loss": 0.0891, + "epoch": 2.458042477350364, + "grad_norm": 0.8670978546142578, + "learning_rate": 7.541957522649637e-06, + "loss": 0.073, "step": 16550 }, { - "epoch": 1.2297638496955294, - "grad_norm": 1.7979081869125366, - "learning_rate": 2.2621416901826825e-05, - "loss": 0.0765, + "epoch": 2.459527699391059, + "grad_norm": 0.8667005896568298, + "learning_rate": 7.540472300608942e-06, + "loss": 0.0562, "step": 16560 }, { - "epoch": 1.230506460715877, - "grad_norm": 0.5826703310012817, - "learning_rate": 2.2616961235704736e-05, - "loss": 0.0668, + "epoch": 2.461012921431754, + "grad_norm": 1.1121703386306763, + "learning_rate": 7.5389870785682465e-06, + "loss": 0.0686, "step": 16570 }, { - "epoch": 1.2312490717362246, - "grad_norm": 1.6885042190551758, - "learning_rate": 2.2612505569582655e-05, - "loss": 0.0646, + "epoch": 2.462498143472449, + "grad_norm": 0.5565051436424255, + "learning_rate": 7.537501856527552e-06, + "loss": 0.0613, "step": 16580 }, { - "epoch": 1.231991682756572, - "grad_norm": 0.9739753603935242, - "learning_rate": 2.260804990346057e-05, - "loss": 0.0566, + "epoch": 2.463983365513144, + "grad_norm": 0.5451401472091675, + "learning_rate": 7.536016634486857e-06, + "loss": 0.0525, "step": 16590 }, { - "epoch": 1.2327342937769197, - "grad_norm": 0.9012984037399292, - "learning_rate": 2.260359423733848e-05, - "loss": 0.0785, + "epoch": 2.4654685875538394, + "grad_norm": 0.6481854915618896, + "learning_rate": 7.534531412446161e-06, + "loss": 0.0624, "step": 16600 }, { - "epoch": 1.2334769047972671, - "grad_norm": 0.8952996134757996, - "learning_rate": 2.2599138571216396e-05, - "loss": 0.0655, + "epoch": 2.4669538095945343, + "grad_norm": 0.8952411413192749, + "learning_rate": 7.533046190405466e-06, + "loss": 0.0599, "step": 16610 }, { - "epoch": 1.2342195158176148, - "grad_norm": 2.116847515106201, - "learning_rate": 2.2594682905094315e-05, - "loss": 0.077, + "epoch": 2.4684390316352296, + "grad_norm": 0.49602147936820984, + "learning_rate": 7.531560968364772e-06, + "loss": 0.0656, "step": 16620 }, { - "epoch": 1.2349621268379622, - "grad_norm": 0.5466039180755615, - "learning_rate": 2.2590227238972226e-05, - "loss": 0.0754, + "epoch": 2.4699242536759245, + "grad_norm": 1.612073540687561, + "learning_rate": 7.530075746324076e-06, + "loss": 0.0725, "step": 16630 }, { - "epoch": 1.2357047378583097, - "grad_norm": 0.7208026647567749, - "learning_rate": 2.258577157285014e-05, - "loss": 0.0987, + "epoch": 2.4714094757166194, + "grad_norm": 1.0118762254714966, + "learning_rate": 7.528590524283381e-06, + "loss": 0.0682, "step": 16640 }, { - "epoch": 1.2364473488786574, - "grad_norm": 1.444373607635498, - "learning_rate": 2.2581315906728056e-05, - "loss": 0.0711, + "epoch": 2.4728946977573147, + "grad_norm": 0.7182286977767944, + "learning_rate": 7.527105302242685e-06, + "loss": 0.0621, "step": 16650 }, { - "epoch": 1.2371899598990048, - "grad_norm": 1.2086124420166016, - "learning_rate": 2.257686024060597e-05, - "loss": 0.0818, + "epoch": 2.4743799197980096, + "grad_norm": 0.4392257630825043, + "learning_rate": 7.525620080201991e-06, + "loss": 0.0548, "step": 16660 }, { - "epoch": 1.2379325709193525, - "grad_norm": 1.3369284868240356, - "learning_rate": 2.2572404574483886e-05, - "loss": 0.0722, + "epoch": 2.475865141838705, + "grad_norm": 0.3020372688770294, + "learning_rate": 7.524134858161296e-06, + "loss": 0.0607, "step": 16670 }, { - "epoch": 1.2386751819397, - "grad_norm": 0.9845725893974304, - "learning_rate": 2.25679489083618e-05, - "loss": 0.072, + "epoch": 2.4773503638794, + "grad_norm": 1.2162078619003296, + "learning_rate": 7.5226496361206e-06, + "loss": 0.0564, "step": 16680 }, { - "epoch": 1.2394177929600476, - "grad_norm": 0.6263337135314941, - "learning_rate": 2.2563493242239716e-05, - "loss": 0.0455, + "epoch": 2.4788355859200952, + "grad_norm": 1.0460399389266968, + "learning_rate": 7.521164414079906e-06, + "loss": 0.0671, "step": 16690 }, { - "epoch": 1.240160403980395, - "grad_norm": 1.889050841331482, - "learning_rate": 2.255903757611763e-05, - "loss": 0.0967, + "epoch": 2.48032080796079, + "grad_norm": 0.5017814636230469, + "learning_rate": 7.519679192039211e-06, + "loss": 0.0618, "step": 16700 }, { - "epoch": 1.2409030150007425, - "grad_norm": 0.5217537879943848, - "learning_rate": 2.2554581909995543e-05, - "loss": 0.0857, + "epoch": 2.481806030001485, + "grad_norm": 0.7603805065155029, + "learning_rate": 7.518193969998515e-06, + "loss": 0.076, "step": 16710 }, { - "epoch": 1.2416456260210902, - "grad_norm": 1.730975866317749, - "learning_rate": 2.255012624387346e-05, - "loss": 0.0831, + "epoch": 2.4832912520421804, + "grad_norm": 0.6417643427848816, + "learning_rate": 7.516708747957821e-06, + "loss": 0.085, "step": 16720 }, { - "epoch": 1.2423882370414376, - "grad_norm": 0.8797131776809692, - "learning_rate": 2.2545670577751373e-05, - "loss": 0.071, + "epoch": 2.4847764740828753, + "grad_norm": 0.7605277895927429, + "learning_rate": 7.515223525917125e-06, + "loss": 0.0974, "step": 16730 }, { - "epoch": 1.2431308480617853, - "grad_norm": 2.5811779499053955, - "learning_rate": 2.2541214911629288e-05, - "loss": 0.0956, + "epoch": 2.4862616961235706, + "grad_norm": 0.7401385307312012, + "learning_rate": 7.51373830387643e-06, + "loss": 0.0659, "step": 16740 }, { - "epoch": 1.2438734590821328, - "grad_norm": 1.4201879501342773, - "learning_rate": 2.2536759245507206e-05, - "loss": 0.0722, + "epoch": 2.4877469181642655, + "grad_norm": 1.0905494689941406, + "learning_rate": 7.512253081835736e-06, + "loss": 0.0756, "step": 16750 }, { - "epoch": 1.2446160701024804, - "grad_norm": 2.9858505725860596, - "learning_rate": 2.2532303579385118e-05, - "loss": 0.0867, + "epoch": 2.489232140204961, + "grad_norm": 0.9249421954154968, + "learning_rate": 7.510767859795039e-06, + "loss": 0.0699, "step": 16760 }, { - "epoch": 1.2453586811228279, - "grad_norm": 2.050238609313965, - "learning_rate": 2.2527847913263033e-05, - "loss": 0.0588, + "epoch": 2.4907173622456558, + "grad_norm": 1.4558953046798706, + "learning_rate": 7.5092826377543445e-06, + "loss": 0.073, "step": 16770 }, { - "epoch": 1.2461012921431753, - "grad_norm": 3.033705472946167, - "learning_rate": 2.2523392247140948e-05, - "loss": 0.0687, + "epoch": 2.4922025842863507, + "grad_norm": 1.035396933555603, + "learning_rate": 7.50779741571365e-06, + "loss": 0.0673, "step": 16780 }, { - "epoch": 1.246843903163523, - "grad_norm": 1.8493062257766724, - "learning_rate": 2.2518936581018863e-05, - "loss": 0.0653, + "epoch": 2.493687806327046, + "grad_norm": 0.9037249684333801, + "learning_rate": 7.506312193672954e-06, + "loss": 0.0732, "step": 16790 }, { - "epoch": 1.2475865141838705, - "grad_norm": 2.4249043464660645, - "learning_rate": 2.2514480914896778e-05, - "loss": 0.1034, + "epoch": 2.495173028367741, + "grad_norm": 0.8131252527236938, + "learning_rate": 7.5048269716322595e-06, + "loss": 0.0783, "step": 16800 }, { - "epoch": 1.2483291252042181, - "grad_norm": 0.8284013271331787, - "learning_rate": 2.2510025248774693e-05, - "loss": 0.0978, + "epoch": 2.4966582504084363, + "grad_norm": 1.5988869667053223, + "learning_rate": 7.503341749591565e-06, + "loss": 0.0664, "step": 16810 }, { - "epoch": 1.2490717362245656, - "grad_norm": 1.3202928304672241, - "learning_rate": 2.2505569582652608e-05, - "loss": 0.0709, + "epoch": 2.498143472449131, + "grad_norm": 0.6440235376358032, + "learning_rate": 7.501856527550869e-06, + "loss": 0.061, "step": 16820 }, { - "epoch": 1.249814347244913, - "grad_norm": 1.9100357294082642, - "learning_rate": 2.2501113916530523e-05, - "loss": 0.085, - "step": 16830 + "epoch": 2.499628694489826, + "grad_norm": 0.5629106760025024, + "learning_rate": 7.5003713055101745e-06, + "loss": 0.0504, + "step": 16830 }, { - "epoch": 1.2505569582652607, - "grad_norm": 1.8051328659057617, - "learning_rate": 2.2496658250408434e-05, - "loss": 0.0653, + "epoch": 2.5011139165305214, + "grad_norm": 0.7344189286231995, + "learning_rate": 7.498886083469479e-06, + "loss": 0.0579, "step": 16840 }, { - "epoch": 1.2512995692856081, - "grad_norm": 0.7372807860374451, - "learning_rate": 2.2492202584286353e-05, - "loss": 0.0798, + "epoch": 2.5025991385712163, + "grad_norm": 1.2263416051864624, + "learning_rate": 7.497400861428784e-06, + "loss": 0.0693, "step": 16850 }, { - "epoch": 1.2520421803059558, - "grad_norm": 1.493801474571228, - "learning_rate": 2.2487746918164268e-05, - "loss": 0.0862, + "epoch": 2.5040843606119116, + "grad_norm": 0.47904643416404724, + "learning_rate": 7.4959156393880895e-06, + "loss": 0.0691, "step": 16860 }, { - "epoch": 1.2527847913263033, - "grad_norm": 0.8077939748764038, - "learning_rate": 2.248329125204218e-05, - "loss": 0.0651, + "epoch": 2.5055695826526065, + "grad_norm": 0.4501626789569855, + "learning_rate": 7.494430417347394e-06, + "loss": 0.0465, "step": 16870 }, { - "epoch": 1.2535274023466507, - "grad_norm": 1.4354842901229858, - "learning_rate": 2.2478835585920094e-05, - "loss": 0.0754, + "epoch": 2.5070548046933014, + "grad_norm": 0.9378479719161987, + "learning_rate": 7.492945195306699e-06, + "loss": 0.0804, "step": 16880 }, { - "epoch": 1.2542700133669984, - "grad_norm": 1.9970204830169678, - "learning_rate": 2.2474379919798013e-05, - "loss": 0.0746, + "epoch": 2.508540026733997, + "grad_norm": 0.6688227653503418, + "learning_rate": 7.4914599732660045e-06, + "loss": 0.066, "step": 16890 }, { - "epoch": 1.2550126243873458, - "grad_norm": 0.7201411724090576, - "learning_rate": 2.2469924253675924e-05, - "loss": 0.0706, + "epoch": 2.5100252487746917, + "grad_norm": 1.1443583965301514, + "learning_rate": 7.489974751225308e-06, + "loss": 0.0745, "step": 16900 }, { - "epoch": 1.2557552354076935, - "grad_norm": 2.7510123252868652, - "learning_rate": 2.246546858755384e-05, - "loss": 0.0799, + "epoch": 2.511510470815387, + "grad_norm": 1.8711727857589722, + "learning_rate": 7.488489529184614e-06, + "loss": 0.0803, "step": 16910 }, { - "epoch": 1.256497846428041, - "grad_norm": 1.3006263971328735, - "learning_rate": 2.2461012921431758e-05, - "loss": 0.0689, + "epoch": 2.512995692856082, + "grad_norm": 1.0903749465942383, + "learning_rate": 7.4870043071439195e-06, + "loss": 0.0523, "step": 16920 }, { - "epoch": 1.2572404574483884, - "grad_norm": 2.1631722450256348, - "learning_rate": 2.245655725530967e-05, - "loss": 0.0484, + "epoch": 2.514480914896777, + "grad_norm": 0.7821912169456482, + "learning_rate": 7.485519085103223e-06, + "loss": 0.0839, "step": 16930 }, { - "epoch": 1.257983068468736, - "grad_norm": 0.6136536598205566, - "learning_rate": 2.2452101589187584e-05, - "loss": 0.1071, + "epoch": 2.515966136937472, + "grad_norm": 1.8524274826049805, + "learning_rate": 7.484033863062528e-06, + "loss": 0.0845, "step": 16940 }, { - "epoch": 1.2587256794890835, - "grad_norm": 2.494858503341675, - "learning_rate": 2.2447645923065496e-05, - "loss": 0.0693, + "epoch": 2.517451358978167, + "grad_norm": 0.7729329466819763, + "learning_rate": 7.482548641021833e-06, + "loss": 0.0607, "step": 16950 }, { - "epoch": 1.2594682905094312, - "grad_norm": 3.487287998199463, - "learning_rate": 2.2443190256943414e-05, - "loss": 0.0533, + "epoch": 2.5189365810188624, + "grad_norm": 0.6898858547210693, + "learning_rate": 7.481063418981138e-06, + "loss": 0.0587, "step": 16960 }, { - "epoch": 1.2602109015297787, - "grad_norm": 1.1997121572494507, - "learning_rate": 2.243873459082133e-05, - "loss": 0.0713, + "epoch": 2.5204218030595573, + "grad_norm": 0.9820473194122314, + "learning_rate": 7.479578196940443e-06, + "loss": 0.0607, "step": 16970 }, { - "epoch": 1.260953512550126, - "grad_norm": 1.8079684972763062, - "learning_rate": 2.243427892469924e-05, - "loss": 0.0849, + "epoch": 2.521907025100252, + "grad_norm": 0.8298704028129578, + "learning_rate": 7.478092974899748e-06, + "loss": 0.0607, "step": 16980 }, { - "epoch": 1.2616961235704738, - "grad_norm": 1.9124133586883545, - "learning_rate": 2.242982325857716e-05, - "loss": 0.0789, + "epoch": 2.5233922471409476, + "grad_norm": 1.4189072847366333, + "learning_rate": 7.476607752859053e-06, + "loss": 0.0889, "step": 16990 }, { - "epoch": 1.2624387345908215, - "grad_norm": 1.6641535758972168, - "learning_rate": 2.2425367592455074e-05, - "loss": 0.0694, + "epoch": 2.524877469181643, + "grad_norm": 0.6076411008834839, + "learning_rate": 7.475122530818358e-06, + "loss": 0.0608, "step": 17000 }, { - "epoch": 1.263181345611169, - "grad_norm": 1.7782231569290161, - "learning_rate": 2.2420911926332986e-05, - "loss": 0.0773, + "epoch": 2.526362691222338, + "grad_norm": 0.8957213163375854, + "learning_rate": 7.473637308777663e-06, + "loss": 0.0694, "step": 17010 }, { - "epoch": 1.2639239566315164, - "grad_norm": 1.2206722497940063, - "learning_rate": 2.24164562602109e-05, - "loss": 0.0739, + "epoch": 2.5278479132630327, + "grad_norm": 0.5196698307991028, + "learning_rate": 7.472152086736968e-06, + "loss": 0.0545, "step": 17020 }, { - "epoch": 1.264666567651864, - "grad_norm": 0.541761577129364, - "learning_rate": 2.241200059408882e-05, - "loss": 0.0639, + "epoch": 2.529333135303728, + "grad_norm": 1.1449114084243774, + "learning_rate": 7.470666864696273e-06, + "loss": 0.0698, "step": 17030 }, { - "epoch": 1.2654091786722115, - "grad_norm": 0.547622561454773, - "learning_rate": 2.240754492796673e-05, - "loss": 0.0702, + "epoch": 2.530818357344423, + "grad_norm": 0.8476200699806213, + "learning_rate": 7.469181642655578e-06, + "loss": 0.0467, "step": 17040 }, { - "epoch": 1.2661517896925591, - "grad_norm": 4.135508060455322, - "learning_rate": 2.2403089261844646e-05, - "loss": 0.0927, + "epoch": 2.5323035793851183, + "grad_norm": 1.2697662115097046, + "learning_rate": 7.467696420614883e-06, + "loss": 0.0828, "step": 17050 }, { - "epoch": 1.2668944007129066, - "grad_norm": 3.19258975982666, - "learning_rate": 2.239863359572256e-05, - "loss": 0.0736, + "epoch": 2.533788801425813, + "grad_norm": 0.7003705501556396, + "learning_rate": 7.466211198574187e-06, + "loss": 0.0812, "step": 17060 }, { - "epoch": 1.267637011733254, - "grad_norm": 1.1759032011032104, - "learning_rate": 2.2394177929600476e-05, - "loss": 0.0686, + "epoch": 2.535274023466508, + "grad_norm": 1.0060502290725708, + "learning_rate": 7.464725976533492e-06, + "loss": 0.0668, "step": 17070 }, { - "epoch": 1.2683796227536017, - "grad_norm": 0.7790769338607788, - "learning_rate": 2.238972226347839e-05, - "loss": 0.0961, + "epoch": 2.5367592455072034, + "grad_norm": 0.6963827610015869, + "learning_rate": 7.463240754492797e-06, + "loss": 0.0587, "step": 17080 }, { - "epoch": 1.2691222337739492, - "grad_norm": 0.9479905962944031, - "learning_rate": 2.2385266597356306e-05, - "loss": 0.0555, + "epoch": 2.5382444675478983, + "grad_norm": 1.017677903175354, + "learning_rate": 7.461755532452102e-06, + "loss": 0.07, "step": 17090 }, { - "epoch": 1.2698648447942968, - "grad_norm": 1.7313250303268433, - "learning_rate": 2.238081093123422e-05, - "loss": 0.099, + "epoch": 2.5397296895885937, + "grad_norm": 0.9492692947387695, + "learning_rate": 7.460270310411407e-06, + "loss": 0.0635, "step": 17100 }, { - "epoch": 1.2706074558146443, - "grad_norm": 0.5795320272445679, - "learning_rate": 2.2376355265112136e-05, - "loss": 0.0669, + "epoch": 2.5412149116292886, + "grad_norm": 0.9661943316459656, + "learning_rate": 7.458785088370712e-06, + "loss": 0.0644, "step": 17110 }, { - "epoch": 1.2713500668349917, - "grad_norm": 1.634346604347229, - "learning_rate": 2.2371899598990047e-05, - "loss": 0.0771, + "epoch": 2.5427001336699835, + "grad_norm": 1.2824933528900146, + "learning_rate": 7.457299866330017e-06, + "loss": 0.0802, "step": 17120 }, { - "epoch": 1.2720926778553394, - "grad_norm": 2.006558656692505, - "learning_rate": 2.2367443932867966e-05, - "loss": 0.0762, + "epoch": 2.544185355710679, + "grad_norm": 0.9125545620918274, + "learning_rate": 7.455814644289322e-06, + "loss": 0.0685, "step": 17130 }, { - "epoch": 1.2728352888756869, - "grad_norm": 1.0356532335281372, - "learning_rate": 2.236298826674588e-05, - "loss": 0.0903, + "epoch": 2.5456705777513737, + "grad_norm": 1.1388263702392578, + "learning_rate": 7.454329422248627e-06, + "loss": 0.0819, "step": 17140 }, { - "epoch": 1.2735778998960345, - "grad_norm": 1.004071831703186, - "learning_rate": 2.2358532600623792e-05, - "loss": 0.0676, + "epoch": 2.547155799792069, + "grad_norm": 0.954296886920929, + "learning_rate": 7.452844200207932e-06, + "loss": 0.0806, "step": 17150 }, { - "epoch": 1.274320510916382, - "grad_norm": 1.3854845762252808, - "learning_rate": 2.235407693450171e-05, - "loss": 0.0705, + "epoch": 2.548641021832764, + "grad_norm": 0.6064457893371582, + "learning_rate": 7.451358978167237e-06, + "loss": 0.0867, "step": 17160 }, { - "epoch": 1.2750631219367294, - "grad_norm": 2.1504805088043213, - "learning_rate": 2.2349621268379622e-05, - "loss": 0.072, + "epoch": 2.550126243873459, + "grad_norm": 0.5995849370956421, + "learning_rate": 7.449873756126541e-06, + "loss": 0.0672, "step": 17170 }, { - "epoch": 1.275805732957077, - "grad_norm": 1.1549479961395264, - "learning_rate": 2.2345165602257537e-05, - "loss": 0.0642, + "epoch": 2.551611465914154, + "grad_norm": 0.3621232211589813, + "learning_rate": 7.448388534085847e-06, + "loss": 0.062, "step": 17180 }, { - "epoch": 1.2765483439774246, - "grad_norm": 2.8275554180145264, - "learning_rate": 2.2340709936135452e-05, - "loss": 0.0646, + "epoch": 2.553096687954849, + "grad_norm": 0.40740764141082764, + "learning_rate": 7.446903312045152e-06, + "loss": 0.0623, "step": 17190 }, { - "epoch": 1.2772909549977722, - "grad_norm": 0.6290885806083679, - "learning_rate": 2.2336254270013367e-05, - "loss": 0.0875, + "epoch": 2.5545819099955445, + "grad_norm": 0.8544617891311646, + "learning_rate": 7.445418090004456e-06, + "loss": 0.0758, "step": 17200 }, { - "epoch": 1.2780335660181197, - "grad_norm": 2.0349278450012207, - "learning_rate": 2.2331798603891282e-05, - "loss": 0.0624, + "epoch": 2.5560671320362394, + "grad_norm": 1.7287043333053589, + "learning_rate": 7.443932867963762e-06, + "loss": 0.0535, "step": 17210 }, { - "epoch": 1.2787761770384671, - "grad_norm": 1.7029626369476318, - "learning_rate": 2.2327342937769197e-05, - "loss": 0.057, + "epoch": 2.5575523540769343, + "grad_norm": 0.7510034441947937, + "learning_rate": 7.442447645923067e-06, + "loss": 0.083, "step": 17220 }, { - "epoch": 1.2795187880588148, - "grad_norm": 0.9866172075271606, - "learning_rate": 2.2322887271647112e-05, - "loss": 0.08, + "epoch": 2.5590375761176296, + "grad_norm": 0.5877035856246948, + "learning_rate": 7.4409624238823705e-06, + "loss": 0.0723, "step": 17230 }, { - "epoch": 1.2802613990791623, - "grad_norm": 1.6005713939666748, - "learning_rate": 2.2318431605525027e-05, - "loss": 0.0767, + "epoch": 2.5605227981583245, + "grad_norm": 1.369896411895752, + "learning_rate": 7.439477201841676e-06, + "loss": 0.0693, "step": 17240 }, { - "epoch": 1.28100401009951, - "grad_norm": 0.7228248119354248, - "learning_rate": 2.231397593940294e-05, - "loss": 0.1184, + "epoch": 2.56200802019902, + "grad_norm": 1.503385305404663, + "learning_rate": 7.43799197980098e-06, + "loss": 0.0637, "step": 17250 }, { - "epoch": 1.2817466211198574, - "grad_norm": 2.2156078815460205, - "learning_rate": 2.2309520273280857e-05, - "loss": 0.1104, + "epoch": 2.5634932422397148, + "grad_norm": 0.6620815992355347, + "learning_rate": 7.4365067577602855e-06, + "loss": 0.0572, "step": 17260 }, { - "epoch": 1.2824892321402048, - "grad_norm": 1.3294280767440796, - "learning_rate": 2.2305064607158772e-05, - "loss": 0.0742, + "epoch": 2.5649784642804097, + "grad_norm": 1.0107345581054688, + "learning_rate": 7.435021535719591e-06, + "loss": 0.0685, "step": 17270 }, { - "epoch": 1.2832318431605525, - "grad_norm": 1.8837758302688599, - "learning_rate": 2.2300608941036684e-05, - "loss": 0.0753, + "epoch": 2.566463686321105, + "grad_norm": 0.6963445544242859, + "learning_rate": 7.433536313678895e-06, + "loss": 0.0642, "step": 17280 }, { - "epoch": 1.2839744541809002, - "grad_norm": 1.9538664817810059, - "learning_rate": 2.22961532749146e-05, - "loss": 0.0685, + "epoch": 2.5679489083618003, + "grad_norm": 0.7158843278884888, + "learning_rate": 7.4320510916382005e-06, + "loss": 0.0879, "step": 17290 }, { - "epoch": 1.2847170652012476, - "grad_norm": 1.3086044788360596, - "learning_rate": 2.2291697608792517e-05, - "loss": 0.069, + "epoch": 2.5694341304024952, + "grad_norm": 0.49214211106300354, + "learning_rate": 7.430565869597506e-06, + "loss": 0.0556, "step": 17300 }, { - "epoch": 1.285459676221595, - "grad_norm": 0.43306243419647217, - "learning_rate": 2.228724194267043e-05, - "loss": 0.0688, + "epoch": 2.57091935244319, + "grad_norm": 0.5516762733459473, + "learning_rate": 7.42908064755681e-06, + "loss": 0.0548, "step": 17310 }, { - "epoch": 1.2862022872419427, - "grad_norm": 1.7001709938049316, - "learning_rate": 2.2282786276548344e-05, - "loss": 0.0964, + "epoch": 2.5724045744838855, + "grad_norm": 0.8607064485549927, + "learning_rate": 7.4275954255161155e-06, + "loss": 0.078, "step": 17320 }, { - "epoch": 1.2869448982622902, - "grad_norm": 0.9813358187675476, - "learning_rate": 2.2278330610426262e-05, - "loss": 0.0748, + "epoch": 2.5738897965245804, + "grad_norm": 0.9706307649612427, + "learning_rate": 7.426110203475421e-06, + "loss": 0.0552, "step": 17330 }, { - "epoch": 1.2876875092826379, - "grad_norm": 1.8679172992706299, - "learning_rate": 2.2273874944304174e-05, - "loss": 0.0962, + "epoch": 2.5753750185652757, + "grad_norm": 0.6151372790336609, + "learning_rate": 7.424624981434725e-06, + "loss": 0.0835, "step": 17340 }, { - "epoch": 1.2884301203029853, - "grad_norm": 1.7753219604492188, - "learning_rate": 2.226941927818209e-05, - "loss": 0.0818, + "epoch": 2.5768602406059706, + "grad_norm": 1.603258490562439, + "learning_rate": 7.4231397593940305e-06, + "loss": 0.0943, "step": 17350 }, { - "epoch": 1.2891727313233328, - "grad_norm": 1.1424388885498047, - "learning_rate": 2.226496361206e-05, - "loss": 0.0718, + "epoch": 2.5783454626466655, + "grad_norm": 0.7356602549552917, + "learning_rate": 7.421654537353334e-06, + "loss": 0.0783, "step": 17360 }, { - "epoch": 1.2899153423436804, - "grad_norm": 1.6380572319030762, - "learning_rate": 2.226050794593792e-05, - "loss": 0.0815, + "epoch": 2.579830684687361, + "grad_norm": 0.5023389458656311, + "learning_rate": 7.420169315312639e-06, + "loss": 0.0707, "step": 17370 }, { - "epoch": 1.2906579533640279, - "grad_norm": 0.8902571201324463, - "learning_rate": 2.2256052279815834e-05, - "loss": 0.075, + "epoch": 2.5813159067280558, + "grad_norm": 0.5624324679374695, + "learning_rate": 7.4186840932719454e-06, + "loss": 0.0606, "step": 17380 }, { - "epoch": 1.2914005643843756, - "grad_norm": 0.8039567470550537, - "learning_rate": 2.2251596613693745e-05, - "loss": 0.0554, + "epoch": 2.582801128768751, + "grad_norm": 0.6219521164894104, + "learning_rate": 7.417198871231249e-06, + "loss": 0.0768, "step": 17390 }, { - "epoch": 1.292143175404723, - "grad_norm": 2.0586135387420654, - "learning_rate": 2.2247140947571664e-05, - "loss": 0.0662, + "epoch": 2.584286350809446, + "grad_norm": 0.8075690865516663, + "learning_rate": 7.415713649190554e-06, + "loss": 0.0646, "step": 17400 }, { - "epoch": 1.2928857864250705, - "grad_norm": 3.4961864948272705, - "learning_rate": 2.224268528144958e-05, - "loss": 0.1205, + "epoch": 2.585771572850141, + "grad_norm": 0.8664126396179199, + "learning_rate": 7.41422842714986e-06, + "loss": 0.0548, "step": 17410 }, { - "epoch": 1.2936283974454181, - "grad_norm": 1.724418044090271, - "learning_rate": 2.223822961532749e-05, - "loss": 0.0581, + "epoch": 2.5872567948908363, + "grad_norm": 0.8029124736785889, + "learning_rate": 7.412743205109164e-06, + "loss": 0.062, "step": 17420 }, { - "epoch": 1.2943710084657656, - "grad_norm": 1.78573739528656, - "learning_rate": 2.2233773949205405e-05, - "loss": 0.0827, + "epoch": 2.588742016931531, + "grad_norm": 0.9638460278511047, + "learning_rate": 7.411257983068469e-06, + "loss": 0.0816, "step": 17430 }, { - "epoch": 1.2951136194861133, - "grad_norm": 1.7535440921783447, - "learning_rate": 2.2229318283083324e-05, - "loss": 0.0835, + "epoch": 2.5902272389722265, + "grad_norm": 0.6284675002098083, + "learning_rate": 7.409772761027775e-06, + "loss": 0.0558, "step": 17440 }, { - "epoch": 1.2958562305064607, - "grad_norm": 0.7381752729415894, - "learning_rate": 2.2224862616961235e-05, - "loss": 0.0748, + "epoch": 2.5917124610129214, + "grad_norm": 0.517883837223053, + "learning_rate": 7.408287538987079e-06, + "loss": 0.0791, "step": 17450 }, { - "epoch": 1.2965988415268082, - "grad_norm": 2.1226701736450195, - "learning_rate": 2.222040695083915e-05, - "loss": 0.0663, + "epoch": 2.5931976830536163, + "grad_norm": 0.49655789136886597, + "learning_rate": 7.406802316946384e-06, + "loss": 0.0543, "step": 17460 }, { - "epoch": 1.2973414525471558, - "grad_norm": 1.3175716400146484, - "learning_rate": 2.2215951284717065e-05, - "loss": 0.1009, + "epoch": 2.5946829050943117, + "grad_norm": 0.5364641547203064, + "learning_rate": 7.405317094905689e-06, + "loss": 0.076, "step": 17470 }, { - "epoch": 1.2980840635675033, - "grad_norm": 1.1516002416610718, - "learning_rate": 2.221149561859498e-05, - "loss": 0.075, + "epoch": 2.5961681271350066, + "grad_norm": 0.2534388601779938, + "learning_rate": 7.403831872864994e-06, + "loss": 0.0625, "step": 17480 }, { - "epoch": 1.298826674587851, - "grad_norm": 2.0485615730285645, - "learning_rate": 2.2207039952472895e-05, - "loss": 0.0753, + "epoch": 2.597653349175702, + "grad_norm": 0.8373918533325195, + "learning_rate": 7.402346650824299e-06, + "loss": 0.0701, "step": 17490 }, { - "epoch": 1.2995692856081984, - "grad_norm": 1.492017149925232, - "learning_rate": 2.220258428635081e-05, - "loss": 0.0801, + "epoch": 2.599138571216397, + "grad_norm": 0.8206072449684143, + "learning_rate": 7.400861428783604e-06, + "loss": 0.082, "step": 17500 }, { - "epoch": 1.3003118966285458, - "grad_norm": 1.2517192363739014, - "learning_rate": 2.2198128620228725e-05, - "loss": 0.0704, + "epoch": 2.6006237932570917, + "grad_norm": 0.7630594968795776, + "learning_rate": 7.399376206742909e-06, + "loss": 0.0852, "step": 17510 }, { - "epoch": 1.3010545076488935, - "grad_norm": 1.73708176612854, - "learning_rate": 2.219367295410664e-05, - "loss": 0.0564, + "epoch": 2.602109015297787, + "grad_norm": 1.250341773033142, + "learning_rate": 7.397890984702214e-06, + "loss": 0.0644, "step": 17520 }, { - "epoch": 1.301797118669241, - "grad_norm": 1.0200793743133545, - "learning_rate": 2.2189217287984552e-05, - "loss": 0.0521, + "epoch": 2.603594237338482, + "grad_norm": 0.9379168152809143, + "learning_rate": 7.396405762661518e-06, + "loss": 0.0767, "step": 17530 }, { - "epoch": 1.3025397296895886, - "grad_norm": 3.1325795650482178, - "learning_rate": 2.218476162186247e-05, - "loss": 0.0628, + "epoch": 2.6050794593791773, + "grad_norm": 0.8298720121383667, + "learning_rate": 7.394920540620823e-06, + "loss": 0.0751, "step": 17540 }, { - "epoch": 1.303282340709936, - "grad_norm": 2.2476203441619873, - "learning_rate": 2.2180305955740385e-05, - "loss": 0.0747, + "epoch": 2.606564681419872, + "grad_norm": 1.1554667949676514, + "learning_rate": 7.393435318580129e-06, + "loss": 0.0617, "step": 17550 }, { - "epoch": 1.3040249517302835, - "grad_norm": 0.6878722906112671, - "learning_rate": 2.2175850289618297e-05, - "loss": 0.0894, + "epoch": 2.608049903460567, + "grad_norm": 1.0690228939056396, + "learning_rate": 7.391950096539433e-06, + "loss": 0.0686, "step": 17560 }, { - "epoch": 1.3047675627506312, - "grad_norm": 2.48412823677063, - "learning_rate": 2.2171394623496215e-05, - "loss": 0.0809, + "epoch": 2.6095351255012624, + "grad_norm": 0.49055570363998413, + "learning_rate": 7.390464874498738e-06, + "loss": 0.0869, "step": 17570 }, { - "epoch": 1.3055101737709789, - "grad_norm": 1.0366617441177368, - "learning_rate": 2.2166938957374127e-05, - "loss": 0.0724, + "epoch": 2.6110203475419578, + "grad_norm": 0.8171910643577576, + "learning_rate": 7.388979652458043e-06, + "loss": 0.0636, "step": 17580 }, { - "epoch": 1.3062527847913263, - "grad_norm": 0.9051563739776611, - "learning_rate": 2.2162483291252042e-05, - "loss": 0.1042, + "epoch": 2.6125055695826527, + "grad_norm": 0.917361855506897, + "learning_rate": 7.387494430417348e-06, + "loss": 0.0772, "step": 17590 }, { - "epoch": 1.3069953958116738, - "grad_norm": 0.7146435976028442, - "learning_rate": 2.2158027625129957e-05, - "loss": 0.1003, + "epoch": 2.6139907916233476, + "grad_norm": 0.653695285320282, + "learning_rate": 7.386009208376653e-06, + "loss": 0.0572, "step": 17600 }, { - "epoch": 1.3077380068320215, - "grad_norm": 1.7611632347106934, - "learning_rate": 2.2153571959007872e-05, - "loss": 0.0837, + "epoch": 2.615476013664043, + "grad_norm": 0.48957398533821106, + "learning_rate": 7.384523986335958e-06, + "loss": 0.0906, "step": 17610 }, { - "epoch": 1.308480617852369, - "grad_norm": 1.3389374017715454, - "learning_rate": 2.2149116292885787e-05, - "loss": 0.0652, + "epoch": 2.616961235704738, + "grad_norm": 1.2944921255111694, + "learning_rate": 7.383038764295263e-06, + "loss": 0.0627, "step": 17620 }, { - "epoch": 1.3092232288727166, - "grad_norm": 2.716177463531494, - "learning_rate": 2.2144660626763702e-05, - "loss": 0.0718, + "epoch": 2.618446457745433, + "grad_norm": 0.4532714784145355, + "learning_rate": 7.381553542254568e-06, + "loss": 0.0544, "step": 17630 }, { - "epoch": 1.309965839893064, - "grad_norm": 0.6163918972015381, - "learning_rate": 2.2140204960641617e-05, - "loss": 0.0744, + "epoch": 2.619931679786128, + "grad_norm": 0.8860215544700623, + "learning_rate": 7.3800683202138726e-06, + "loss": 0.0554, "step": 17640 }, { - "epoch": 1.3107084509134115, - "grad_norm": 2.4319982528686523, - "learning_rate": 2.2135749294519532e-05, - "loss": 0.097, + "epoch": 2.621416901826823, + "grad_norm": 0.8389759659767151, + "learning_rate": 7.378583098173178e-06, + "loss": 0.0659, "step": 17650 }, { - "epoch": 1.3114510619337592, - "grad_norm": 2.8777670860290527, - "learning_rate": 2.2131293628397447e-05, - "loss": 0.0849, + "epoch": 2.6229021238675183, + "grad_norm": 0.4027443826198578, + "learning_rate": 7.377097876132483e-06, + "loss": 0.0703, "step": 17660 }, { - "epoch": 1.3121936729541066, - "grad_norm": 2.7861387729644775, - "learning_rate": 2.2126837962275362e-05, - "loss": 0.076, + "epoch": 2.624387345908213, + "grad_norm": 1.314283847808838, + "learning_rate": 7.3756126540917876e-06, + "loss": 0.0805, "step": 17670 }, { - "epoch": 1.3129362839744543, - "grad_norm": 0.650431752204895, - "learning_rate": 2.2122382296153277e-05, - "loss": 0.0888, + "epoch": 2.6258725679489086, + "grad_norm": 0.6991994976997375, + "learning_rate": 7.374127432051093e-06, + "loss": 0.0639, "step": 17680 }, { - "epoch": 1.3136788949948017, - "grad_norm": 0.553596019744873, - "learning_rate": 2.211792663003119e-05, - "loss": 0.0619, + "epoch": 2.6273577899896035, + "grad_norm": 0.8641003966331482, + "learning_rate": 7.3726422100103964e-06, + "loss": 0.0895, "step": 17690 }, { - "epoch": 1.3144215060151492, - "grad_norm": 1.0089176893234253, - "learning_rate": 2.2113470963909103e-05, - "loss": 0.0777, + "epoch": 2.6288430120302984, + "grad_norm": 0.6983335614204407, + "learning_rate": 7.371156987969702e-06, + "loss": 0.0591, "step": 17700 }, { - "epoch": 1.3151641170354968, - "grad_norm": 1.6355758905410767, - "learning_rate": 2.2109015297787022e-05, - "loss": 0.0507, + "epoch": 2.6303282340709937, + "grad_norm": 0.9575406908988953, + "learning_rate": 7.369671765929007e-06, + "loss": 0.0835, "step": 17710 }, { - "epoch": 1.3159067280558443, - "grad_norm": 1.1922086477279663, - "learning_rate": 2.2104559631664933e-05, - "loss": 0.0612, + "epoch": 2.6318134561116886, + "grad_norm": 0.5745441317558289, + "learning_rate": 7.3681865438883114e-06, + "loss": 0.0633, "step": 17720 }, { - "epoch": 1.316649339076192, - "grad_norm": 3.1001734733581543, - "learning_rate": 2.210010396554285e-05, - "loss": 0.103, + "epoch": 2.633298678152384, + "grad_norm": 0.5129587650299072, + "learning_rate": 7.366701321847617e-06, + "loss": 0.057, "step": 17730 }, { - "epoch": 1.3173919500965394, - "grad_norm": 2.3296868801116943, - "learning_rate": 2.2095648299420767e-05, - "loss": 0.0626, + "epoch": 2.634783900193079, + "grad_norm": 0.9663777947425842, + "learning_rate": 7.365216099806922e-06, + "loss": 0.0712, "step": 17740 }, { - "epoch": 1.3181345611168869, - "grad_norm": 1.527961015701294, - "learning_rate": 2.209119263329868e-05, - "loss": 0.0762, + "epoch": 2.6362691222337737, + "grad_norm": 0.8724805116653442, + "learning_rate": 7.363730877766226e-06, + "loss": 0.0615, "step": 17750 }, { - "epoch": 1.3188771721372345, - "grad_norm": 2.4450912475585938, - "learning_rate": 2.2086736967176593e-05, - "loss": 0.0799, + "epoch": 2.637754344274469, + "grad_norm": 1.2762563228607178, + "learning_rate": 7.362245655725532e-06, + "loss": 0.0724, "step": 17760 }, { - "epoch": 1.319619783157582, - "grad_norm": 1.477561354637146, - "learning_rate": 2.2082281301054505e-05, - "loss": 0.0466, + "epoch": 2.639239566315164, + "grad_norm": 1.088175654411316, + "learning_rate": 7.360760433684836e-06, + "loss": 0.0636, "step": 17770 }, { - "epoch": 1.3203623941779297, - "grad_norm": 2.102966070175171, - "learning_rate": 2.2077825634932423e-05, - "loss": 0.0812, + "epoch": 2.6407247883558593, + "grad_norm": 1.3335635662078857, + "learning_rate": 7.359275211644141e-06, + "loss": 0.0685, "step": 17780 }, { - "epoch": 1.3211050051982771, - "grad_norm": 1.731831669807434, - "learning_rate": 2.207336996881034e-05, - "loss": 0.0636, + "epoch": 2.6422100103965542, + "grad_norm": 0.5454977750778198, + "learning_rate": 7.357789989603447e-06, + "loss": 0.0582, "step": 17790 }, { - "epoch": 1.3218476162186246, - "grad_norm": 1.5313726663589478, - "learning_rate": 2.206891430268825e-05, - "loss": 0.0619, + "epoch": 2.643695232437249, + "grad_norm": 0.5082247853279114, + "learning_rate": 7.356304767562751e-06, + "loss": 0.0589, "step": 17800 }, { - "epoch": 1.3225902272389722, - "grad_norm": 1.2742550373077393, - "learning_rate": 2.206445863656617e-05, - "loss": 0.0647, + "epoch": 2.6451804544779445, + "grad_norm": 1.032354474067688, + "learning_rate": 7.354819545522056e-06, + "loss": 0.0696, "step": 17810 }, { - "epoch": 1.3233328382593197, - "grad_norm": 0.7429075241088867, - "learning_rate": 2.2060002970444083e-05, - "loss": 0.0574, + "epoch": 2.6466656765186394, + "grad_norm": 0.808302104473114, + "learning_rate": 7.353334323481362e-06, + "loss": 0.0766, "step": 17820 }, { - "epoch": 1.3240754492796674, - "grad_norm": 2.3844103813171387, - "learning_rate": 2.2055547304321995e-05, - "loss": 0.0455, + "epoch": 2.6481508985593347, + "grad_norm": 0.6936222314834595, + "learning_rate": 7.351849101440665e-06, + "loss": 0.0599, "step": 17830 }, { - "epoch": 1.3248180603000148, - "grad_norm": 3.4696733951568604, - "learning_rate": 2.2051091638199913e-05, - "loss": 0.0856, + "epoch": 2.6496361206000296, + "grad_norm": 0.5071762800216675, + "learning_rate": 7.3503638793999706e-06, + "loss": 0.0645, "step": 17840 }, { - "epoch": 1.3255606713203623, - "grad_norm": 1.2580214738845825, - "learning_rate": 2.204663597207783e-05, - "loss": 0.0525, + "epoch": 2.6511213426407245, + "grad_norm": 0.5135564208030701, + "learning_rate": 7.348878657359277e-06, + "loss": 0.0625, "step": 17850 }, { - "epoch": 1.32630328234071, - "grad_norm": 2.1355206966400146, - "learning_rate": 2.204218030595574e-05, - "loss": 0.0532, + "epoch": 2.65260656468142, + "grad_norm": 1.0177409648895264, + "learning_rate": 7.34739343531858e-06, + "loss": 0.0707, "step": 17860 }, { - "epoch": 1.3270458933610576, - "grad_norm": 3.5435104370117188, - "learning_rate": 2.2037724639833655e-05, - "loss": 0.0589, + "epoch": 2.654091786722115, + "grad_norm": 0.4974595904350281, + "learning_rate": 7.3459082132778856e-06, + "loss": 0.065, "step": 17870 }, { - "epoch": 1.327788504381405, - "grad_norm": 0.7375121712684631, - "learning_rate": 2.203326897371157e-05, - "loss": 0.0516, + "epoch": 2.65557700876281, + "grad_norm": 0.5838668346405029, + "learning_rate": 7.34442299123719e-06, + "loss": 0.07, "step": 17880 }, { - "epoch": 1.3285311154017525, - "grad_norm": 1.3405452966690063, - "learning_rate": 2.2028813307589485e-05, - "loss": 0.0609, + "epoch": 2.657062230803505, + "grad_norm": 0.531019389629364, + "learning_rate": 7.342937769196495e-06, + "loss": 0.0636, "step": 17890 }, { - "epoch": 1.3292737264221002, - "grad_norm": 1.448654294013977, - "learning_rate": 2.20243576414674e-05, - "loss": 0.0716, + "epoch": 2.6585474528442004, + "grad_norm": 0.6077486276626587, + "learning_rate": 7.3414525471558005e-06, + "loss": 0.0571, "step": 17900 }, { - "epoch": 1.3300163374424476, - "grad_norm": 1.0215409994125366, - "learning_rate": 2.2019901975345315e-05, - "loss": 0.0627, + "epoch": 2.6600326748848953, + "grad_norm": 0.7069369554519653, + "learning_rate": 7.339967325115105e-06, + "loss": 0.0712, "step": 17910 }, { - "epoch": 1.3307589484627953, - "grad_norm": 2.50747013092041, - "learning_rate": 2.201544630922323e-05, - "loss": 0.0607, + "epoch": 2.6615178969255906, + "grad_norm": 0.5238094329833984, + "learning_rate": 7.33848210307441e-06, + "loss": 0.0733, "step": 17920 }, { - "epoch": 1.3315015594831427, - "grad_norm": 0.947949230670929, - "learning_rate": 2.2010990643101145e-05, - "loss": 0.092, + "epoch": 2.6630031189662855, + "grad_norm": 0.4017098546028137, + "learning_rate": 7.3369968810337155e-06, + "loss": 0.0559, "step": 17930 }, { - "epoch": 1.3322441705034902, - "grad_norm": 1.444300889968872, - "learning_rate": 2.2006534976979057e-05, - "loss": 0.052, + "epoch": 2.6644883410069804, + "grad_norm": 0.3973262906074524, + "learning_rate": 7.33551165899302e-06, + "loss": 0.0629, "step": 17940 }, { - "epoch": 1.3329867815238379, - "grad_norm": 1.492150068283081, - "learning_rate": 2.2002079310856975e-05, - "loss": 0.0644, + "epoch": 2.6659735630476757, + "grad_norm": 0.6550332903862, + "learning_rate": 7.334026436952325e-06, + "loss": 0.0641, "step": 17950 }, { - "epoch": 1.3337293925441853, - "grad_norm": 3.0664021968841553, - "learning_rate": 2.199762364473489e-05, - "loss": 0.1176, + "epoch": 2.6674587850883706, + "grad_norm": 0.3614233136177063, + "learning_rate": 7.3325412149116305e-06, + "loss": 0.0517, "step": 17960 }, { - "epoch": 1.334472003564533, - "grad_norm": 2.1446638107299805, - "learning_rate": 2.19931679786128e-05, - "loss": 0.0621, + "epoch": 2.668944007129066, + "grad_norm": 0.5007290840148926, + "learning_rate": 7.331055992870935e-06, + "loss": 0.0672, "step": 17970 }, { - "epoch": 1.3352146145848804, - "grad_norm": 2.1840789318084717, - "learning_rate": 2.198871231249072e-05, - "loss": 0.0744, + "epoch": 2.670429229169761, + "grad_norm": 0.8679770231246948, + "learning_rate": 7.32957077083024e-06, + "loss": 0.0632, "step": 17980 }, { - "epoch": 1.335957225605228, - "grad_norm": 6.352825164794922, - "learning_rate": 2.198425664636863e-05, - "loss": 0.0636, + "epoch": 2.671914451210456, + "grad_norm": 0.9736119508743286, + "learning_rate": 7.328085548789544e-06, + "loss": 0.065, "step": 17990 }, { - "epoch": 1.3366998366255756, - "grad_norm": 1.7833071947097778, - "learning_rate": 2.1979800980246546e-05, - "loss": 0.0835, + "epoch": 2.673399673251151, + "grad_norm": 0.6604450941085815, + "learning_rate": 7.326600326748849e-06, + "loss": 0.0847, "step": 18000 }, { - "epoch": 1.337442447645923, - "grad_norm": 1.596863031387329, - "learning_rate": 2.197534531412446e-05, - "loss": 0.0639, + "epoch": 2.674884895291846, + "grad_norm": 1.0800334215164185, + "learning_rate": 7.325115104708154e-06, + "loss": 0.0903, "step": 18010 }, { - "epoch": 1.3381850586662707, - "grad_norm": 1.079342246055603, - "learning_rate": 2.1970889648002376e-05, - "loss": 0.0675, + "epoch": 2.6763701173325414, + "grad_norm": 0.4174042344093323, + "learning_rate": 7.323629882667459e-06, + "loss": 0.0554, "step": 18020 }, { - "epoch": 1.3389276696866181, - "grad_norm": 2.5016486644744873, - "learning_rate": 2.196643398188029e-05, - "loss": 0.0717, + "epoch": 2.6778553393732363, + "grad_norm": 0.7919684648513794, + "learning_rate": 7.322144660626764e-06, + "loss": 0.0537, "step": 18030 }, { - "epoch": 1.3396702807069656, - "grad_norm": 1.00153648853302, - "learning_rate": 2.1961978315758206e-05, - "loss": 0.0901, + "epoch": 2.679340561413931, + "grad_norm": 0.6713187098503113, + "learning_rate": 7.320659438586069e-06, + "loss": 0.0668, "step": 18040 }, { - "epoch": 1.3404128917273133, - "grad_norm": 0.4505369961261749, - "learning_rate": 2.195752264963612e-05, - "loss": 0.0617, + "epoch": 2.6808257834546265, + "grad_norm": 1.1003612279891968, + "learning_rate": 7.319174216545374e-06, + "loss": 0.0745, "step": 18050 }, { - "epoch": 1.3411555027476607, - "grad_norm": 0.8627389669418335, - "learning_rate": 2.1953066983514036e-05, - "loss": 0.0915, + "epoch": 2.6823110054953214, + "grad_norm": 1.0887432098388672, + "learning_rate": 7.317688994504679e-06, + "loss": 0.0697, "step": 18060 }, { - "epoch": 1.3418981137680084, - "grad_norm": 1.8708628416061401, - "learning_rate": 2.194861131739195e-05, - "loss": 0.087, + "epoch": 2.6837962275360168, + "grad_norm": 0.7097349762916565, + "learning_rate": 7.316203772463984e-06, + "loss": 0.067, "step": 18070 }, { - "epoch": 1.3426407247883558, - "grad_norm": 1.866942286491394, - "learning_rate": 2.1944155651269866e-05, - "loss": 0.0633, + "epoch": 2.6852814495767117, + "grad_norm": 0.8918297290802002, + "learning_rate": 7.314718550423289e-06, + "loss": 0.0597, "step": 18080 }, { - "epoch": 1.3433833358087033, - "grad_norm": 3.1067397594451904, - "learning_rate": 2.193969998514778e-05, - "loss": 0.065, + "epoch": 2.6867666716174066, + "grad_norm": 0.39780157804489136, + "learning_rate": 7.313233328382594e-06, + "loss": 0.0617, "step": 18090 }, { - "epoch": 1.344125946829051, - "grad_norm": 1.4775131940841675, - "learning_rate": 2.1935244319025693e-05, - "loss": 0.0858, + "epoch": 2.688251893658102, + "grad_norm": 0.7399517893791199, + "learning_rate": 7.3117481063418985e-06, + "loss": 0.0701, "step": 18100 }, { - "epoch": 1.3448685578493984, - "grad_norm": 1.9869881868362427, - "learning_rate": 2.1930788652903608e-05, - "loss": 0.088, + "epoch": 2.689737115698797, + "grad_norm": 0.567972719669342, + "learning_rate": 7.310262884301204e-06, + "loss": 0.0847, "step": 18110 }, { - "epoch": 1.345611168869746, - "grad_norm": 2.1801204681396484, - "learning_rate": 2.1926332986781526e-05, - "loss": 0.0941, + "epoch": 2.691222337739492, + "grad_norm": 0.699404239654541, + "learning_rate": 7.308777662260509e-06, + "loss": 0.0684, "step": 18120 }, { - "epoch": 1.3463537798900935, - "grad_norm": 1.2219593524932861, - "learning_rate": 2.1921877320659438e-05, - "loss": 0.0897, + "epoch": 2.692707559780187, + "grad_norm": 1.000461459159851, + "learning_rate": 7.307292440219813e-06, + "loss": 0.0784, "step": 18130 }, { - "epoch": 1.347096390910441, - "grad_norm": 1.6922121047973633, - "learning_rate": 2.1917421654537353e-05, - "loss": 0.0905, + "epoch": 2.694192781820882, + "grad_norm": 0.6232538223266602, + "learning_rate": 7.305807218179119e-06, + "loss": 0.0501, "step": 18140 }, { - "epoch": 1.3478390019307886, - "grad_norm": 4.362298488616943, - "learning_rate": 2.191296598841527e-05, - "loss": 0.0714, + "epoch": 2.6956780038615773, + "grad_norm": 0.5169805288314819, + "learning_rate": 7.304321996138424e-06, + "loss": 0.0608, "step": 18150 }, { - "epoch": 1.3485816129511363, - "grad_norm": 1.9066132307052612, - "learning_rate": 2.1908510322293183e-05, - "loss": 0.076, + "epoch": 2.6971632259022726, + "grad_norm": 1.0496106147766113, + "learning_rate": 7.302836774097728e-06, + "loss": 0.0598, "step": 18160 }, { - "epoch": 1.3493242239714838, - "grad_norm": 2.2827999591827393, - "learning_rate": 2.1904054656171098e-05, - "loss": 0.0872, + "epoch": 2.6986484479429675, + "grad_norm": 0.6121331453323364, + "learning_rate": 7.301351552057033e-06, + "loss": 0.0544, "step": 18170 }, { - "epoch": 1.3500668349918312, - "grad_norm": 0.735640287399292, - "learning_rate": 2.189959899004901e-05, - "loss": 0.0668, + "epoch": 2.7001336699836624, + "grad_norm": 0.779712975025177, + "learning_rate": 7.299866330016337e-06, + "loss": 0.0768, "step": 18180 }, { - "epoch": 1.350809446012179, - "grad_norm": 1.7946842908859253, - "learning_rate": 2.1895143323926928e-05, - "loss": 0.0767, + "epoch": 2.701618892024358, + "grad_norm": 0.36729615926742554, + "learning_rate": 7.298381107975643e-06, + "loss": 0.0643, "step": 18190 }, { - "epoch": 1.3515520570325263, - "grad_norm": 0.793258547782898, - "learning_rate": 2.1890687657804843e-05, - "loss": 0.0484, + "epoch": 2.7031041140650527, + "grad_norm": 0.8492846488952637, + "learning_rate": 7.296895885934948e-06, + "loss": 0.0681, "step": 18200 }, { - "epoch": 1.352294668052874, - "grad_norm": 1.9546618461608887, - "learning_rate": 2.1886231991682755e-05, - "loss": 0.1081, + "epoch": 2.704589336105748, + "grad_norm": 1.0522595643997192, + "learning_rate": 7.295410663894252e-06, + "loss": 0.0716, "step": 18210 }, { - "epoch": 1.3530372790732215, - "grad_norm": 1.6390115022659302, - "learning_rate": 2.1881776325560673e-05, - "loss": 0.051, + "epoch": 2.706074558146443, + "grad_norm": 1.1811456680297852, + "learning_rate": 7.293925441853558e-06, + "loss": 0.0666, "step": 18220 }, { - "epoch": 1.353779890093569, - "grad_norm": 0.6895598769187927, - "learning_rate": 2.1877320659438588e-05, - "loss": 0.0765, + "epoch": 2.707559780187138, + "grad_norm": 1.1208118200302124, + "learning_rate": 7.292440219812863e-06, + "loss": 0.0618, "step": 18230 }, { - "epoch": 1.3545225011139166, - "grad_norm": 1.3696023225784302, - "learning_rate": 2.18728649933165e-05, - "loss": 0.1083, - "step": 18240 + "epoch": 2.709045002227833, + "grad_norm": 0.8347768187522888, + "learning_rate": 7.290954997772167e-06, + "loss": 0.069, + "step": 18240 }, { - "epoch": 1.355265112134264, - "grad_norm": 1.8594785928726196, - "learning_rate": 2.1868409327194418e-05, - "loss": 0.0773, + "epoch": 2.710530224268528, + "grad_norm": 0.9092279672622681, + "learning_rate": 7.289469775731473e-06, + "loss": 0.0555, "step": 18250 }, { - "epoch": 1.3560077231546117, - "grad_norm": 1.0615592002868652, - "learning_rate": 2.1863953661072333e-05, - "loss": 0.079, + "epoch": 2.7120154463092234, + "grad_norm": 1.1388651132583618, + "learning_rate": 7.287984553690778e-06, + "loss": 0.0818, "step": 18260 }, { - "epoch": 1.3567503341749592, - "grad_norm": 1.725924015045166, - "learning_rate": 2.1859497994950245e-05, - "loss": 0.0957, + "epoch": 2.7135006683499183, + "grad_norm": 0.8529207110404968, + "learning_rate": 7.286499331650082e-06, + "loss": 0.0424, "step": 18270 }, { - "epoch": 1.3574929451953066, - "grad_norm": 0.7727744579315186, - "learning_rate": 2.185504232882816e-05, - "loss": 0.0664, + "epoch": 2.714985890390613, + "grad_norm": 1.3158096075057983, + "learning_rate": 7.285014109609388e-06, + "loss": 0.0847, "step": 18280 }, { - "epoch": 1.3582355562156543, - "grad_norm": 0.4827175438404083, - "learning_rate": 2.1850586662706075e-05, - "loss": 0.0786, + "epoch": 2.7164711124313086, + "grad_norm": 1.0572584867477417, + "learning_rate": 7.283528887568691e-06, + "loss": 0.0485, "step": 18290 }, { - "epoch": 1.3589781672360017, - "grad_norm": 2.3868589401245117, - "learning_rate": 2.184613099658399e-05, - "loss": 0.0757, + "epoch": 2.7179563344720035, + "grad_norm": 0.9866123199462891, + "learning_rate": 7.2820436655279965e-06, + "loss": 0.063, "step": 18300 }, { - "epoch": 1.3597207782563494, - "grad_norm": 1.4127172231674194, - "learning_rate": 2.1841675330461905e-05, - "loss": 0.0805, + "epoch": 2.719441556512699, + "grad_norm": 0.6498306393623352, + "learning_rate": 7.280558443487303e-06, + "loss": 0.076, "step": 18310 }, { - "epoch": 1.3604633892766969, - "grad_norm": 3.6567182540893555, - "learning_rate": 2.183721966433982e-05, - "loss": 0.0501, + "epoch": 2.7209267785533937, + "grad_norm": 0.4041306972503662, + "learning_rate": 7.279073221446606e-06, + "loss": 0.0619, "step": 18320 }, { - "epoch": 1.3612060002970443, - "grad_norm": 0.46795493364334106, - "learning_rate": 2.1832763998217735e-05, - "loss": 0.0702, + "epoch": 2.7224120005940886, + "grad_norm": 0.3998267650604248, + "learning_rate": 7.2775879994059115e-06, + "loss": 0.0594, "step": 18330 }, { - "epoch": 1.361948611317392, - "grad_norm": 1.1529920101165771, - "learning_rate": 2.182830833209565e-05, - "loss": 0.0891, + "epoch": 2.723897222634784, + "grad_norm": 0.4064011573791504, + "learning_rate": 7.276102777365217e-06, + "loss": 0.0601, "step": 18340 }, { - "epoch": 1.3626912223377394, - "grad_norm": 1.0906422138214111, - "learning_rate": 2.182385266597356e-05, - "loss": 0.0789, + "epoch": 2.725382444675479, + "grad_norm": 0.5988971590995789, + "learning_rate": 7.274617555324521e-06, + "loss": 0.0691, "step": 18350 }, { - "epoch": 1.363433833358087, - "grad_norm": 4.010624408721924, - "learning_rate": 2.181939699985148e-05, - "loss": 0.1198, + "epoch": 2.726867666716174, + "grad_norm": 1.0736236572265625, + "learning_rate": 7.2731323332838265e-06, + "loss": 0.0621, "step": 18360 }, { - "epoch": 1.3641764443784345, - "grad_norm": 1.2487231492996216, - "learning_rate": 2.1814941333729395e-05, - "loss": 0.0811, + "epoch": 2.728352888756869, + "grad_norm": 1.3379560708999634, + "learning_rate": 7.271647111243132e-06, + "loss": 0.0683, "step": 18370 }, { - "epoch": 1.364919055398782, - "grad_norm": 1.9714354276657104, - "learning_rate": 2.1810485667607306e-05, - "loss": 0.065, + "epoch": 2.729838110797564, + "grad_norm": 0.5252112150192261, + "learning_rate": 7.270161889202436e-06, + "loss": 0.055, "step": 18380 }, { - "epoch": 1.3656616664191297, - "grad_norm": 2.3405370712280273, - "learning_rate": 2.1806030001485224e-05, - "loss": 0.1, + "epoch": 2.7313233328382593, + "grad_norm": 1.0529993772506714, + "learning_rate": 7.2686766671617415e-06, + "loss": 0.0797, "step": 18390 }, { - "epoch": 1.3664042774394771, - "grad_norm": 0.7739295959472656, - "learning_rate": 2.1801574335363136e-05, - "loss": 0.0794, + "epoch": 2.7328085548789542, + "grad_norm": 1.2092722654342651, + "learning_rate": 7.267191445121046e-06, + "loss": 0.0717, "step": 18400 }, { - "epoch": 1.3671468884598248, - "grad_norm": 1.5467528104782104, - "learning_rate": 2.179711866924105e-05, - "loss": 0.0625, + "epoch": 2.7342937769196496, + "grad_norm": 0.4357248842716217, + "learning_rate": 7.265706223080351e-06, + "loss": 0.0524, "step": 18410 }, { - "epoch": 1.3678894994801722, - "grad_norm": 2.0062620639801025, - "learning_rate": 2.1792663003118966e-05, - "loss": 0.1144, + "epoch": 2.7357789989603445, + "grad_norm": 0.6754854917526245, + "learning_rate": 7.2642210010396565e-06, + "loss": 0.0754, "step": 18420 }, { - "epoch": 1.3686321105005197, - "grad_norm": 1.0899155139923096, - "learning_rate": 2.178820733699688e-05, - "loss": 0.0832, + "epoch": 2.7372642210010394, + "grad_norm": 1.5296664237976074, + "learning_rate": 7.262735778998961e-06, + "loss": 0.0769, "step": 18430 }, { - "epoch": 1.3693747215208674, - "grad_norm": 1.5174329280853271, - "learning_rate": 2.1783751670874796e-05, - "loss": 0.0886, + "epoch": 2.7387494430417347, + "grad_norm": 0.5845615267753601, + "learning_rate": 7.261250556958266e-06, + "loss": 0.0528, "step": 18440 }, { - "epoch": 1.370117332541215, - "grad_norm": 1.060883641242981, - "learning_rate": 2.177929600475271e-05, - "loss": 0.0619, + "epoch": 2.74023466508243, + "grad_norm": 0.5885583758354187, + "learning_rate": 7.2597653349175715e-06, + "loss": 0.0692, "step": 18450 }, { - "epoch": 1.3708599435615625, - "grad_norm": 2.5630977153778076, - "learning_rate": 2.1774840338630626e-05, - "loss": 0.0697, + "epoch": 2.741719887123125, + "grad_norm": 1.1398755311965942, + "learning_rate": 7.258280112876875e-06, + "loss": 0.0751, "step": 18460 }, { - "epoch": 1.37160255458191, - "grad_norm": 1.6093450784683228, - "learning_rate": 2.177038467250854e-05, - "loss": 0.0803, + "epoch": 2.74320510916382, + "grad_norm": 1.1153408288955688, + "learning_rate": 7.25679489083618e-06, + "loss": 0.0528, "step": 18470 }, { - "epoch": 1.3723451656022576, - "grad_norm": 1.996664047241211, - "learning_rate": 2.1765929006386456e-05, - "loss": 0.0881, + "epoch": 2.744690331204515, + "grad_norm": 0.9327086210250854, + "learning_rate": 7.255309668795486e-06, + "loss": 0.0748, "step": 18480 }, { - "epoch": 1.373087776622605, - "grad_norm": 1.6483838558197021, - "learning_rate": 2.176147334026437e-05, - "loss": 0.0536, + "epoch": 2.74617555324521, + "grad_norm": 0.783569872379303, + "learning_rate": 7.25382444675479e-06, + "loss": 0.0803, "step": 18490 }, { - "epoch": 1.3738303876429527, - "grad_norm": 2.682058572769165, - "learning_rate": 2.1757017674142286e-05, - "loss": 0.0822, + "epoch": 2.7476607752859055, + "grad_norm": 0.7508075833320618, + "learning_rate": 7.252339224714095e-06, + "loss": 0.052, "step": 18500 }, { - "epoch": 1.3745729986633002, - "grad_norm": 0.638530969619751, - "learning_rate": 2.1752562008020198e-05, - "loss": 0.0843, + "epoch": 2.7491459973266004, + "grad_norm": 0.318877249956131, + "learning_rate": 7.2508540026734e-06, + "loss": 0.0644, "step": 18510 }, { - "epoch": 1.3753156096836476, - "grad_norm": 0.7603070139884949, - "learning_rate": 2.1748106341898113e-05, - "loss": 0.0736, + "epoch": 2.7506312193672953, + "grad_norm": 0.6378808617591858, + "learning_rate": 7.249368780632705e-06, + "loss": 0.0547, "step": 18520 }, { - "epoch": 1.3760582207039953, - "grad_norm": 1.7410355806350708, - "learning_rate": 2.174365067577603e-05, - "loss": 0.0999, + "epoch": 2.7521164414079906, + "grad_norm": 1.1061114072799683, + "learning_rate": 7.24788355859201e-06, + "loss": 0.0833, "step": 18530 }, { - "epoch": 1.3768008317243428, - "grad_norm": 1.657575011253357, - "learning_rate": 2.1739195009653943e-05, - "loss": 0.0609, + "epoch": 2.7536016634486855, + "grad_norm": 1.0416938066482544, + "learning_rate": 7.246398336551315e-06, + "loss": 0.0604, "step": 18540 }, { - "epoch": 1.3775434427446904, - "grad_norm": 0.7977071404457092, - "learning_rate": 2.1734739343531858e-05, - "loss": 0.0636, + "epoch": 2.755086885489381, + "grad_norm": 0.9235290884971619, + "learning_rate": 7.24491311451062e-06, + "loss": 0.0627, "step": 18550 }, { - "epoch": 1.3782860537650379, - "grad_norm": 0.5938560962677002, - "learning_rate": 2.1730283677409776e-05, - "loss": 0.0776, + "epoch": 2.7565721075300758, + "grad_norm": 0.8502004146575928, + "learning_rate": 7.243427892469925e-06, + "loss": 0.0575, "step": 18560 }, { - "epoch": 1.3790286647853853, - "grad_norm": 2.3194963932037354, - "learning_rate": 2.1725828011287688e-05, - "loss": 0.0953, + "epoch": 2.7580573295707707, + "grad_norm": 0.780972421169281, + "learning_rate": 7.24194267042923e-06, + "loss": 0.0748, "step": 18570 }, { - "epoch": 1.379771275805733, - "grad_norm": 1.4146885871887207, - "learning_rate": 2.1721372345165603e-05, - "loss": 0.0885, + "epoch": 2.759542551611466, + "grad_norm": 0.6407886743545532, + "learning_rate": 7.240457448388535e-06, + "loss": 0.0777, "step": 18580 }, { - "epoch": 1.3805138868260804, - "grad_norm": 2.4094855785369873, - "learning_rate": 2.1716916679043518e-05, - "loss": 0.0629, + "epoch": 2.761027773652161, + "grad_norm": 0.5302232503890991, + "learning_rate": 7.23897222634784e-06, + "loss": 0.0513, "step": 18590 }, { - "epoch": 1.3812564978464281, - "grad_norm": 2.476471424102783, - "learning_rate": 2.1712461012921433e-05, - "loss": 0.0522, + "epoch": 2.7625129956928562, + "grad_norm": 0.8282850980758667, + "learning_rate": 7.237487004307145e-06, + "loss": 0.0683, "step": 18600 }, { - "epoch": 1.3819991088667756, - "grad_norm": 1.0352263450622559, - "learning_rate": 2.1708005346799348e-05, - "loss": 0.0605, + "epoch": 2.763998217733551, + "grad_norm": 0.811703085899353, + "learning_rate": 7.23600178226645e-06, + "loss": 0.0624, "step": 18610 }, { - "epoch": 1.382741719887123, - "grad_norm": 1.2471846342086792, - "learning_rate": 2.170354968067726e-05, - "loss": 0.0606, + "epoch": 2.765483439774246, + "grad_norm": 0.7585626840591431, + "learning_rate": 7.234516560225754e-06, + "loss": 0.0698, "step": 18620 }, { - "epoch": 1.3834843309074707, - "grad_norm": 0.748393177986145, - "learning_rate": 2.1699094014555178e-05, - "loss": 0.0825, + "epoch": 2.7669686618149414, + "grad_norm": 0.6847484707832336, + "learning_rate": 7.233031338185059e-06, + "loss": 0.0661, "step": 18630 }, { - "epoch": 1.3842269419278181, - "grad_norm": 2.0027894973754883, - "learning_rate": 2.1694638348433093e-05, - "loss": 0.1089, + "epoch": 2.7684538838556363, + "grad_norm": 0.8766204714775085, + "learning_rate": 7.231546116144364e-06, + "loss": 0.0804, "step": 18640 }, { - "epoch": 1.3849695529481658, - "grad_norm": 0.7631524205207825, - "learning_rate": 2.1690182682311004e-05, - "loss": 0.0627, + "epoch": 2.7699391058963316, + "grad_norm": 1.2006505727767944, + "learning_rate": 7.230060894103669e-06, + "loss": 0.0816, "step": 18650 }, { - "epoch": 1.3857121639685133, - "grad_norm": 1.3359373807907104, - "learning_rate": 2.1685727016188923e-05, - "loss": 0.0581, + "epoch": 2.7714243279370265, + "grad_norm": 1.2238951921463013, + "learning_rate": 7.228575672062974e-06, + "loss": 0.0765, "step": 18660 }, { - "epoch": 1.3864547749888607, - "grad_norm": 1.8302745819091797, - "learning_rate": 2.1681271350066838e-05, - "loss": 0.0578, + "epoch": 2.7729095499777214, + "grad_norm": 0.6546773910522461, + "learning_rate": 7.227090450022279e-06, + "loss": 0.0775, "step": 18670 }, { - "epoch": 1.3871973860092084, - "grad_norm": 1.2512954473495483, - "learning_rate": 2.167681568394475e-05, - "loss": 0.0746, + "epoch": 2.7743947720184168, + "grad_norm": 0.8361360430717468, + "learning_rate": 7.225605227981584e-06, + "loss": 0.0624, "step": 18680 }, { - "epoch": 1.3879399970295558, - "grad_norm": 1.0682908296585083, - "learning_rate": 2.1672360017822664e-05, - "loss": 0.0568, + "epoch": 2.7758799940591117, + "grad_norm": 0.6821973323822021, + "learning_rate": 7.224120005940889e-06, + "loss": 0.0617, "step": 18690 }, { - "epoch": 1.3886826080499035, - "grad_norm": 0.7478026747703552, - "learning_rate": 2.166790435170058e-05, - "loss": 0.0947, + "epoch": 2.777365216099807, + "grad_norm": 0.9379958510398865, + "learning_rate": 7.222634783900193e-06, + "loss": 0.0695, "step": 18700 }, { - "epoch": 1.389425219070251, - "grad_norm": 1.646852731704712, - "learning_rate": 2.1663448685578494e-05, - "loss": 0.0718, + "epoch": 2.778850438140502, + "grad_norm": 1.1127417087554932, + "learning_rate": 7.221149561859499e-06, + "loss": 0.0648, "step": 18710 }, { - "epoch": 1.3901678300905984, - "grad_norm": 1.0658780336380005, - "learning_rate": 2.165899301945641e-05, - "loss": 0.0939, + "epoch": 2.780335660181197, + "grad_norm": 0.6109861135482788, + "learning_rate": 7.219664339818804e-06, + "loss": 0.0694, "step": 18720 }, { - "epoch": 1.390910441110946, - "grad_norm": 1.9189115762710571, - "learning_rate": 2.1654537353334324e-05, - "loss": 0.0844, + "epoch": 2.781820882221892, + "grad_norm": 0.9905062317848206, + "learning_rate": 7.218179117778108e-06, + "loss": 0.0667, "step": 18730 }, { - "epoch": 1.3916530521312938, - "grad_norm": 1.009257197380066, - "learning_rate": 2.165008168721224e-05, - "loss": 0.0549, + "epoch": 2.7833061042625875, + "grad_norm": 0.9200990796089172, + "learning_rate": 7.216693895737414e-06, + "loss": 0.0533, "step": 18740 }, { - "epoch": 1.3923956631516412, - "grad_norm": 1.1717352867126465, - "learning_rate": 2.1645626021090154e-05, - "loss": 0.0741, + "epoch": 2.7847913263032824, + "grad_norm": 0.9881905317306519, + "learning_rate": 7.215208673696719e-06, + "loss": 0.0768, "step": 18750 }, { - "epoch": 1.3931382741719887, - "grad_norm": 1.070173740386963, - "learning_rate": 2.1641170354968066e-05, - "loss": 0.099, + "epoch": 2.7862765483439773, + "grad_norm": 0.7869969606399536, + "learning_rate": 7.2137234516560225e-06, + "loss": 0.0823, "step": 18760 }, { - "epoch": 1.3938808851923363, - "grad_norm": 2.932997465133667, - "learning_rate": 2.1636714688845984e-05, - "loss": 0.0674, + "epoch": 2.7877617703846727, + "grad_norm": 0.6413224935531616, + "learning_rate": 7.212238229615328e-06, + "loss": 0.0477, "step": 18770 }, { - "epoch": 1.3946234962126838, - "grad_norm": 0.7119723558425903, - "learning_rate": 2.16322590227239e-05, - "loss": 0.0768, + "epoch": 2.7892469924253676, + "grad_norm": 1.2393912076950073, + "learning_rate": 7.210753007574634e-06, + "loss": 0.0602, "step": 18780 }, { - "epoch": 1.3953661072330314, - "grad_norm": 2.5483529567718506, - "learning_rate": 2.162780335660181e-05, - "loss": 0.0427, + "epoch": 2.790732214466063, + "grad_norm": 0.6673420667648315, + "learning_rate": 7.2092677855339375e-06, + "loss": 0.06, "step": 18790 }, { - "epoch": 1.396108718253379, - "grad_norm": 1.3585890531539917, - "learning_rate": 2.162334769047973e-05, - "loss": 0.0429, + "epoch": 2.792217436506758, + "grad_norm": 1.4284379482269287, + "learning_rate": 7.207782563493243e-06, + "loss": 0.0562, "step": 18800 }, { - "epoch": 1.3968513292737263, - "grad_norm": 1.786074161529541, - "learning_rate": 2.161889202435764e-05, - "loss": 0.0965, + "epoch": 2.7937026585474527, + "grad_norm": 0.855707585811615, + "learning_rate": 7.206297341452547e-06, + "loss": 0.0704, "step": 18810 }, { - "epoch": 1.397593940294074, - "grad_norm": 2.01636004447937, - "learning_rate": 2.1614436358235556e-05, - "loss": 0.0469, + "epoch": 2.795187880588148, + "grad_norm": 1.2716715335845947, + "learning_rate": 7.2048121194118525e-06, + "loss": 0.0804, "step": 18820 }, { - "epoch": 1.3983365513144215, - "grad_norm": 1.713550090789795, - "learning_rate": 2.160998069211347e-05, - "loss": 0.0725, + "epoch": 2.796673102628843, + "grad_norm": 1.4810649156570435, + "learning_rate": 7.203326897371158e-06, + "loss": 0.0635, "step": 18830 }, { - "epoch": 1.3990791623347691, - "grad_norm": 4.37731409072876, - "learning_rate": 2.1605525025991386e-05, - "loss": 0.0788, + "epoch": 2.7981583246695383, + "grad_norm": 0.6314173340797424, + "learning_rate": 7.201841675330462e-06, + "loss": 0.0558, "step": 18840 }, { - "epoch": 1.3998217733551166, - "grad_norm": 2.8098561763763428, - "learning_rate": 2.16010693598693e-05, - "loss": 0.0677, + "epoch": 2.799643546710233, + "grad_norm": 0.5451415181159973, + "learning_rate": 7.2003564532897675e-06, + "loss": 0.0534, "step": 18850 }, { - "epoch": 1.400564384375464, - "grad_norm": 2.595644235610962, - "learning_rate": 2.1596613693747216e-05, - "loss": 0.088, + "epoch": 2.801128768750928, + "grad_norm": 1.1898857355117798, + "learning_rate": 7.198871231249073e-06, + "loss": 0.0718, "step": 18860 }, { - "epoch": 1.4013069953958117, - "grad_norm": 2.6050455570220947, - "learning_rate": 2.159215802762513e-05, - "loss": 0.0683, + "epoch": 2.8026139907916234, + "grad_norm": 0.6038246750831604, + "learning_rate": 7.197386009208377e-06, + "loss": 0.0625, "step": 18870 }, { - "epoch": 1.4020496064161592, - "grad_norm": 1.2562687397003174, - "learning_rate": 2.1587702361503046e-05, - "loss": 0.0605, + "epoch": 2.8040992128323183, + "grad_norm": 1.0258615016937256, + "learning_rate": 7.1959007871676825e-06, + "loss": 0.0971, "step": 18880 }, { - "epoch": 1.4027922174365068, - "grad_norm": 0.9690125584602356, - "learning_rate": 2.158324669538096e-05, - "loss": 0.0506, + "epoch": 2.8055844348730137, + "grad_norm": 0.45901933312416077, + "learning_rate": 7.194415565126988e-06, + "loss": 0.041, "step": 18890 }, { - "epoch": 1.4035348284568543, - "grad_norm": 0.6721828579902649, - "learning_rate": 2.1578791029258876e-05, - "loss": 0.0553, + "epoch": 2.8070696569137086, + "grad_norm": 0.6708415746688843, + "learning_rate": 7.192930343086292e-06, + "loss": 0.0704, "step": 18900 }, { - "epoch": 1.4042774394772017, - "grad_norm": 0.4955576956272125, - "learning_rate": 2.157433536313679e-05, - "loss": 0.065, + "epoch": 2.8085548789544035, + "grad_norm": 0.7538895010948181, + "learning_rate": 7.1914451210455974e-06, + "loss": 0.0905, "step": 18910 }, { - "epoch": 1.4050200504975494, - "grad_norm": 2.1765050888061523, - "learning_rate": 2.1569879697014702e-05, - "loss": 0.0669, + "epoch": 2.810040100995099, + "grad_norm": 1.0020769834518433, + "learning_rate": 7.189959899004901e-06, + "loss": 0.0682, "step": 18920 }, { - "epoch": 1.4057626615178969, - "grad_norm": 1.8637815713882446, - "learning_rate": 2.1565424030892617e-05, - "loss": 0.0629, + "epoch": 2.8115253230357937, + "grad_norm": 0.47720983624458313, + "learning_rate": 7.188474676964206e-06, + "loss": 0.0661, "step": 18930 }, { - "epoch": 1.4065052725382445, - "grad_norm": 2.6646058559417725, - "learning_rate": 2.1560968364770536e-05, - "loss": 0.0729, + "epoch": 2.813010545076489, + "grad_norm": 0.21680277585983276, + "learning_rate": 7.186989454923512e-06, + "loss": 0.0711, "step": 18940 }, { - "epoch": 1.407247883558592, - "grad_norm": 1.4432345628738403, - "learning_rate": 2.1556512698648447e-05, - "loss": 0.0601, + "epoch": 2.814495767117184, + "grad_norm": 0.8336321711540222, + "learning_rate": 7.185504232882816e-06, + "loss": 0.055, "step": 18950 }, { - "epoch": 1.4079904945789394, - "grad_norm": 0.9706814885139465, - "learning_rate": 2.1552057032526362e-05, - "loss": 0.0596, + "epoch": 2.815980989157879, + "grad_norm": 0.4809732735157013, + "learning_rate": 7.184019010842121e-06, + "loss": 0.0636, "step": 18960 }, { - "epoch": 1.408733105599287, - "grad_norm": 1.4810203313827515, - "learning_rate": 2.154760136640428e-05, - "loss": 0.0816, + "epoch": 2.817466211198574, + "grad_norm": 0.7873929738998413, + "learning_rate": 7.182533788801427e-06, + "loss": 0.0749, "step": 18970 }, { - "epoch": 1.4094757166196346, - "grad_norm": 0.5370448231697083, - "learning_rate": 2.1543145700282192e-05, - "loss": 0.0685, + "epoch": 2.818951433239269, + "grad_norm": 1.0418553352355957, + "learning_rate": 7.181048566760731e-06, + "loss": 0.0742, "step": 18980 }, { - "epoch": 1.4102183276399822, - "grad_norm": 1.4338277578353882, - "learning_rate": 2.1538690034160107e-05, - "loss": 0.0821, + "epoch": 2.8204366552799645, + "grad_norm": 0.7662057280540466, + "learning_rate": 7.179563344720036e-06, + "loss": 0.0733, "step": 18990 }, { - "epoch": 1.4109609386603297, - "grad_norm": 1.7360191345214844, - "learning_rate": 2.1534234368038022e-05, - "loss": 0.0853, + "epoch": 2.8219218773206594, + "grad_norm": 1.117795705795288, + "learning_rate": 7.178078122679342e-06, + "loss": 0.068, "step": 19000 }, { - "epoch": 1.4117035496806771, - "grad_norm": 1.8380330801010132, - "learning_rate": 2.1529778701915937e-05, - "loss": 0.0599, + "epoch": 2.8234070993613543, + "grad_norm": 0.7875536680221558, + "learning_rate": 7.176592900638646e-06, + "loss": 0.0709, "step": 19010 }, { - "epoch": 1.4124461607010248, - "grad_norm": 0.9603464007377625, - "learning_rate": 2.1525323035793852e-05, - "loss": 0.0783, + "epoch": 2.8248923214020496, + "grad_norm": 0.7272695302963257, + "learning_rate": 7.175107678597951e-06, + "loss": 0.0655, "step": 19020 }, { - "epoch": 1.4131887717213725, - "grad_norm": 1.198040246963501, - "learning_rate": 2.1520867369671764e-05, - "loss": 0.0801, + "epoch": 2.826377543442745, + "grad_norm": 0.6126848459243774, + "learning_rate": 7.173622456557256e-06, + "loss": 0.0745, "step": 19030 }, { - "epoch": 1.41393138274172, - "grad_norm": 2.3278064727783203, - "learning_rate": 2.1516411703549682e-05, - "loss": 0.0785, + "epoch": 2.82786276548344, + "grad_norm": 0.6373000741004944, + "learning_rate": 7.172137234516561e-06, + "loss": 0.0835, "step": 19040 }, { - "epoch": 1.4146739937620674, - "grad_norm": 2.149242877960205, - "learning_rate": 2.1511956037427597e-05, - "loss": 0.0699, + "epoch": 2.8293479875241347, + "grad_norm": 0.6726350784301758, + "learning_rate": 7.170652012475866e-06, + "loss": 0.0589, "step": 19050 }, { - "epoch": 1.415416604782415, - "grad_norm": 1.5481926202774048, - "learning_rate": 2.150750037130551e-05, - "loss": 0.0785, + "epoch": 2.83083320956483, + "grad_norm": 0.8001404404640198, + "learning_rate": 7.16916679043517e-06, + "loss": 0.0541, "step": 19060 }, { - "epoch": 1.4161592158027625, - "grad_norm": 1.473336100578308, - "learning_rate": 2.1503044705183427e-05, - "loss": 0.1051, + "epoch": 2.832318431605525, + "grad_norm": 0.4326551556587219, + "learning_rate": 7.167681568394476e-06, + "loss": 0.05, "step": 19070 }, { - "epoch": 1.4169018268231102, - "grad_norm": 1.15213143825531, - "learning_rate": 2.1498589039061342e-05, - "loss": 0.08, + "epoch": 2.8338036536462203, + "grad_norm": 0.7540894150733948, + "learning_rate": 7.166196346353781e-06, + "loss": 0.0643, "step": 19080 }, { - "epoch": 1.4176444378434576, - "grad_norm": 1.5161607265472412, - "learning_rate": 2.1494133372939254e-05, - "loss": 0.08, + "epoch": 2.8352888756869152, + "grad_norm": 1.2359886169433594, + "learning_rate": 7.164711124313085e-06, + "loss": 0.0773, "step": 19090 }, { - "epoch": 1.418387048863805, - "grad_norm": 1.6533968448638916, - "learning_rate": 2.148967770681717e-05, - "loss": 0.0633, + "epoch": 2.83677409772761, + "grad_norm": 0.692885160446167, + "learning_rate": 7.16322590227239e-06, + "loss": 0.0625, "step": 19100 }, { - "epoch": 1.4191296598841527, - "grad_norm": 3.5496084690093994, - "learning_rate": 2.1485222040695087e-05, - "loss": 0.0849, + "epoch": 2.8382593197683055, + "grad_norm": 1.0626580715179443, + "learning_rate": 7.1617406802316954e-06, + "loss": 0.05, "step": 19110 }, { - "epoch": 1.4198722709045002, - "grad_norm": 0.9919148683547974, - "learning_rate": 2.1480766374573e-05, - "loss": 0.0349, + "epoch": 2.8397445418090004, + "grad_norm": 0.8324934840202332, + "learning_rate": 7.160255458191e-06, + "loss": 0.0746, "step": 19120 }, { - "epoch": 1.4206148819248479, - "grad_norm": 1.724232792854309, - "learning_rate": 2.1476310708450914e-05, - "loss": 0.0672, + "epoch": 2.8412297638496957, + "grad_norm": 0.3550914525985718, + "learning_rate": 7.158770236150305e-06, + "loss": 0.0829, "step": 19130 }, { - "epoch": 1.4213574929451953, - "grad_norm": 2.5495779514312744, - "learning_rate": 2.147185504232883e-05, - "loss": 0.0786, + "epoch": 2.8427149858903906, + "grad_norm": 0.6439893841743469, + "learning_rate": 7.15728501410961e-06, + "loss": 0.0676, "step": 19140 }, { - "epoch": 1.4221001039655428, - "grad_norm": 1.9082344770431519, - "learning_rate": 2.1467399376206744e-05, - "loss": 0.0668, + "epoch": 2.8442002079310855, + "grad_norm": 1.1629294157028198, + "learning_rate": 7.155799792068915e-06, + "loss": 0.0656, "step": 19150 }, { - "epoch": 1.4228427149858904, - "grad_norm": 0.5212798714637756, - "learning_rate": 2.146294371008466e-05, - "loss": 0.059, + "epoch": 2.845685429971781, + "grad_norm": 1.1239734888076782, + "learning_rate": 7.15431457002822e-06, + "loss": 0.0821, "step": 19160 }, { - "epoch": 1.4235853260062379, - "grad_norm": 1.6344729661941528, - "learning_rate": 2.145848804396257e-05, - "loss": 0.0821, + "epoch": 2.8471706520124758, + "grad_norm": 0.692963719367981, + "learning_rate": 7.1528293479875246e-06, + "loss": 0.0694, "step": 19170 }, { - "epoch": 1.4243279370265856, - "grad_norm": 1.4449615478515625, - "learning_rate": 2.145403237784049e-05, - "loss": 0.0597, + "epoch": 2.848655874053171, + "grad_norm": 0.7630389928817749, + "learning_rate": 7.15134412594683e-06, + "loss": 0.0664, "step": 19180 }, { - "epoch": 1.425070548046933, - "grad_norm": 1.3735612630844116, - "learning_rate": 2.1449576711718404e-05, - "loss": 0.049, + "epoch": 2.850141096093866, + "grad_norm": 0.7898661494255066, + "learning_rate": 7.149858903906135e-06, + "loss": 0.0778, "step": 19190 }, { - "epoch": 1.4258131590672805, - "grad_norm": 1.045433521270752, - "learning_rate": 2.1445121045596315e-05, - "loss": 0.079, + "epoch": 2.851626318134561, + "grad_norm": 1.2775942087173462, + "learning_rate": 7.1483736818654396e-06, + "loss": 0.0733, "step": 19200 }, { - "epoch": 1.4265557700876281, - "grad_norm": 0.6579065918922424, - "learning_rate": 2.1440665379474234e-05, - "loss": 0.0651, + "epoch": 2.8531115401752563, + "grad_norm": 1.9423375129699707, + "learning_rate": 7.146888459824745e-06, + "loss": 0.0681, "step": 19210 }, { - "epoch": 1.4272983811079756, - "grad_norm": 2.254539728164673, - "learning_rate": 2.1436209713352145e-05, - "loss": 0.0883, + "epoch": 2.854596762215951, + "grad_norm": 0.9860016703605652, + "learning_rate": 7.1454032377840484e-06, + "loss": 0.0859, "step": 19220 }, { - "epoch": 1.4280409921283232, - "grad_norm": 0.9771292209625244, - "learning_rate": 2.143175404723006e-05, - "loss": 0.0561, + "epoch": 2.8560819842566465, + "grad_norm": 0.6084530353546143, + "learning_rate": 7.143918015743354e-06, + "loss": 0.0599, "step": 19230 }, { - "epoch": 1.4287836031486707, - "grad_norm": 1.2068768739700317, - "learning_rate": 2.142729838110798e-05, - "loss": 0.0761, + "epoch": 2.8575672062973414, + "grad_norm": 1.0293453931808472, + "learning_rate": 7.142432793702659e-06, + "loss": 0.0628, "step": 19240 }, { - "epoch": 1.4295262141690182, - "grad_norm": 2.0585765838623047, - "learning_rate": 2.142284271498589e-05, - "loss": 0.0978, + "epoch": 2.8590524283380363, + "grad_norm": 0.9693927764892578, + "learning_rate": 7.1409475716619634e-06, + "loss": 0.0749, "step": 19250 }, { - "epoch": 1.4302688251893658, - "grad_norm": 1.229931354522705, - "learning_rate": 2.1418387048863805e-05, - "loss": 0.0664, + "epoch": 2.8605376503787316, + "grad_norm": 0.5707103610038757, + "learning_rate": 7.139462349621269e-06, + "loss": 0.0656, "step": 19260 }, { - "epoch": 1.4310114362097133, - "grad_norm": 1.6777613162994385, - "learning_rate": 2.141393138274172e-05, - "loss": 0.0979, + "epoch": 2.8620228724194265, + "grad_norm": 0.5898536443710327, + "learning_rate": 7.137977127580574e-06, + "loss": 0.0452, "step": 19270 }, { - "epoch": 1.431754047230061, - "grad_norm": 1.9495104551315308, - "learning_rate": 2.1409475716619635e-05, - "loss": 0.0684, + "epoch": 2.863508094460122, + "grad_norm": 0.3954699635505676, + "learning_rate": 7.136491905539878e-06, + "loss": 0.0623, "step": 19280 }, { - "epoch": 1.4324966582504084, - "grad_norm": 2.273432493209839, - "learning_rate": 2.140502005049755e-05, - "loss": 0.0975, + "epoch": 2.864993316500817, + "grad_norm": 1.130014419555664, + "learning_rate": 7.135006683499184e-06, + "loss": 0.0673, "step": 19290 }, { - "epoch": 1.4332392692707558, - "grad_norm": 1.896386742591858, - "learning_rate": 2.1400564384375465e-05, - "loss": 0.0974, + "epoch": 2.8664785385415117, + "grad_norm": 0.5030196905136108, + "learning_rate": 7.133521461458489e-06, + "loss": 0.0747, "step": 19300 }, { - "epoch": 1.4339818802911035, - "grad_norm": 0.9202299118041992, - "learning_rate": 2.139610871825338e-05, - "loss": 0.0809, + "epoch": 2.867963760582207, + "grad_norm": 1.10323166847229, + "learning_rate": 7.132036239417793e-06, + "loss": 0.0867, "step": 19310 }, { - "epoch": 1.4347244913114512, - "grad_norm": 1.1943680047988892, - "learning_rate": 2.1391653052131295e-05, - "loss": 0.0808, + "epoch": 2.8694489826229024, + "grad_norm": 0.868181586265564, + "learning_rate": 7.130551017377099e-06, + "loss": 0.0642, "step": 19320 }, { - "epoch": 1.4354671023317986, - "grad_norm": 2.0343806743621826, - "learning_rate": 2.1387197386009207e-05, - "loss": 0.072, + "epoch": 2.8709342046635973, + "grad_norm": 0.4541347324848175, + "learning_rate": 7.129065795336403e-06, + "loss": 0.0708, "step": 19330 }, { - "epoch": 1.436209713352146, - "grad_norm": 3.626370668411255, - "learning_rate": 2.1382741719887122e-05, - "loss": 0.0568, + "epoch": 2.872419426704292, + "grad_norm": 0.479602187871933, + "learning_rate": 7.127580573295708e-06, + "loss": 0.0578, "step": 19340 }, { - "epoch": 1.4369523243724938, - "grad_norm": 5.546724796295166, - "learning_rate": 2.137828605376504e-05, - "loss": 0.0625, + "epoch": 2.8739046487449875, + "grad_norm": 0.6411767601966858, + "learning_rate": 7.126095351255014e-06, + "loss": 0.0558, "step": 19350 }, { - "epoch": 1.4376949353928412, - "grad_norm": 1.03886878490448, - "learning_rate": 2.1373830387642952e-05, - "loss": 0.0897, + "epoch": 2.8753898707856824, + "grad_norm": 0.45273324847221375, + "learning_rate": 7.124610129214318e-06, + "loss": 0.0778, "step": 19360 }, { - "epoch": 1.4384375464131889, - "grad_norm": 2.2745583057403564, - "learning_rate": 2.1369374721520867e-05, - "loss": 0.0781, + "epoch": 2.8768750928263778, + "grad_norm": 0.6530646085739136, + "learning_rate": 7.123124907173623e-06, + "loss": 0.0693, "step": 19370 }, { - "epoch": 1.4391801574335363, - "grad_norm": 2.1215226650238037, - "learning_rate": 2.1364919055398785e-05, - "loss": 0.0764, + "epoch": 2.8783603148670727, + "grad_norm": 0.6566230654716492, + "learning_rate": 7.121639685132929e-06, + "loss": 0.0648, "step": 19380 }, { - "epoch": 1.4399227684538838, - "grad_norm": 1.7415093183517456, - "learning_rate": 2.1360463389276697e-05, - "loss": 0.0959, + "epoch": 2.8798455369077676, + "grad_norm": 1.1032828092575073, + "learning_rate": 7.120154463092232e-06, + "loss": 0.0647, "step": 19390 }, { - "epoch": 1.4406653794742315, - "grad_norm": 4.175904750823975, - "learning_rate": 2.1356007723154612e-05, - "loss": 0.0898, + "epoch": 2.881330758948463, + "grad_norm": 0.5805070400238037, + "learning_rate": 7.1186692410515376e-06, + "loss": 0.0836, "step": 19400 }, { - "epoch": 1.441407990494579, - "grad_norm": 1.0260313749313354, - "learning_rate": 2.1351552057032527e-05, - "loss": 0.1057, + "epoch": 2.882815980989158, + "grad_norm": 0.7455107569694519, + "learning_rate": 7.117184019010843e-06, + "loss": 0.0589, "step": 19410 }, { - "epoch": 1.4421506015149266, - "grad_norm": 1.1393053531646729, - "learning_rate": 2.1347096390910442e-05, - "loss": 0.0632, + "epoch": 2.884301203029853, + "grad_norm": 0.6039009690284729, + "learning_rate": 7.115698796970147e-06, + "loss": 0.0623, "step": 19420 }, { - "epoch": 1.442893212535274, - "grad_norm": 0.7006543278694153, - "learning_rate": 2.1342640724788357e-05, - "loss": 0.0601, + "epoch": 2.885786425070548, + "grad_norm": 0.39744484424591064, + "learning_rate": 7.1142135749294525e-06, + "loss": 0.0587, "step": 19430 }, { - "epoch": 1.4436358235556215, - "grad_norm": 1.356958270072937, - "learning_rate": 2.133818505866627e-05, - "loss": 0.0522, + "epoch": 2.887271647111243, + "grad_norm": 0.4915350377559662, + "learning_rate": 7.112728352888757e-06, + "loss": 0.0667, "step": 19440 }, { - "epoch": 1.4443784345759692, - "grad_norm": 0.9803171753883362, - "learning_rate": 2.1333729392544187e-05, - "loss": 0.0593, + "epoch": 2.8887568691519383, + "grad_norm": 2.344184637069702, + "learning_rate": 7.111243130848062e-06, + "loss": 0.0832, "step": 19450 }, { - "epoch": 1.4451210455963166, - "grad_norm": 0.602212131023407, - "learning_rate": 2.1329273726422102e-05, - "loss": 0.0412, + "epoch": 2.890242091192633, + "grad_norm": 0.5884201526641846, + "learning_rate": 7.1097579088073675e-06, + "loss": 0.0669, "step": 19460 }, { - "epoch": 1.4458636566166643, - "grad_norm": 2.612487554550171, - "learning_rate": 2.1324818060300013e-05, - "loss": 0.1007, + "epoch": 2.8917273132333285, + "grad_norm": 0.6957660913467407, + "learning_rate": 7.108272686766672e-06, + "loss": 0.0678, "step": 19470 }, { - "epoch": 1.4466062676370117, - "grad_norm": 2.2010037899017334, - "learning_rate": 2.1320362394177932e-05, - "loss": 0.0845, + "epoch": 2.8932125352740234, + "grad_norm": 0.752271294593811, + "learning_rate": 7.106787464725977e-06, + "loss": 0.0565, "step": 19480 }, { - "epoch": 1.4473488786573592, - "grad_norm": 1.3629838228225708, - "learning_rate": 2.1315906728055847e-05, - "loss": 0.0794, + "epoch": 2.8946977573147183, + "grad_norm": 0.8159077167510986, + "learning_rate": 7.1053022426852825e-06, + "loss": 0.0651, "step": 19490 }, { - "epoch": 1.4480914896777068, - "grad_norm": 1.0975171327590942, - "learning_rate": 2.131145106193376e-05, - "loss": 0.0656, + "epoch": 2.8961829793554137, + "grad_norm": 0.9899120926856995, + "learning_rate": 7.103817020644587e-06, + "loss": 0.0457, "step": 19500 }, { - "epoch": 1.4488341006980543, - "grad_norm": 1.8527283668518066, - "learning_rate": 2.1306995395811673e-05, - "loss": 0.0724, + "epoch": 2.8976682013961086, + "grad_norm": 0.5578276515007019, + "learning_rate": 7.102331798603892e-06, + "loss": 0.0737, "step": 19510 }, { - "epoch": 1.449576711718402, - "grad_norm": 1.6812669038772583, - "learning_rate": 2.1302539729689592e-05, - "loss": 0.0959, + "epoch": 2.899153423436804, + "grad_norm": 1.1280609369277954, + "learning_rate": 7.1008465765631975e-06, + "loss": 0.0853, "step": 19520 }, { - "epoch": 1.4503193227387494, - "grad_norm": 1.0494896173477173, - "learning_rate": 2.1298084063567503e-05, - "loss": 0.0544, + "epoch": 2.900638645477499, + "grad_norm": 1.308525562286377, + "learning_rate": 7.099361354522501e-06, + "loss": 0.0582, "step": 19530 }, { - "epoch": 1.4510619337590969, - "grad_norm": 0.5817059874534607, - "learning_rate": 2.129362839744542e-05, - "loss": 0.0808, + "epoch": 2.9021238675181937, + "grad_norm": 0.870124340057373, + "learning_rate": 7.097876132481807e-06, + "loss": 0.062, "step": 19540 }, { - "epoch": 1.4518045447794445, - "grad_norm": 1.512558102607727, - "learning_rate": 2.1289172731323333e-05, - "loss": 0.0952, + "epoch": 2.903609089558889, + "grad_norm": 0.7574545741081238, + "learning_rate": 7.096390910441111e-06, + "loss": 0.057, "step": 19550 }, { - "epoch": 1.452547155799792, - "grad_norm": 4.8917694091796875, - "learning_rate": 2.128471706520125e-05, - "loss": 0.0987, + "epoch": 2.905094311599584, + "grad_norm": 1.3428432941436768, + "learning_rate": 7.094905688400416e-06, + "loss": 0.0614, "step": 19560 }, { - "epoch": 1.4532897668201397, - "grad_norm": 2.3295490741729736, - "learning_rate": 2.1280261399079163e-05, - "loss": 0.0872, + "epoch": 2.9065795336402793, + "grad_norm": 0.890991747379303, + "learning_rate": 7.093420466359721e-06, + "loss": 0.0814, "step": 19570 }, { - "epoch": 1.4540323778404871, - "grad_norm": 1.3101624250411987, - "learning_rate": 2.1275805732957075e-05, - "loss": 0.0484, + "epoch": 2.9080647556809742, + "grad_norm": 1.0695637464523315, + "learning_rate": 7.091935244319026e-06, + "loss": 0.0692, "step": 19580 }, { - "epoch": 1.4547749888608346, - "grad_norm": 3.150947332382202, - "learning_rate": 2.1271350066834993e-05, - "loss": 0.0876, + "epoch": 2.909549977721669, + "grad_norm": 0.8482929468154907, + "learning_rate": 7.090450022278331e-06, + "loss": 0.0635, "step": 19590 }, { - "epoch": 1.4555175998811822, - "grad_norm": 1.6120469570159912, - "learning_rate": 2.1266894400712908e-05, - "loss": 0.0806, + "epoch": 2.9110351997623645, + "grad_norm": 0.8700017333030701, + "learning_rate": 7.088964800237636e-06, + "loss": 0.075, "step": 19600 }, { - "epoch": 1.45626021090153, - "grad_norm": 1.210063099861145, - "learning_rate": 2.126243873459082e-05, - "loss": 0.0542, + "epoch": 2.91252042180306, + "grad_norm": 1.818686842918396, + "learning_rate": 7.087479578196941e-06, + "loss": 0.0619, "step": 19610 }, { - "epoch": 1.4570028219218774, - "grad_norm": 2.553395986557007, - "learning_rate": 2.1257983068468738e-05, - "loss": 0.0743, + "epoch": 2.9140056438437547, + "grad_norm": 1.458304762840271, + "learning_rate": 7.085994356156246e-06, + "loss": 0.0644, "step": 19620 }, { - "epoch": 1.4577454329422248, - "grad_norm": 2.5680768489837646, - "learning_rate": 2.125352740234665e-05, - "loss": 0.0764, + "epoch": 2.9154908658844496, + "grad_norm": 0.509807825088501, + "learning_rate": 7.0845091341155505e-06, + "loss": 0.071, "step": 19630 }, { - "epoch": 1.4584880439625725, - "grad_norm": 1.1068662405014038, - "learning_rate": 2.1249071736224565e-05, - "loss": 0.0778, + "epoch": 2.916976087925145, + "grad_norm": 1.3976842164993286, + "learning_rate": 7.083023912074856e-06, + "loss": 0.0732, "step": 19640 }, { - "epoch": 1.45923065498292, - "grad_norm": 2.1891543865203857, - "learning_rate": 2.1244616070102483e-05, - "loss": 0.0654, + "epoch": 2.91846130996584, + "grad_norm": 0.8412267565727234, + "learning_rate": 7.081538690034161e-06, + "loss": 0.051, "step": 19650 }, { - "epoch": 1.4599732660032676, - "grad_norm": 1.6965099573135376, - "learning_rate": 2.1240160403980395e-05, - "loss": 0.0634, + "epoch": 2.919946532006535, + "grad_norm": 0.7144739627838135, + "learning_rate": 7.0800534679934655e-06, + "loss": 0.0491, "step": 19660 }, { - "epoch": 1.460715877023615, - "grad_norm": 2.3436694145202637, - "learning_rate": 2.123570473785831e-05, - "loss": 0.098, + "epoch": 2.92143175404723, + "grad_norm": 1.5335413217544556, + "learning_rate": 7.078568245952771e-06, + "loss": 0.0607, "step": 19670 }, { - "epoch": 1.4614584880439625, - "grad_norm": 0.8156054615974426, - "learning_rate": 2.1231249071736225e-05, - "loss": 0.0676, + "epoch": 2.922916976087925, + "grad_norm": 0.42868825793266296, + "learning_rate": 7.077083023912076e-06, + "loss": 0.0714, "step": 19680 }, { - "epoch": 1.4622010990643102, - "grad_norm": 1.2002021074295044, - "learning_rate": 2.122679340561414e-05, - "loss": 0.082, + "epoch": 2.9244021981286203, + "grad_norm": 0.5871951580047607, + "learning_rate": 7.07559780187138e-06, + "loss": 0.063, "step": 19690 }, { - "epoch": 1.4629437100846576, - "grad_norm": 1.8691362142562866, - "learning_rate": 2.1222337739492055e-05, - "loss": 0.0487, + "epoch": 2.9258874201693152, + "grad_norm": 1.0395044088363647, + "learning_rate": 7.074112579830685e-06, + "loss": 0.074, "step": 19700 }, { - "epoch": 1.4636863211050053, - "grad_norm": 2.66479229927063, - "learning_rate": 2.121788207336997e-05, - "loss": 0.0784, + "epoch": 2.9273726422100106, + "grad_norm": 0.9330030679702759, + "learning_rate": 7.07262735778999e-06, + "loss": 0.0591, "step": 19710 }, { - "epoch": 1.4644289321253527, - "grad_norm": 1.1679737567901611, - "learning_rate": 2.1213426407247885e-05, - "loss": 0.0645, + "epoch": 2.9288578642507055, + "grad_norm": 0.6595268845558167, + "learning_rate": 7.071142135749295e-06, + "loss": 0.059, "step": 19720 }, { - "epoch": 1.4651715431457002, - "grad_norm": 1.454779028892517, - "learning_rate": 2.12089707411258e-05, - "loss": 0.0739, + "epoch": 2.9303430862914004, + "grad_norm": 0.7226126194000244, + "learning_rate": 7.0696569137086e-06, + "loss": 0.0627, "step": 19730 }, { - "epoch": 1.4659141541660479, - "grad_norm": 2.86212420463562, - "learning_rate": 2.120451507500371e-05, - "loss": 0.0479, + "epoch": 2.9318283083320957, + "grad_norm": 0.6599835753440857, + "learning_rate": 7.068171691667904e-06, + "loss": 0.0657, "step": 19740 }, { - "epoch": 1.4666567651863953, - "grad_norm": 1.7495118379592896, - "learning_rate": 2.1200059408881626e-05, - "loss": 0.0796, + "epoch": 2.9333135303727906, + "grad_norm": 1.201311469078064, + "learning_rate": 7.06668646962721e-06, + "loss": 0.0639, "step": 19750 }, { - "epoch": 1.467399376206743, - "grad_norm": 1.2656298875808716, - "learning_rate": 2.1195603742759545e-05, - "loss": 0.079, + "epoch": 2.934798752413486, + "grad_norm": 0.5775285959243774, + "learning_rate": 7.065201247586515e-06, + "loss": 0.0832, "step": 19760 }, { - "epoch": 1.4681419872270904, - "grad_norm": 0.7113642692565918, - "learning_rate": 2.1191148076637456e-05, - "loss": 0.0707, + "epoch": 2.936283974454181, + "grad_norm": 0.5414377450942993, + "learning_rate": 7.063716025545819e-06, + "loss": 0.0557, "step": 19770 }, { - "epoch": 1.468884598247438, - "grad_norm": 0.5482021570205688, - "learning_rate": 2.118669241051537e-05, - "loss": 0.0678, + "epoch": 2.937769196494876, + "grad_norm": 0.9754282236099243, + "learning_rate": 7.062230803505125e-06, + "loss": 0.0642, "step": 19780 }, { - "epoch": 1.4696272092677856, - "grad_norm": 2.1381452083587646, - "learning_rate": 2.118223674439329e-05, - "loss": 0.0847, + "epoch": 2.939254418535571, + "grad_norm": 0.7484234571456909, + "learning_rate": 7.06074558146443e-06, + "loss": 0.0688, "step": 19790 }, { - "epoch": 1.470369820288133, - "grad_norm": 1.8469703197479248, - "learning_rate": 2.11777810782712e-05, - "loss": 0.0654, + "epoch": 2.940739640576266, + "grad_norm": 0.8926592469215393, + "learning_rate": 7.059260359423734e-06, + "loss": 0.079, "step": 19800 }, { - "epoch": 1.4711124313084807, - "grad_norm": 1.900571346282959, - "learning_rate": 2.1173325412149116e-05, - "loss": 0.05, + "epoch": 2.9422248626169614, + "grad_norm": 0.9294021129608154, + "learning_rate": 7.05777513738304e-06, + "loss": 0.064, "step": 19810 }, { - "epoch": 1.4718550423288281, - "grad_norm": 0.9532872438430786, - "learning_rate": 2.116886974602703e-05, - "loss": 0.0535, + "epoch": 2.9437100846576563, + "grad_norm": 0.4306425452232361, + "learning_rate": 7.056289915342345e-06, + "loss": 0.0875, "step": 19820 }, { - "epoch": 1.4725976533491756, - "grad_norm": 1.2694770097732544, - "learning_rate": 2.1164414079904946e-05, - "loss": 0.0907, + "epoch": 2.945195306698351, + "grad_norm": 0.8793667554855347, + "learning_rate": 7.054804693301649e-06, + "loss": 0.0745, "step": 19830 }, { - "epoch": 1.4733402643695233, - "grad_norm": 0.9396808743476868, - "learning_rate": 2.115995841378286e-05, - "loss": 0.0592, + "epoch": 2.9466805287390465, + "grad_norm": 1.5072189569473267, + "learning_rate": 7.053319471260955e-06, + "loss": 0.0922, "step": 19840 }, { - "epoch": 1.4740828753898707, - "grad_norm": 1.6375855207443237, - "learning_rate": 2.1155502747660773e-05, - "loss": 0.0707, + "epoch": 2.9481657507797414, + "grad_norm": 0.5707129836082458, + "learning_rate": 7.051834249220258e-06, + "loss": 0.0642, "step": 19850 }, { - "epoch": 1.4748254864102184, - "grad_norm": 2.0587351322174072, - "learning_rate": 2.115104708153869e-05, - "loss": 0.084, + "epoch": 2.9496509728204368, + "grad_norm": 1.5201908349990845, + "learning_rate": 7.0503490271795635e-06, + "loss": 0.0679, "step": 19860 }, { - "epoch": 1.4755680974305658, - "grad_norm": 0.8906083106994629, - "learning_rate": 2.1146591415416606e-05, - "loss": 0.078, + "epoch": 2.9511361948611317, + "grad_norm": 0.6788731217384338, + "learning_rate": 7.048863805138869e-06, + "loss": 0.0794, "step": 19870 }, { - "epoch": 1.4763107084509133, - "grad_norm": 0.7745434045791626, - "learning_rate": 2.1142135749294518e-05, - "loss": 0.0601, + "epoch": 2.9526214169018266, + "grad_norm": 0.6304814219474792, + "learning_rate": 7.047378583098173e-06, + "loss": 0.0579, "step": 19880 }, { - "epoch": 1.477053319471261, - "grad_norm": 1.4954042434692383, - "learning_rate": 2.1137680083172436e-05, - "loss": 0.0797, + "epoch": 2.954106638942522, + "grad_norm": 0.5509234666824341, + "learning_rate": 7.0458933610574785e-06, + "loss": 0.0608, "step": 19890 }, { - "epoch": 1.4777959304916086, - "grad_norm": 1.5005775690078735, - "learning_rate": 2.113322441705035e-05, - "loss": 0.0563, + "epoch": 2.9555918609832172, + "grad_norm": 0.6580905914306641, + "learning_rate": 7.044408139016784e-06, + "loss": 0.0758, "step": 19900 }, { - "epoch": 1.478538541511956, - "grad_norm": 1.535308837890625, - "learning_rate": 2.1128768750928263e-05, - "loss": 0.0555, + "epoch": 2.957077083023912, + "grad_norm": 1.1142622232437134, + "learning_rate": 7.042922916976088e-06, + "loss": 0.0556, "step": 19910 }, { - "epoch": 1.4792811525323035, - "grad_norm": 1.6805989742279053, - "learning_rate": 2.1124313084806178e-05, - "loss": 0.0413, + "epoch": 2.958562305064607, + "grad_norm": 0.4926334023475647, + "learning_rate": 7.0414376949353935e-06, + "loss": 0.0578, "step": 19920 }, { - "epoch": 1.4800237635526512, - "grad_norm": 1.622406244277954, - "learning_rate": 2.1119857418684096e-05, - "loss": 0.0634, + "epoch": 2.9600475271053024, + "grad_norm": 0.603307843208313, + "learning_rate": 7.039952472894699e-06, + "loss": 0.0609, "step": 19930 }, { - "epoch": 1.4807663745729986, - "grad_norm": 1.6346774101257324, - "learning_rate": 2.1115401752562008e-05, - "loss": 0.0793, + "epoch": 2.9615327491459973, + "grad_norm": 1.4161272048950195, + "learning_rate": 7.038467250854003e-06, + "loss": 0.0848, "step": 19940 }, { - "epoch": 1.4815089855933463, - "grad_norm": 2.6986865997314453, - "learning_rate": 2.1110946086439923e-05, - "loss": 0.0689, + "epoch": 2.9630179711866926, + "grad_norm": 0.8850290179252625, + "learning_rate": 7.0369820288133085e-06, + "loss": 0.0722, "step": 19950 }, { - "epoch": 1.4822515966136938, - "grad_norm": 2.2464749813079834, - "learning_rate": 2.1106490420317838e-05, - "loss": 0.0736, + "epoch": 2.9645031932273875, + "grad_norm": 0.6632411479949951, + "learning_rate": 7.035496806772613e-06, + "loss": 0.0677, "step": 19960 }, { - "epoch": 1.4829942076340412, - "grad_norm": 0.5846890211105347, - "learning_rate": 2.1102034754195753e-05, - "loss": 0.055, + "epoch": 2.9659884152680824, + "grad_norm": 0.4602072536945343, + "learning_rate": 7.034011584731918e-06, + "loss": 0.0488, "step": 19970 }, { - "epoch": 1.483736818654389, - "grad_norm": 2.5977261066436768, - "learning_rate": 2.1097579088073668e-05, - "loss": 0.0637, + "epoch": 2.967473637308778, + "grad_norm": 0.9665570259094238, + "learning_rate": 7.0325263626912235e-06, + "loss": 0.0609, "step": 19980 }, { - "epoch": 1.4844794296747363, - "grad_norm": 2.5025405883789062, - "learning_rate": 2.109312342195158e-05, - "loss": 0.0687, + "epoch": 2.9689588593494727, + "grad_norm": 0.6160033941268921, + "learning_rate": 7.031041140650527e-06, + "loss": 0.0588, "step": 19990 }, { - "epoch": 1.485222040695084, - "grad_norm": 0.5519008040428162, - "learning_rate": 2.1088667755829498e-05, - "loss": 0.0369, + "epoch": 2.970444081390168, + "grad_norm": 0.8523101210594177, + "learning_rate": 7.029555918609832e-06, + "loss": 0.0786, "step": 20000 }, { - "epoch": 1.4859646517154315, - "grad_norm": 2.253185272216797, - "learning_rate": 2.1084212089707413e-05, - "loss": 0.0943, + "epoch": 2.971929303430863, + "grad_norm": 0.8637480139732361, + "learning_rate": 7.0280706965691385e-06, + "loss": 0.0539, "step": 20010 }, { - "epoch": 1.486707262735779, - "grad_norm": 1.709266185760498, - "learning_rate": 2.1079756423585325e-05, - "loss": 0.0721, + "epoch": 2.973414525471558, + "grad_norm": 1.4097833633422852, + "learning_rate": 7.026585474528442e-06, + "loss": 0.0854, "step": 20020 }, { - "epoch": 1.4874498737561266, - "grad_norm": 1.548275351524353, - "learning_rate": 2.1075300757463243e-05, - "loss": 0.0819, + "epoch": 2.974899747512253, + "grad_norm": 1.2111132144927979, + "learning_rate": 7.025100252487747e-06, + "loss": 0.0548, "step": 20030 }, { - "epoch": 1.488192484776474, - "grad_norm": 0.7985262274742126, - "learning_rate": 2.1070845091341158e-05, - "loss": 0.0646, + "epoch": 2.976384969552948, + "grad_norm": 0.8269698023796082, + "learning_rate": 7.023615030447053e-06, + "loss": 0.0945, "step": 20040 }, { - "epoch": 1.4889350957968217, - "grad_norm": 2.2757515907287598, - "learning_rate": 2.106638942521907e-05, - "loss": 0.0766, + "epoch": 2.9778701915936434, + "grad_norm": 0.7142363786697388, + "learning_rate": 7.022129808406357e-06, + "loss": 0.0738, "step": 20050 }, { - "epoch": 1.4896777068171692, - "grad_norm": 1.4162112474441528, - "learning_rate": 2.1061933759096988e-05, - "loss": 0.0765, + "epoch": 2.9793554136343383, + "grad_norm": 0.6995881199836731, + "learning_rate": 7.020644586365662e-06, + "loss": 0.0609, "step": 20060 }, { - "epoch": 1.4904203178375166, - "grad_norm": 2.37214994430542, - "learning_rate": 2.10574780929749e-05, - "loss": 0.0794, + "epoch": 2.980840635675033, + "grad_norm": 1.4468334913253784, + "learning_rate": 7.019159364324967e-06, + "loss": 0.0832, "step": 20070 }, { - "epoch": 1.4911629288578643, - "grad_norm": 2.4197685718536377, - "learning_rate": 2.1053022426852814e-05, - "loss": 0.1291, + "epoch": 2.9823258577157286, + "grad_norm": 0.9121166467666626, + "learning_rate": 7.017674142284272e-06, + "loss": 0.0681, "step": 20080 }, { - "epoch": 1.4919055398782117, - "grad_norm": 1.205228328704834, - "learning_rate": 2.104856676073073e-05, - "loss": 0.1016, + "epoch": 2.9838110797564235, + "grad_norm": 0.7170482873916626, + "learning_rate": 7.016188920243577e-06, + "loss": 0.0653, "step": 20090 }, { - "epoch": 1.4926481508985594, - "grad_norm": 0.3539555072784424, - "learning_rate": 2.1044111094608644e-05, - "loss": 0.0769, + "epoch": 2.985296301797119, + "grad_norm": 0.7121782302856445, + "learning_rate": 7.014703698202882e-06, + "loss": 0.0743, "step": 20100 }, { - "epoch": 1.4933907619189069, - "grad_norm": 0.9795430302619934, - "learning_rate": 2.103965542848656e-05, - "loss": 0.0789, + "epoch": 2.9867815238378137, + "grad_norm": 0.46788862347602844, + "learning_rate": 7.013218476162187e-06, + "loss": 0.0587, "step": 20110 }, { - "epoch": 1.4941333729392543, - "grad_norm": 2.009331703186035, - "learning_rate": 2.1035199762364474e-05, - "loss": 0.0557, + "epoch": 2.9882667458785086, + "grad_norm": 1.205629587173462, + "learning_rate": 7.011733254121492e-06, + "loss": 0.0547, "step": 20120 }, { - "epoch": 1.494875983959602, - "grad_norm": 2.3191285133361816, - "learning_rate": 2.103074409624239e-05, - "loss": 0.065, + "epoch": 2.989751967919204, + "grad_norm": 0.6308774352073669, + "learning_rate": 7.010248032080797e-06, + "loss": 0.0729, "step": 20130 }, { - "epoch": 1.4956185949799494, - "grad_norm": 2.4035158157348633, - "learning_rate": 2.1026288430120304e-05, - "loss": 0.0668, + "epoch": 2.991237189959899, + "grad_norm": 1.0978872776031494, + "learning_rate": 7.008762810040102e-06, + "loss": 0.0767, "step": 20140 }, { - "epoch": 1.496361206000297, - "grad_norm": 1.0998272895812988, - "learning_rate": 2.1021832763998216e-05, - "loss": 0.0536, + "epoch": 2.992722412000594, + "grad_norm": 0.6353699564933777, + "learning_rate": 7.007277587999406e-06, + "loss": 0.0645, "step": 20150 }, { - "epoch": 1.4971038170206445, - "grad_norm": 3.933292865753174, - "learning_rate": 2.101737709787613e-05, - "loss": 0.1027, + "epoch": 2.994207634041289, + "grad_norm": 0.7821415066719055, + "learning_rate": 7.005792365958711e-06, + "loss": 0.0642, "step": 20160 }, { - "epoch": 1.497846428040992, - "grad_norm": 0.8710070848464966, - "learning_rate": 2.101292143175405e-05, - "loss": 0.0521, + "epoch": 2.995692856081984, + "grad_norm": 0.493858277797699, + "learning_rate": 7.004307143918016e-06, + "loss": 0.0356, "step": 20170 }, { - "epoch": 1.4985890390613397, - "grad_norm": 3.407097816467285, - "learning_rate": 2.100846576563196e-05, - "loss": 0.0823, + "epoch": 2.9971780781226793, + "grad_norm": 0.9723030924797058, + "learning_rate": 7.002821921877321e-06, + "loss": 0.0715, "step": 20180 }, { - "epoch": 1.4993316500816873, - "grad_norm": 2.3524112701416016, - "learning_rate": 2.1004010099509876e-05, - "loss": 0.1098, + "epoch": 2.9986633001633747, + "grad_norm": 0.9995088577270508, + "learning_rate": 7.001336699836626e-06, + "loss": 0.0564, "step": 20190 }, { - "epoch": 1.5000742611020348, - "grad_norm": 2.8110392093658447, - "learning_rate": 2.0999554433387794e-05, - "loss": 0.0903, + "epoch": 3.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.0620584562420845, + "eval_runtime": 210.5838, + "eval_samples_per_second": 180.541, + "eval_steps_per_second": 5.646, + "step": 20199 + }, + { + "epoch": 3.0001485222040696, + "grad_norm": 0.7721754908561707, + "learning_rate": 6.999851477795931e-06, + "loss": 0.0561, "step": 20200 }, { - "epoch": 1.5008168721223822, - "grad_norm": 2.2008228302001953, - "learning_rate": 2.0995098767265706e-05, - "loss": 0.0546, + "epoch": 3.0016337442447645, + "grad_norm": 0.5628637075424194, + "learning_rate": 6.998366255755236e-06, + "loss": 0.0575, "step": 20210 }, { - "epoch": 1.5015594831427297, - "grad_norm": 0.38597363233566284, - "learning_rate": 2.099064310114362e-05, - "loss": 0.049, + "epoch": 3.00311896628546, + "grad_norm": 0.7381501793861389, + "learning_rate": 6.996881033714541e-06, + "loss": 0.0614, "step": 20220 }, { - "epoch": 1.5023020941630774, - "grad_norm": 1.819575309753418, - "learning_rate": 2.098618743502154e-05, - "loss": 0.0668, + "epoch": 3.0046041883261547, + "grad_norm": 0.8572022318840027, + "learning_rate": 6.995395811673846e-06, + "loss": 0.0706, "step": 20230 }, { - "epoch": 1.503044705183425, - "grad_norm": 1.7330279350280762, - "learning_rate": 2.098173176889945e-05, - "loss": 0.0746, + "epoch": 3.0060894103668496, + "grad_norm": 0.702950656414032, + "learning_rate": 6.993910589633151e-06, + "loss": 0.063, "step": 20240 }, { - "epoch": 1.5037873162037725, - "grad_norm": 1.4571242332458496, - "learning_rate": 2.0977276102777366e-05, - "loss": 0.0804, + "epoch": 3.007574632407545, + "grad_norm": 0.7224368453025818, + "learning_rate": 6.992425367592456e-06, + "loss": 0.078, "step": 20250 }, { - "epoch": 1.50452992722412, - "grad_norm": 0.8865562677383423, - "learning_rate": 2.0972820436655278e-05, - "loss": 0.08, + "epoch": 3.00905985444824, + "grad_norm": 0.8272649049758911, + "learning_rate": 6.99094014555176e-06, + "loss": 0.0787, "step": 20260 }, { - "epoch": 1.5052725382444674, - "grad_norm": 1.0869084596633911, - "learning_rate": 2.0968364770533196e-05, - "loss": 0.0526, + "epoch": 3.010545076488935, + "grad_norm": 1.782045841217041, + "learning_rate": 6.989454923511066e-06, + "loss": 0.0621, "step": 20270 }, { - "epoch": 1.506015149264815, - "grad_norm": 1.397093653678894, - "learning_rate": 2.096390910441111e-05, - "loss": 0.0899, + "epoch": 3.01203029852963, + "grad_norm": 0.8865758180618286, + "learning_rate": 6.987969701470371e-06, + "loss": 0.0582, "step": 20280 }, { - "epoch": 1.5067577602851627, - "grad_norm": 1.4997838735580444, - "learning_rate": 2.0959453438289023e-05, - "loss": 0.0929, + "epoch": 3.0135155205703255, + "grad_norm": 1.336371898651123, + "learning_rate": 6.9864844794296745e-06, + "loss": 0.0694, "step": 20290 }, { - "epoch": 1.5075003713055102, - "grad_norm": 2.05659556388855, - "learning_rate": 2.095499777216694e-05, - "loss": 0.0715, + "epoch": 3.0150007426110204, + "grad_norm": 0.6026767492294312, + "learning_rate": 6.984999257388981e-06, + "loss": 0.0563, "step": 20300 }, { - "epoch": 1.5082429823258576, - "grad_norm": 2.342632293701172, - "learning_rate": 2.0950542106044856e-05, - "loss": 0.0825, + "epoch": 3.0164859646517153, + "grad_norm": 0.6530321836471558, + "learning_rate": 6.983514035348286e-06, + "loss": 0.0826, "step": 20310 }, { - "epoch": 1.5089855933462053, - "grad_norm": 2.975311040878296, - "learning_rate": 2.0946086439922768e-05, - "loss": 0.0916, + "epoch": 3.0179711866924106, + "grad_norm": 1.187461256980896, + "learning_rate": 6.9820288133075895e-06, + "loss": 0.0576, "step": 20320 }, { - "epoch": 1.5097282043665528, - "grad_norm": 2.030165672302246, - "learning_rate": 2.0941630773800683e-05, - "loss": 0.0953, + "epoch": 3.0194564087331055, + "grad_norm": 0.42498815059661865, + "learning_rate": 6.980543591266895e-06, + "loss": 0.0541, "step": 20330 }, { - "epoch": 1.5104708153869004, - "grad_norm": 1.042168378829956, - "learning_rate": 2.09371751076786e-05, - "loss": 0.0659, + "epoch": 3.020941630773801, + "grad_norm": 1.0660574436187744, + "learning_rate": 6.9790583692262e-06, + "loss": 0.0542, "step": 20340 }, { - "epoch": 1.5112134264072479, - "grad_norm": 1.342262864112854, - "learning_rate": 2.0932719441556513e-05, - "loss": 0.0727, + "epoch": 3.0224268528144957, + "grad_norm": 0.7157576084136963, + "learning_rate": 6.9775731471855045e-06, + "loss": 0.0531, "step": 20350 }, { - "epoch": 1.5119560374275953, - "grad_norm": 1.2941288948059082, - "learning_rate": 2.0928263775434428e-05, - "loss": 0.0511, + "epoch": 3.0239120748551906, + "grad_norm": 0.721301257610321, + "learning_rate": 6.97608792514481e-06, + "loss": 0.0606, "step": 20360 }, { - "epoch": 1.512698648447943, - "grad_norm": 3.6500983238220215, - "learning_rate": 2.0923808109312343e-05, - "loss": 0.1324, + "epoch": 3.025397296895886, + "grad_norm": 0.23004470765590668, + "learning_rate": 6.974602703104114e-06, + "loss": 0.0517, "step": 20370 }, { - "epoch": 1.5134412594682907, - "grad_norm": 1.0113955736160278, - "learning_rate": 2.0919352443190258e-05, - "loss": 0.0696, + "epoch": 3.026882518936581, + "grad_norm": 1.1255507469177246, + "learning_rate": 6.9731174810634195e-06, + "loss": 0.0836, "step": 20380 }, { - "epoch": 1.5141838704886381, - "grad_norm": 1.395719289779663, - "learning_rate": 2.0914896777068173e-05, - "loss": 0.0683, + "epoch": 3.0283677409772762, + "grad_norm": 0.5415315628051758, + "learning_rate": 6.971632259022725e-06, + "loss": 0.0604, "step": 20390 }, { - "epoch": 1.5149264815089856, - "grad_norm": 1.7119672298431396, - "learning_rate": 2.0910441110946084e-05, - "loss": 0.062, + "epoch": 3.029852963017971, + "grad_norm": 1.0092153549194336, + "learning_rate": 6.970147036982029e-06, + "loss": 0.0625, "step": 20400 }, { - "epoch": 1.515669092529333, - "grad_norm": 1.9944767951965332, - "learning_rate": 2.0905985444824003e-05, - "loss": 0.0649, + "epoch": 3.0313381850586665, + "grad_norm": 1.8862570524215698, + "learning_rate": 6.9686618149413345e-06, + "loss": 0.063, "step": 20410 }, { - "epoch": 1.5164117035496807, - "grad_norm": 1.1254253387451172, - "learning_rate": 2.0901529778701917e-05, - "loss": 0.0545, + "epoch": 3.0328234070993614, + "grad_norm": 1.144673466682434, + "learning_rate": 6.96717659290064e-06, + "loss": 0.0678, "step": 20420 }, { - "epoch": 1.5171543145700284, - "grad_norm": 2.864976406097412, - "learning_rate": 2.089707411257983e-05, - "loss": 0.0984, + "epoch": 3.0343086291400563, + "grad_norm": 0.7662794589996338, + "learning_rate": 6.965691370859944e-06, + "loss": 0.0744, "step": 20430 }, { - "epoch": 1.5178969255903758, - "grad_norm": 2.1159725189208984, - "learning_rate": 2.0892618446457747e-05, - "loss": 0.06, + "epoch": 3.0357938511807516, + "grad_norm": 0.6166210770606995, + "learning_rate": 6.9642061488192494e-06, + "loss": 0.0679, "step": 20440 }, { - "epoch": 1.5186395366107233, - "grad_norm": 0.9338457584381104, - "learning_rate": 2.0888162780335662e-05, - "loss": 0.0659, + "epoch": 3.0372790732214465, + "grad_norm": 0.8427115082740784, + "learning_rate": 6.962720926778555e-06, + "loss": 0.0656, "step": 20450 }, { - "epoch": 1.5193821476310707, - "grad_norm": 1.4100627899169922, - "learning_rate": 2.0883707114213574e-05, - "loss": 0.0716, + "epoch": 3.038764295262142, + "grad_norm": 0.5689003467559814, + "learning_rate": 6.961235704737858e-06, + "loss": 0.0518, "step": 20460 }, { - "epoch": 1.5201247586514184, - "grad_norm": 1.341646432876587, - "learning_rate": 2.0879251448091492e-05, - "loss": 0.0737, + "epoch": 3.0402495173028368, + "grad_norm": 1.7824311256408691, + "learning_rate": 6.9597504826971644e-06, + "loss": 0.0769, "step": 20470 }, { - "epoch": 1.520867369671766, - "grad_norm": 3.197697401046753, - "learning_rate": 2.0874795781969404e-05, - "loss": 0.0897, + "epoch": 3.0417347393435317, + "grad_norm": 1.074300765991211, + "learning_rate": 6.958265260656468e-06, + "loss": 0.0741, "step": 20480 }, { - "epoch": 1.5216099806921135, - "grad_norm": 0.8237298727035522, - "learning_rate": 2.087034011584732e-05, - "loss": 0.0553, + "epoch": 3.043219961384227, + "grad_norm": 0.41286715865135193, + "learning_rate": 6.956780038615773e-06, + "loss": 0.0683, "step": 20490 }, { - "epoch": 1.522352591712461, - "grad_norm": 1.78400456905365, - "learning_rate": 2.0865884449725234e-05, - "loss": 0.0745, + "epoch": 3.044705183424922, + "grad_norm": 0.6059936881065369, + "learning_rate": 6.955294816575079e-06, + "loss": 0.046, "step": 20500 }, { - "epoch": 1.5230952027328084, - "grad_norm": 2.6086509227752686, - "learning_rate": 2.086142878360315e-05, - "loss": 0.1298, + "epoch": 3.0461904054656173, + "grad_norm": 0.7452453374862671, + "learning_rate": 6.953809594534383e-06, + "loss": 0.0655, "step": 20510 }, { - "epoch": 1.523837813753156, - "grad_norm": 1.0803875923156738, - "learning_rate": 2.0856973117481064e-05, - "loss": 0.0673, + "epoch": 3.047675627506312, + "grad_norm": 0.7540690302848816, + "learning_rate": 6.952324372493688e-06, + "loss": 0.0612, "step": 20520 }, { - "epoch": 1.5245804247735038, - "grad_norm": 0.390265554189682, - "learning_rate": 2.085251745135898e-05, - "loss": 0.0434, + "epoch": 3.0491608495470075, + "grad_norm": 0.7747832536697388, + "learning_rate": 6.950839150452994e-06, + "loss": 0.0667, "step": 20530 }, { - "epoch": 1.5253230357938512, - "grad_norm": 1.218558669090271, - "learning_rate": 2.0848061785236894e-05, - "loss": 0.0666, + "epoch": 3.0506460715877024, + "grad_norm": 0.6237127184867859, + "learning_rate": 6.949353928412298e-06, + "loss": 0.056, "step": 20540 }, { - "epoch": 1.5260656468141987, - "grad_norm": 1.0969456434249878, - "learning_rate": 2.084360611911481e-05, - "loss": 0.095, + "epoch": 3.0521312936283973, + "grad_norm": 0.6594868898391724, + "learning_rate": 6.947868706371603e-06, + "loss": 0.0704, "step": 20550 }, { - "epoch": 1.526808257834546, - "grad_norm": 1.8818721771240234, - "learning_rate": 2.0839150452992724e-05, - "loss": 0.0808, + "epoch": 3.0536165156690926, + "grad_norm": 0.7020158767700195, + "learning_rate": 6.9463834843309086e-06, + "loss": 0.0649, "step": 20560 }, { - "epoch": 1.5275508688548938, - "grad_norm": 2.201045036315918, - "learning_rate": 2.0834694786870636e-05, - "loss": 0.1017, + "epoch": 3.0551017377097875, + "grad_norm": 0.36921775341033936, + "learning_rate": 6.944898262290213e-06, + "loss": 0.0774, "step": 20570 }, { - "epoch": 1.5282934798752414, - "grad_norm": 1.147294521331787, - "learning_rate": 2.0830239120748554e-05, - "loss": 0.0744, + "epoch": 3.056586959750483, + "grad_norm": 0.8545680046081543, + "learning_rate": 6.943413040249518e-06, + "loss": 0.0592, "step": 20580 }, { - "epoch": 1.529036090895589, - "grad_norm": 0.8210429549217224, - "learning_rate": 2.0825783454626466e-05, - "loss": 0.0528, + "epoch": 3.058072181791178, + "grad_norm": 1.0698968172073364, + "learning_rate": 6.941927818208823e-06, + "loss": 0.0846, "step": 20590 }, { - "epoch": 1.5297787019159363, - "grad_norm": 3.2045974731445312, - "learning_rate": 2.082132778850438e-05, - "loss": 0.0918, + "epoch": 3.0595574038318727, + "grad_norm": 1.441307783126831, + "learning_rate": 6.940442596168128e-06, + "loss": 0.0766, "step": 20600 }, { - "epoch": 1.530521312936284, - "grad_norm": 3.5814733505249023, - "learning_rate": 2.08168721223823e-05, - "loss": 0.0625, + "epoch": 3.061042625872568, + "grad_norm": 0.8100225329399109, + "learning_rate": 6.938957374127433e-06, + "loss": 0.063, "step": 20610 }, { - "epoch": 1.5312639239566315, - "grad_norm": 0.8140150308609009, - "learning_rate": 2.081241645626021e-05, - "loss": 0.1008, + "epoch": 3.062527847913263, + "grad_norm": 0.7782602906227112, + "learning_rate": 6.937472152086737e-06, + "loss": 0.084, "step": 20620 }, { - "epoch": 1.5320065349769791, - "grad_norm": 2.3892927169799805, - "learning_rate": 2.0807960790138126e-05, - "loss": 0.0747, + "epoch": 3.0640130699539583, + "grad_norm": 1.3601396083831787, + "learning_rate": 6.935986930046042e-06, + "loss": 0.09, "step": 20630 }, { - "epoch": 1.5327491459973266, - "grad_norm": 1.1212306022644043, - "learning_rate": 2.0803505124016044e-05, - "loss": 0.0827, + "epoch": 3.065498291994653, + "grad_norm": 0.49343305826187134, + "learning_rate": 6.9345017080053474e-06, + "loss": 0.0694, "step": 20640 }, { - "epoch": 1.533491757017674, - "grad_norm": 1.2150450944900513, - "learning_rate": 2.0799049457893956e-05, - "loss": 0.056, + "epoch": 3.066983514035348, + "grad_norm": 0.6675896048545837, + "learning_rate": 6.933016485964652e-06, + "loss": 0.0515, "step": 20650 }, { - "epoch": 1.5342343680380217, - "grad_norm": 2.157820463180542, - "learning_rate": 2.079459379177187e-05, - "loss": 0.0952, + "epoch": 3.0684687360760434, + "grad_norm": 1.5816264152526855, + "learning_rate": 6.931531263923957e-06, + "loss": 0.0867, "step": 20660 }, { - "epoch": 1.5349769790583694, - "grad_norm": 2.5032124519348145, - "learning_rate": 2.0790138125649782e-05, - "loss": 0.0591, + "epoch": 3.0699539581167383, + "grad_norm": 0.8865456581115723, + "learning_rate": 6.930046041883262e-06, + "loss": 0.068, "step": 20670 }, { - "epoch": 1.5357195900787168, - "grad_norm": 1.2760533094406128, - "learning_rate": 2.07856824595277e-05, - "loss": 0.0746, + "epoch": 3.0714391801574337, + "grad_norm": 0.9473548531532288, + "learning_rate": 6.928560819842567e-06, + "loss": 0.0694, "step": 20680 }, { - "epoch": 1.5364622010990643, - "grad_norm": 3.50283145904541, - "learning_rate": 2.0781226793405616e-05, - "loss": 0.0743, + "epoch": 3.0729244021981286, + "grad_norm": 0.991002082824707, + "learning_rate": 6.927075597801872e-06, + "loss": 0.0642, "step": 20690 }, { - "epoch": 1.5372048121194117, - "grad_norm": 2.669391393661499, - "learning_rate": 2.0776771127283527e-05, - "loss": 0.0721, + "epoch": 3.074409624238824, + "grad_norm": 0.24137896299362183, + "learning_rate": 6.9255903757611766e-06, + "loss": 0.0697, "step": 20700 }, { - "epoch": 1.5379474231397594, - "grad_norm": 3.1741724014282227, - "learning_rate": 2.0772315461161446e-05, - "loss": 0.0815, + "epoch": 3.075894846279519, + "grad_norm": 1.1634925603866577, + "learning_rate": 6.924105153720482e-06, + "loss": 0.073, "step": 20710 }, { - "epoch": 1.538690034160107, - "grad_norm": 2.0243866443634033, - "learning_rate": 2.076785979503936e-05, - "loss": 0.0678, + "epoch": 3.0773800683202137, + "grad_norm": 0.7352584004402161, + "learning_rate": 6.922619931679787e-06, + "loss": 0.0552, "step": 20720 }, { - "epoch": 1.5394326451804545, - "grad_norm": 3.2684175968170166, - "learning_rate": 2.0763404128917272e-05, - "loss": 0.0838, + "epoch": 3.078865290360909, + "grad_norm": 0.663188636302948, + "learning_rate": 6.9211347096390916e-06, + "loss": 0.0549, "step": 20730 }, { - "epoch": 1.540175256200802, - "grad_norm": 1.0323513746261597, - "learning_rate": 2.0758948462795187e-05, - "loss": 0.0892, + "epoch": 3.080350512401604, + "grad_norm": 0.4469948709011078, + "learning_rate": 6.919649487598397e-06, + "loss": 0.0448, "step": 20740 }, { - "epoch": 1.5409178672211494, - "grad_norm": 0.852678656578064, - "learning_rate": 2.0754492796673106e-05, - "loss": 0.0938, + "epoch": 3.0818357344422993, + "grad_norm": 0.750174880027771, + "learning_rate": 6.918164265557702e-06, + "loss": 0.0807, "step": 20750 }, { - "epoch": 1.541660478241497, - "grad_norm": 1.296675205230713, - "learning_rate": 2.0750037130551017e-05, - "loss": 0.0761, + "epoch": 3.083320956482994, + "grad_norm": 0.8470863699913025, + "learning_rate": 6.9166790435170066e-06, + "loss": 0.0693, "step": 20760 }, { - "epoch": 1.5424030892618448, - "grad_norm": 0.6336604952812195, - "learning_rate": 2.0745581464428932e-05, - "loss": 0.0521, + "epoch": 3.084806178523689, + "grad_norm": 0.5609787702560425, + "learning_rate": 6.915193821476312e-06, + "loss": 0.0694, "step": 20770 }, { - "epoch": 1.5431457002821922, - "grad_norm": 0.9696953296661377, - "learning_rate": 2.0741125798306847e-05, - "loss": 0.0773, + "epoch": 3.0862914005643844, + "grad_norm": 0.9973790645599365, + "learning_rate": 6.9137085994356154e-06, + "loss": 0.0679, "step": 20780 }, { - "epoch": 1.5438883113025397, - "grad_norm": 2.513984203338623, - "learning_rate": 2.0736670132184762e-05, - "loss": 0.0938, + "epoch": 3.0877766226050793, + "grad_norm": 0.9720730781555176, + "learning_rate": 6.912223377394921e-06, + "loss": 0.0527, "step": 20790 }, { - "epoch": 1.5446309223228871, - "grad_norm": 1.3726938962936401, - "learning_rate": 2.0732214466062677e-05, - "loss": 0.0799, + "epoch": 3.0892618446457747, + "grad_norm": 1.0503332614898682, + "learning_rate": 6.910738155354226e-06, + "loss": 0.0687, "step": 20800 }, { - "epoch": 1.5453735333432348, - "grad_norm": 1.0419371128082275, - "learning_rate": 2.0727758799940592e-05, - "loss": 0.0765, + "epoch": 3.0907470666864696, + "grad_norm": 1.0064181089401245, + "learning_rate": 6.90925293331353e-06, + "loss": 0.0805, "step": 20810 }, { - "epoch": 1.5461161443635825, - "grad_norm": 4.229785442352295, - "learning_rate": 2.0723303133818507e-05, - "loss": 0.0761, + "epoch": 3.092232288727165, + "grad_norm": 0.841968834400177, + "learning_rate": 6.907767711272836e-06, + "loss": 0.0781, "step": 20820 }, { - "epoch": 1.54685875538393, - "grad_norm": 3.3588624000549316, - "learning_rate": 2.0718847467696422e-05, - "loss": 0.0683, + "epoch": 3.09371751076786, + "grad_norm": 0.9688336849212646, + "learning_rate": 6.906282489232141e-06, + "loss": 0.0701, "step": 20830 }, { - "epoch": 1.5476013664042774, - "grad_norm": 1.541447401046753, - "learning_rate": 2.0714391801574334e-05, - "loss": 0.0785, + "epoch": 3.0952027328085547, + "grad_norm": 0.6418406367301941, + "learning_rate": 6.904797267191445e-06, + "loss": 0.0568, "step": 20840 }, { - "epoch": 1.5483439774246248, - "grad_norm": 1.4682326316833496, - "learning_rate": 2.0709936135452252e-05, - "loss": 0.056, + "epoch": 3.09668795484925, + "grad_norm": 1.104293704032898, + "learning_rate": 6.903312045150751e-06, + "loss": 0.0787, "step": 20850 }, { - "epoch": 1.5490865884449725, - "grad_norm": 1.9115557670593262, - "learning_rate": 2.0705480469330167e-05, - "loss": 0.0658, + "epoch": 3.098173176889945, + "grad_norm": 0.5715312957763672, + "learning_rate": 6.901826823110056e-06, + "loss": 0.0802, "step": 20860 }, { - "epoch": 1.5498291994653202, - "grad_norm": 2.6742310523986816, - "learning_rate": 2.070102480320808e-05, - "loss": 0.0542, + "epoch": 3.0996583989306403, + "grad_norm": 0.5578724145889282, + "learning_rate": 6.90034160106936e-06, + "loss": 0.0707, "step": 20870 }, { - "epoch": 1.5505718104856676, - "grad_norm": 1.7176462411880493, - "learning_rate": 2.0696569137085997e-05, - "loss": 0.0604, + "epoch": 3.1011436209713352, + "grad_norm": 0.7479428052902222, + "learning_rate": 6.898856379028666e-06, + "loss": 0.0599, "step": 20880 }, { - "epoch": 1.551314421506015, - "grad_norm": 2.445446252822876, - "learning_rate": 2.069211347096391e-05, - "loss": 0.1063, + "epoch": 3.10262884301203, + "grad_norm": 0.7864044904708862, + "learning_rate": 6.89737115698797e-06, + "loss": 0.0726, "step": 20890 }, { - "epoch": 1.5520570325263627, - "grad_norm": 2.274242401123047, - "learning_rate": 2.0687657804841824e-05, - "loss": 0.0842, + "epoch": 3.1041140650527255, + "grad_norm": 1.1238430738449097, + "learning_rate": 6.895885934947275e-06, + "loss": 0.0693, "step": 20900 }, { - "epoch": 1.5527996435467102, - "grad_norm": 0.9645692110061646, - "learning_rate": 2.068320213871974e-05, - "loss": 0.0487, + "epoch": 3.1055992870934204, + "grad_norm": 0.8007240295410156, + "learning_rate": 6.894400712906581e-06, + "loss": 0.0705, "step": 20910 }, { - "epoch": 1.5535422545670579, - "grad_norm": 2.782325506210327, - "learning_rate": 2.0678746472597654e-05, - "loss": 0.0783, + "epoch": 3.1070845091341157, + "grad_norm": 0.8057619333267212, + "learning_rate": 6.892915490865884e-06, + "loss": 0.0757, "step": 20920 }, { - "epoch": 1.5542848655874053, - "grad_norm": 2.1064939498901367, - "learning_rate": 2.067429080647557e-05, - "loss": 0.0555, + "epoch": 3.1085697311748106, + "grad_norm": 1.511337399482727, + "learning_rate": 6.8914302688251896e-06, + "loss": 0.0735, "step": 20930 }, { - "epoch": 1.5550274766077528, - "grad_norm": 2.168714761734009, - "learning_rate": 2.0669835140353484e-05, - "loss": 0.0864, + "epoch": 3.1100549532155055, + "grad_norm": 0.6716585755348206, + "learning_rate": 6.889945046784496e-06, + "loss": 0.0592, "step": 20940 }, { - "epoch": 1.5557700876281004, - "grad_norm": 1.001583456993103, - "learning_rate": 2.06653794742314e-05, - "loss": 0.0761, + "epoch": 3.111540175256201, + "grad_norm": 1.2774038314819336, + "learning_rate": 6.888459824743799e-06, + "loss": 0.0791, "step": 20950 }, { - "epoch": 1.556512698648448, - "grad_norm": 1.3306684494018555, - "learning_rate": 2.0660923808109314e-05, - "loss": 0.0596, + "epoch": 3.1130253972968958, + "grad_norm": 1.4515156745910645, + "learning_rate": 6.8869746027031045e-06, + "loss": 0.0599, "step": 20960 }, { - "epoch": 1.5572553096687956, - "grad_norm": 0.9188536405563354, - "learning_rate": 2.065646814198723e-05, - "loss": 0.0653, + "epoch": 3.114510619337591, + "grad_norm": 0.8734662532806396, + "learning_rate": 6.88548938066241e-06, + "loss": 0.0537, "step": 20970 }, { - "epoch": 1.557997920689143, - "grad_norm": 1.159742832183838, - "learning_rate": 2.065201247586514e-05, - "loss": 0.0942, + "epoch": 3.115995841378286, + "grad_norm": 1.2333000898361206, + "learning_rate": 6.884004158621714e-06, + "loss": 0.0668, "step": 20980 }, { - "epoch": 1.5587405317094905, - "grad_norm": 3.285846471786499, - "learning_rate": 2.064755680974306e-05, - "loss": 0.0673, + "epoch": 3.1174810634189813, + "grad_norm": 1.3227999210357666, + "learning_rate": 6.8825189365810195e-06, + "loss": 0.067, "step": 20990 }, { - "epoch": 1.5594831427298381, - "grad_norm": 1.9212596416473389, - "learning_rate": 2.064310114362097e-05, - "loss": 0.0477, + "epoch": 3.1189662854596762, + "grad_norm": 0.49217718839645386, + "learning_rate": 6.881033714540324e-06, + "loss": 0.0727, "step": 21000 }, { - "epoch": 1.5602257537501858, - "grad_norm": 1.4942169189453125, - "learning_rate": 2.0638645477498885e-05, - "loss": 0.0735, + "epoch": 3.120451507500371, + "grad_norm": 1.2024414539337158, + "learning_rate": 6.879548492499629e-06, + "loss": 0.0791, "step": 21010 }, { - "epoch": 1.5609683647705332, - "grad_norm": 2.1586103439331055, - "learning_rate": 2.0634189811376804e-05, - "loss": 0.0594, + "epoch": 3.1219367295410665, + "grad_norm": 1.4273792505264282, + "learning_rate": 6.8780632704589345e-06, + "loss": 0.0819, "step": 21020 }, { - "epoch": 1.5617109757908807, - "grad_norm": 0.46512606739997864, - "learning_rate": 2.0629734145254715e-05, - "loss": 0.0662, + "epoch": 3.1234219515817614, + "grad_norm": 0.9906319975852966, + "learning_rate": 6.876578048418239e-06, + "loss": 0.0628, "step": 21030 }, { - "epoch": 1.5624535868112281, - "grad_norm": 1.9914196729660034, - "learning_rate": 2.062527847913263e-05, - "loss": 0.0415, + "epoch": 3.1249071736224567, + "grad_norm": 0.7808650135993958, + "learning_rate": 6.875092826377544e-06, + "loss": 0.0563, "step": 21040 }, { - "epoch": 1.5631961978315758, - "grad_norm": 0.8854469060897827, - "learning_rate": 2.062082281301055e-05, - "loss": 0.0776, + "epoch": 3.1263923956631516, + "grad_norm": 0.8338149189949036, + "learning_rate": 6.8736076043368495e-06, + "loss": 0.066, "step": 21050 }, { - "epoch": 1.5639388088519235, - "grad_norm": 0.9890866875648499, - "learning_rate": 2.061636714688846e-05, - "loss": 0.0666, + "epoch": 3.1278776177038465, + "grad_norm": 0.4437405467033386, + "learning_rate": 6.872122382296154e-06, + "loss": 0.0722, "step": 21060 }, { - "epoch": 1.564681419872271, - "grad_norm": 1.0312457084655762, - "learning_rate": 2.0611911480766375e-05, - "loss": 0.0777, + "epoch": 3.129362839744542, + "grad_norm": 0.47249501943588257, + "learning_rate": 6.870637160255459e-06, + "loss": 0.0819, "step": 21070 }, { - "epoch": 1.5654240308926184, - "grad_norm": 1.1136667728424072, - "learning_rate": 2.0607455814644287e-05, - "loss": 0.0548, + "epoch": 3.130848061785237, + "grad_norm": 0.8643490076065063, + "learning_rate": 6.869151938214763e-06, + "loss": 0.0589, "step": 21080 }, { - "epoch": 1.5661666419129658, - "grad_norm": 1.6554890871047974, - "learning_rate": 2.0603000148522205e-05, - "loss": 0.0659, + "epoch": 3.132333283825932, + "grad_norm": 0.6852704882621765, + "learning_rate": 6.867666716174068e-06, + "loss": 0.077, "step": 21090 }, { - "epoch": 1.5669092529333135, - "grad_norm": 1.3619014024734497, - "learning_rate": 2.059854448240012e-05, - "loss": 0.0773, + "epoch": 3.133818505866627, + "grad_norm": 0.37101641297340393, + "learning_rate": 6.866181494133373e-06, + "loss": 0.0799, "step": 21100 }, { - "epoch": 1.5676518639536612, - "grad_norm": 2.3610949516296387, - "learning_rate": 2.0594088816278032e-05, - "loss": 0.0513, + "epoch": 3.1353037279073224, + "grad_norm": 0.873170793056488, + "learning_rate": 6.864696272092678e-06, + "loss": 0.0502, "step": 21110 }, { - "epoch": 1.5683944749740086, - "grad_norm": 1.5495983362197876, - "learning_rate": 2.058963315015595e-05, - "loss": 0.0767, + "epoch": 3.1367889499480173, + "grad_norm": 1.023847222328186, + "learning_rate": 6.863211050051983e-06, + "loss": 0.0494, "step": 21120 }, { - "epoch": 1.569137085994356, - "grad_norm": 0.9158768653869629, - "learning_rate": 2.0585177484033865e-05, - "loss": 0.0831, + "epoch": 3.138274171988712, + "grad_norm": 0.7707538604736328, + "learning_rate": 6.861725828011288e-06, + "loss": 0.06, "step": 21130 }, { - "epoch": 1.5698796970147035, - "grad_norm": 0.8416073322296143, - "learning_rate": 2.0580721817911777e-05, - "loss": 0.0824, + "epoch": 3.1397593940294075, + "grad_norm": 0.9110133647918701, + "learning_rate": 6.860240605970593e-06, + "loss": 0.0495, "step": 21140 }, { - "epoch": 1.5706223080350512, - "grad_norm": 4.620311737060547, - "learning_rate": 2.0576266151789692e-05, - "loss": 0.0638, + "epoch": 3.1412446160701024, + "grad_norm": 0.9806025624275208, + "learning_rate": 6.858755383929898e-06, + "loss": 0.0693, "step": 21150 }, { - "epoch": 1.5713649190553989, - "grad_norm": 1.3405332565307617, - "learning_rate": 2.057181048566761e-05, - "loss": 0.0829, + "epoch": 3.1427298381107978, + "grad_norm": 0.6785071492195129, + "learning_rate": 6.857270161889203e-06, + "loss": 0.0631, "step": 21160 }, { - "epoch": 1.5721075300757463, - "grad_norm": 1.7006781101226807, - "learning_rate": 2.0567354819545522e-05, - "loss": 0.0619, + "epoch": 3.1442150601514927, + "grad_norm": 1.0490822792053223, + "learning_rate": 6.855784939848508e-06, + "loss": 0.0658, "step": 21170 }, { - "epoch": 1.5728501410960938, - "grad_norm": 2.3096494674682617, - "learning_rate": 2.0562899153423437e-05, - "loss": 0.069, + "epoch": 3.1457002821921876, + "grad_norm": 0.9065208435058594, + "learning_rate": 6.854299717807813e-06, + "loss": 0.0626, "step": 21180 }, { - "epoch": 1.5735927521164415, - "grad_norm": 1.4846018552780151, - "learning_rate": 2.0558443487301352e-05, - "loss": 0.0623, + "epoch": 3.147185504232883, + "grad_norm": 0.8384219408035278, + "learning_rate": 6.8528144957671175e-06, + "loss": 0.0498, "step": 21190 }, { - "epoch": 1.574335363136789, - "grad_norm": 1.648088812828064, - "learning_rate": 2.0553987821179267e-05, - "loss": 0.0544, + "epoch": 3.148670726273578, + "grad_norm": 1.3046029806137085, + "learning_rate": 6.851329273726423e-06, + "loss": 0.0716, "step": 21200 }, { - "epoch": 1.5750779741571366, - "grad_norm": 1.0111830234527588, - "learning_rate": 2.0549532155057182e-05, - "loss": 0.0544, + "epoch": 3.150155948314273, + "grad_norm": 1.7878382205963135, + "learning_rate": 6.849844051685728e-06, + "loss": 0.0644, "step": 21210 }, { - "epoch": 1.575820585177484, - "grad_norm": 3.3681936264038086, - "learning_rate": 2.0545076488935097e-05, - "loss": 0.0648, + "epoch": 3.151641170354968, + "grad_norm": 1.844111442565918, + "learning_rate": 6.848358829645032e-06, + "loss": 0.0655, "step": 21220 }, { - "epoch": 1.5765631961978315, - "grad_norm": 2.3970768451690674, - "learning_rate": 2.0540620822813012e-05, - "loss": 0.0545, + "epoch": 3.153126392395663, + "grad_norm": 0.8533865809440613, + "learning_rate": 6.846873607604338e-06, + "loss": 0.0531, "step": 21230 }, { - "epoch": 1.5773058072181791, - "grad_norm": 3.242074489593506, - "learning_rate": 2.0536165156690927e-05, - "loss": 0.0729, + "epoch": 3.1546116144363583, + "grad_norm": 1.1432991027832031, + "learning_rate": 6.845388385563643e-06, + "loss": 0.0805, "step": 21240 }, { - "epoch": 1.5780484182385268, - "grad_norm": 0.41246843338012695, - "learning_rate": 2.0531709490568838e-05, - "loss": 0.0549, + "epoch": 3.156096836477053, + "grad_norm": 0.9231775999069214, + "learning_rate": 6.843903163522947e-06, + "loss": 0.0618, "step": 21250 }, { - "epoch": 1.5787910292588743, - "grad_norm": 1.8131650686264038, - "learning_rate": 2.0527253824446757e-05, - "loss": 0.0633, + "epoch": 3.1575820585177485, + "grad_norm": 1.0056854486465454, + "learning_rate": 6.842417941482252e-06, + "loss": 0.0775, "step": 21260 }, { - "epoch": 1.5795336402792217, - "grad_norm": 1.4795677661895752, - "learning_rate": 2.052279815832467e-05, - "loss": 0.0668, + "epoch": 3.1590672805584434, + "grad_norm": 0.24411170184612274, + "learning_rate": 6.840932719441557e-06, + "loss": 0.0648, "step": 21270 }, { - "epoch": 1.5802762512995692, - "grad_norm": 0.927021324634552, - "learning_rate": 2.0518342492202583e-05, - "loss": 0.0673, + "epoch": 3.1605525025991383, + "grad_norm": 1.0229218006134033, + "learning_rate": 6.839447497400862e-06, + "loss": 0.0763, "step": 21280 }, { - "epoch": 1.5810188623199168, - "grad_norm": 3.456859827041626, - "learning_rate": 2.05138868260805e-05, - "loss": 0.0764, + "epoch": 3.1620377246398337, + "grad_norm": 0.6935153007507324, + "learning_rate": 6.837962275360167e-06, + "loss": 0.0518, "step": 21290 }, { - "epoch": 1.5817614733402645, - "grad_norm": 2.829115390777588, - "learning_rate": 2.0509431159958413e-05, - "loss": 0.0865, + "epoch": 3.1635229466805286, + "grad_norm": 1.5722551345825195, + "learning_rate": 6.836477053319471e-06, + "loss": 0.0602, "step": 21300 }, { - "epoch": 1.582504084360612, - "grad_norm": 3.3823864459991455, - "learning_rate": 2.0504975493836328e-05, - "loss": 0.0578, + "epoch": 3.165008168721224, + "grad_norm": 1.5089282989501953, + "learning_rate": 6.834991831278777e-06, + "loss": 0.0584, "step": 21310 }, { - "epoch": 1.5832466953809594, - "grad_norm": 1.5947513580322266, - "learning_rate": 2.0500519827714243e-05, - "loss": 0.0997, + "epoch": 3.166493390761919, + "grad_norm": 0.6778764128684998, + "learning_rate": 6.833506609238082e-06, + "loss": 0.0563, "step": 21320 }, { - "epoch": 1.5839893064013069, - "grad_norm": 2.1573078632354736, - "learning_rate": 2.0496064161592158e-05, - "loss": 0.0788, + "epoch": 3.167978612802614, + "grad_norm": 0.6647319793701172, + "learning_rate": 6.832021387197386e-06, + "loss": 0.067, "step": 21330 }, { - "epoch": 1.5847319174216545, - "grad_norm": 0.3694283962249756, - "learning_rate": 2.0491608495470073e-05, - "loss": 0.0275, + "epoch": 3.169463834843309, + "grad_norm": 0.9124835133552551, + "learning_rate": 6.830536165156692e-06, + "loss": 0.0702, "step": 21340 }, { - "epoch": 1.5854745284420022, - "grad_norm": 1.0279000997543335, - "learning_rate": 2.0487152829347988e-05, - "loss": 0.0967, + "epoch": 3.170949056884004, + "grad_norm": 0.538489818572998, + "learning_rate": 6.829050943115997e-06, + "loss": 0.0722, "step": 21350 }, { - "epoch": 1.5862171394623497, - "grad_norm": 1.637056827545166, - "learning_rate": 2.0482697163225903e-05, - "loss": 0.0605, + "epoch": 3.1724342789246993, + "grad_norm": 0.7582644820213318, + "learning_rate": 6.827565721075301e-06, + "loss": 0.0611, "step": 21360 }, { - "epoch": 1.586959750482697, - "grad_norm": 1.0048965215682983, - "learning_rate": 2.0478241497103818e-05, - "loss": 0.0656, + "epoch": 3.173919500965394, + "grad_norm": 1.9703272581100464, + "learning_rate": 6.826080499034607e-06, + "loss": 0.075, "step": 21370 }, { - "epoch": 1.5877023615030446, - "grad_norm": 1.906299114227295, - "learning_rate": 2.0473785830981733e-05, - "loss": 0.0811, + "epoch": 3.1754047230060896, + "grad_norm": 0.6227496862411499, + "learning_rate": 6.824595276993912e-06, + "loss": 0.0663, "step": 21380 }, { - "epoch": 1.5884449725233922, - "grad_norm": 3.761151075363159, - "learning_rate": 2.0469330164859645e-05, - "loss": 0.0829, + "epoch": 3.1768899450467845, + "grad_norm": 1.0263561010360718, + "learning_rate": 6.8231100549532155e-06, + "loss": 0.0663, "step": 21390 }, { - "epoch": 1.58918758354374, - "grad_norm": 3.385910749435425, - "learning_rate": 2.0464874498737563e-05, - "loss": 0.0476, + "epoch": 3.17837516708748, + "grad_norm": 0.9529784917831421, + "learning_rate": 6.821624832912521e-06, + "loss": 0.0672, "step": 21400 }, { - "epoch": 1.5899301945640874, - "grad_norm": 1.3071726560592651, - "learning_rate": 2.0460418832615475e-05, - "loss": 0.0931, + "epoch": 3.1798603891281747, + "grad_norm": 0.8865375518798828, + "learning_rate": 6.820139610871825e-06, + "loss": 0.0604, "step": 21410 }, { - "epoch": 1.5906728055844348, - "grad_norm": 1.4268453121185303, - "learning_rate": 2.045596316649339e-05, - "loss": 0.1017, + "epoch": 3.1813456111688696, + "grad_norm": 0.518654465675354, + "learning_rate": 6.8186543888311305e-06, + "loss": 0.0463, "step": 21420 }, { - "epoch": 1.5914154166047823, - "grad_norm": 0.96445232629776, - "learning_rate": 2.0451507500371308e-05, - "loss": 0.0711, + "epoch": 3.182830833209565, + "grad_norm": 1.2982616424560547, + "learning_rate": 6.817169166790436e-06, + "loss": 0.0803, "step": 21430 }, { - "epoch": 1.59215802762513, - "grad_norm": 1.8206923007965088, - "learning_rate": 2.044705183424922e-05, - "loss": 0.0682, + "epoch": 3.18431605525026, + "grad_norm": 1.0875815153121948, + "learning_rate": 6.81568394474974e-06, + "loss": 0.0565, "step": 21440 }, { - "epoch": 1.5929006386454776, - "grad_norm": 1.7625178098678589, - "learning_rate": 2.0442596168127135e-05, - "loss": 0.0502, + "epoch": 3.185801277290955, + "grad_norm": 0.6639821529388428, + "learning_rate": 6.8141987227090455e-06, + "loss": 0.0526, "step": 21450 }, { - "epoch": 1.593643249665825, - "grad_norm": 1.0990653038024902, - "learning_rate": 2.0438140502005053e-05, - "loss": 0.0705, + "epoch": 3.18728649933165, + "grad_norm": 0.9514920711517334, + "learning_rate": 6.812713500668351e-06, + "loss": 0.0559, "step": 21460 }, { - "epoch": 1.5943858606861725, - "grad_norm": 0.8799698352813721, - "learning_rate": 2.0433684835882965e-05, - "loss": 0.0513, + "epoch": 3.188771721372345, + "grad_norm": 1.0590705871582031, + "learning_rate": 6.811228278627655e-06, + "loss": 0.0871, "step": 21470 }, { - "epoch": 1.5951284717065202, - "grad_norm": 1.2714344263076782, - "learning_rate": 2.042922916976088e-05, - "loss": 0.0877, + "epoch": 3.1902569434130403, + "grad_norm": 1.098676323890686, + "learning_rate": 6.8097430565869605e-06, + "loss": 0.0616, "step": 21480 }, { - "epoch": 1.5958710827268676, - "grad_norm": 0.9506982564926147, - "learning_rate": 2.0424773503638795e-05, - "loss": 0.1113, + "epoch": 3.1917421654537352, + "grad_norm": 0.8639383912086487, + "learning_rate": 6.808257834546266e-06, + "loss": 0.0494, "step": 21490 }, { - "epoch": 1.5966136937472153, - "grad_norm": 1.751642107963562, - "learning_rate": 2.042031783751671e-05, - "loss": 0.08, + "epoch": 3.1932273874944306, + "grad_norm": 1.6292216777801514, + "learning_rate": 6.80677261250557e-06, + "loss": 0.0588, "step": 21500 }, { - "epoch": 1.5973563047675627, - "grad_norm": 3.1982038021087646, - "learning_rate": 2.0415862171394625e-05, - "loss": 0.074, + "epoch": 3.1947126095351255, + "grad_norm": 0.9250429272651672, + "learning_rate": 6.8052873904648755e-06, + "loss": 0.0608, "step": 21510 }, { - "epoch": 1.5980989157879102, - "grad_norm": 1.0957239866256714, - "learning_rate": 2.0411406505272536e-05, - "loss": 0.059, + "epoch": 3.1961978315758204, + "grad_norm": 0.8409736752510071, + "learning_rate": 6.80380216842418e-06, + "loss": 0.0742, "step": 21520 }, { - "epoch": 1.5988415268082579, - "grad_norm": 0.893408477306366, - "learning_rate": 2.0406950839150455e-05, - "loss": 0.0789, + "epoch": 3.1976830536165157, + "grad_norm": 1.2568039894104004, + "learning_rate": 6.802316946383485e-06, + "loss": 0.073, "step": 21530 }, { - "epoch": 1.5995841378286055, - "grad_norm": 1.416986346244812, - "learning_rate": 2.040249517302837e-05, - "loss": 0.0941, + "epoch": 3.1991682756572106, + "grad_norm": 0.3838708698749542, + "learning_rate": 6.8008317243427905e-06, + "loss": 0.0668, "step": 21540 }, { - "epoch": 1.600326748848953, - "grad_norm": 0.7409796118736267, - "learning_rate": 2.039803950690628e-05, - "loss": 0.0687, + "epoch": 3.200653497697906, + "grad_norm": 0.5494322180747986, + "learning_rate": 6.799346502302094e-06, + "loss": 0.0736, "step": 21550 }, { - "epoch": 1.6010693598693004, - "grad_norm": 1.517598271369934, - "learning_rate": 2.0393583840784196e-05, - "loss": 0.0638, + "epoch": 3.202138719738601, + "grad_norm": 0.5036866664886475, + "learning_rate": 6.797861280261399e-06, + "loss": 0.0657, "step": 21560 }, { - "epoch": 1.6018119708896479, - "grad_norm": 1.0811076164245605, - "learning_rate": 2.0389128174662115e-05, - "loss": 0.0784, + "epoch": 3.2036239417792958, + "grad_norm": 0.9766075015068054, + "learning_rate": 6.796376058220705e-06, + "loss": 0.0615, "step": 21570 }, { - "epoch": 1.6025545819099956, - "grad_norm": 3.014960765838623, - "learning_rate": 2.0384672508540026e-05, - "loss": 0.0529, + "epoch": 3.205109163819991, + "grad_norm": 0.7940554618835449, + "learning_rate": 6.794890836180009e-06, + "loss": 0.0751, "step": 21580 }, { - "epoch": 1.6032971929303432, - "grad_norm": 4.6855669021606445, - "learning_rate": 2.038021684241794e-05, - "loss": 0.0614, + "epoch": 3.206594385860686, + "grad_norm": 0.4779742956161499, + "learning_rate": 6.793405614139314e-06, + "loss": 0.0621, "step": 21590 }, - { - "epoch": 1.6040398039506907, - "grad_norm": 2.0930423736572266, - "learning_rate": 2.0375761176295856e-05, - "loss": 0.0794, + { + "epoch": 3.2080796079013814, + "grad_norm": 1.009729027748108, + "learning_rate": 6.791920392098619e-06, + "loss": 0.0938, "step": 21600 }, { - "epoch": 1.6047824149710381, - "grad_norm": 2.3684639930725098, - "learning_rate": 2.037130551017377e-05, - "loss": 0.0806, + "epoch": 3.2095648299420763, + "grad_norm": 0.7350361943244934, + "learning_rate": 6.790435170057924e-06, + "loss": 0.0586, "step": 21610 }, { - "epoch": 1.6055250259913856, - "grad_norm": 2.803929090499878, - "learning_rate": 2.0366849844051686e-05, - "loss": 0.1052, + "epoch": 3.2110500519827716, + "grad_norm": 0.8858749866485596, + "learning_rate": 6.788949948017229e-06, + "loss": 0.064, "step": 21620 }, { - "epoch": 1.6062676370117333, - "grad_norm": 1.4284909963607788, - "learning_rate": 2.03623941779296e-05, - "loss": 0.078, + "epoch": 3.2125352740234665, + "grad_norm": 0.4429284930229187, + "learning_rate": 6.787464725976534e-06, + "loss": 0.0734, "step": 21630 }, { - "epoch": 1.607010248032081, - "grad_norm": 1.5730488300323486, - "learning_rate": 2.0357938511807516e-05, - "loss": 0.0623, + "epoch": 3.2140204960641614, + "grad_norm": 0.8132091164588928, + "learning_rate": 6.785979503935839e-06, + "loss": 0.0817, "step": 21640 }, { - "epoch": 1.6077528590524284, - "grad_norm": 1.4216201305389404, - "learning_rate": 2.035348284568543e-05, - "loss": 0.0399, + "epoch": 3.2155057181048567, + "grad_norm": 0.9240391254425049, + "learning_rate": 6.784494281895144e-06, + "loss": 0.074, "step": 21650 }, { - "epoch": 1.6084954700727758, - "grad_norm": 1.4788250923156738, - "learning_rate": 2.0349027179563343e-05, - "loss": 0.0619, + "epoch": 3.2169909401455516, + "grad_norm": 1.1660693883895874, + "learning_rate": 6.783009059854449e-06, + "loss": 0.0687, "step": 21660 }, { - "epoch": 1.6092380810931233, - "grad_norm": 1.5001707077026367, - "learning_rate": 2.034457151344126e-05, - "loss": 0.0683, + "epoch": 3.218476162186247, + "grad_norm": 0.4858531355857849, + "learning_rate": 6.781523837813754e-06, + "loss": 0.0525, "step": 21670 }, { - "epoch": 1.609980692113471, - "grad_norm": 2.592287063598633, - "learning_rate": 2.0340115847319176e-05, - "loss": 0.0971, + "epoch": 3.219961384226942, + "grad_norm": 0.8546695113182068, + "learning_rate": 6.780038615773059e-06, + "loss": 0.0747, "step": 21680 }, { - "epoch": 1.6107233031338186, - "grad_norm": 2.6354775428771973, - "learning_rate": 2.0335660181197088e-05, - "loss": 0.0832, + "epoch": 3.2214466062676372, + "grad_norm": 1.3556727170944214, + "learning_rate": 6.778553393732363e-06, + "loss": 0.0839, "step": 21690 }, { - "epoch": 1.611465914154166, - "grad_norm": 0.5848486423492432, - "learning_rate": 2.0331204515075006e-05, - "loss": 0.066, + "epoch": 3.222931828308332, + "grad_norm": 0.890733003616333, + "learning_rate": 6.777068171691669e-06, + "loss": 0.0659, "step": 21700 }, { - "epoch": 1.6122085251745135, - "grad_norm": 2.4620141983032227, - "learning_rate": 2.0326748848952918e-05, - "loss": 0.0688, + "epoch": 3.224417050349027, + "grad_norm": 0.9196014404296875, + "learning_rate": 6.775582949650973e-06, + "loss": 0.0622, "step": 21710 }, { - "epoch": 1.612951136194861, - "grad_norm": 0.8085265755653381, - "learning_rate": 2.0322293182830833e-05, - "loss": 0.0639, + "epoch": 3.2259022723897224, + "grad_norm": 0.6223394870758057, + "learning_rate": 6.774097727610278e-06, + "loss": 0.0638, "step": 21720 }, { - "epoch": 1.6136937472152086, - "grad_norm": 2.16105580329895, - "learning_rate": 2.0317837516708748e-05, - "loss": 0.111, + "epoch": 3.2273874944304173, + "grad_norm": 0.6774187684059143, + "learning_rate": 6.772612505569583e-06, + "loss": 0.0636, "step": 21730 }, { - "epoch": 1.6144363582355563, - "grad_norm": 2.140782356262207, - "learning_rate": 2.0313381850586663e-05, - "loss": 0.0902, + "epoch": 3.2288727164711126, + "grad_norm": 1.0828312635421753, + "learning_rate": 6.771127283528888e-06, + "loss": 0.0967, "step": 21740 }, { - "epoch": 1.6151789692559038, - "grad_norm": 1.3173938989639282, - "learning_rate": 2.0308926184464578e-05, - "loss": 0.0528, + "epoch": 3.2303579385118075, + "grad_norm": 0.2529934346675873, + "learning_rate": 6.769642061488193e-06, + "loss": 0.0432, "step": 21750 }, { - "epoch": 1.6159215802762512, - "grad_norm": 1.9319645166397095, - "learning_rate": 2.0304470518342493e-05, - "loss": 0.061, + "epoch": 3.2318431605525024, + "grad_norm": 0.820793867111206, + "learning_rate": 6.768156839447498e-06, + "loss": 0.0566, "step": 21760 }, { - "epoch": 1.6166641912965989, - "grad_norm": 1.2936400175094604, - "learning_rate": 2.0300014852220408e-05, - "loss": 0.0601, + "epoch": 3.2333283825931978, + "grad_norm": 1.6257591247558594, + "learning_rate": 6.766671617406803e-06, + "loss": 0.0643, "step": 21770 }, { - "epoch": 1.6174068023169463, - "grad_norm": 0.23843184113502502, - "learning_rate": 2.0295559186098323e-05, - "loss": 0.0479, + "epoch": 3.2348136046338927, + "grad_norm": 0.9866087436676025, + "learning_rate": 6.765186395366108e-06, + "loss": 0.0694, "step": 21780 }, { - "epoch": 1.618149413337294, - "grad_norm": 5.628422260284424, - "learning_rate": 2.0291103519976238e-05, - "loss": 0.0611, + "epoch": 3.236298826674588, + "grad_norm": 1.2419787645339966, + "learning_rate": 6.763701173325413e-06, + "loss": 0.0691, "step": 21790 }, { - "epoch": 1.6188920243576415, - "grad_norm": 1.189815640449524, - "learning_rate": 2.028664785385415e-05, - "loss": 0.0843, + "epoch": 3.237784048715283, + "grad_norm": 0.8262988328933716, + "learning_rate": 6.762215951284718e-06, + "loss": 0.0702, "step": 21800 }, { - "epoch": 1.619634635377989, - "grad_norm": 0.9391959309577942, - "learning_rate": 2.0282192187732068e-05, - "loss": 0.0786, + "epoch": 3.239269270755978, + "grad_norm": 0.3723410367965698, + "learning_rate": 6.760730729244023e-06, + "loss": 0.0657, "step": 21810 }, { - "epoch": 1.6203772463983366, - "grad_norm": 2.154853582382202, - "learning_rate": 2.027773652160998e-05, - "loss": 0.0816, + "epoch": 3.240754492796673, + "grad_norm": 0.7780771851539612, + "learning_rate": 6.759245507203327e-06, + "loss": 0.0508, "step": 21820 }, { - "epoch": 1.6211198574186843, - "grad_norm": 2.5236504077911377, - "learning_rate": 2.0273280855487894e-05, - "loss": 0.0779, + "epoch": 3.242239714837368, + "grad_norm": 0.6054874062538147, + "learning_rate": 6.757760285162633e-06, + "loss": 0.0664, "step": 21830 }, { - "epoch": 1.6218624684390317, - "grad_norm": 1.4820054769515991, - "learning_rate": 2.0268825189365813e-05, - "loss": 0.071, + "epoch": 3.2437249368780634, + "grad_norm": 0.6230700612068176, + "learning_rate": 6.756275063121938e-06, + "loss": 0.0737, "step": 21840 }, { - "epoch": 1.6226050794593792, - "grad_norm": 0.6480633616447449, - "learning_rate": 2.0264369523243724e-05, - "loss": 0.0475, + "epoch": 3.2452101589187583, + "grad_norm": 0.7779073715209961, + "learning_rate": 6.7547898410812415e-06, + "loss": 0.0451, "step": 21850 }, { - "epoch": 1.6233476904797266, - "grad_norm": 2.3438937664031982, - "learning_rate": 2.025991385712164e-05, - "loss": 0.1059, + "epoch": 3.246695380959453, + "grad_norm": 1.2229901552200317, + "learning_rate": 6.753304619040547e-06, + "loss": 0.0666, "step": 21860 }, { - "epoch": 1.6240903015000743, - "grad_norm": 3.0132994651794434, - "learning_rate": 2.0255458190999558e-05, - "loss": 0.065, + "epoch": 3.2481806030001485, + "grad_norm": 0.6388977766036987, + "learning_rate": 6.751819396999852e-06, + "loss": 0.0563, "step": 21870 }, { - "epoch": 1.624832912520422, - "grad_norm": 3.362128496170044, - "learning_rate": 2.025100252487747e-05, - "loss": 0.0946, + "epoch": 3.2496658250408434, + "grad_norm": 0.4443511962890625, + "learning_rate": 6.7503341749591565e-06, + "loss": 0.0483, "step": 21880 }, { - "epoch": 1.6255755235407694, - "grad_norm": 2.070509672164917, - "learning_rate": 2.0246546858755384e-05, - "loss": 0.0744, + "epoch": 3.251151047081539, + "grad_norm": 1.2210910320281982, + "learning_rate": 6.748848952918462e-06, + "loss": 0.0581, "step": 21890 }, { - "epoch": 1.6263181345611168, - "grad_norm": 2.716153383255005, - "learning_rate": 2.02420911926333e-05, - "loss": 0.0517, + "epoch": 3.2526362691222337, + "grad_norm": 1.0396207571029663, + "learning_rate": 6.747363730877767e-06, + "loss": 0.0766, "step": 21900 }, { - "epoch": 1.6270607455814643, - "grad_norm": 0.9877446293830872, - "learning_rate": 2.0237635526511214e-05, - "loss": 0.0744, + "epoch": 3.254121491162929, + "grad_norm": 1.3031349182128906, + "learning_rate": 6.7458785088370715e-06, + "loss": 0.0791, "step": 21910 }, { - "epoch": 1.627803356601812, - "grad_norm": 0.48732122778892517, - "learning_rate": 2.023317986038913e-05, - "loss": 0.0395, + "epoch": 3.255606713203624, + "grad_norm": 0.9325276613235474, + "learning_rate": 6.744393286796377e-06, + "loss": 0.0728, "step": 21920 }, { - "epoch": 1.6285459676221596, - "grad_norm": 2.898503541946411, - "learning_rate": 2.022872419426704e-05, - "loss": 0.0776, + "epoch": 3.257091935244319, + "grad_norm": 0.5997186899185181, + "learning_rate": 6.742908064755681e-06, + "loss": 0.0606, "step": 21930 }, { - "epoch": 1.629288578642507, - "grad_norm": 1.4797714948654175, - "learning_rate": 2.022426852814496e-05, - "loss": 0.0721, + "epoch": 3.258577157285014, + "grad_norm": 0.7627629041671753, + "learning_rate": 6.7414228427149865e-06, + "loss": 0.0741, "step": 21940 }, { - "epoch": 1.6300311896628545, - "grad_norm": 2.628574848175049, - "learning_rate": 2.0219812862022874e-05, - "loss": 0.0751, + "epoch": 3.260062379325709, + "grad_norm": 0.27523115277290344, + "learning_rate": 6.739937620674292e-06, + "loss": 0.0632, "step": 21950 }, { - "epoch": 1.630773800683202, - "grad_norm": 0.8774972558021545, - "learning_rate": 2.0215357195900786e-05, - "loss": 0.0778, + "epoch": 3.2615476013664044, + "grad_norm": 1.0456265211105347, + "learning_rate": 6.738452398633596e-06, + "loss": 0.06, "step": 21960 }, { - "epoch": 1.6315164117035497, - "grad_norm": 3.261282205581665, - "learning_rate": 2.02109015297787e-05, - "loss": 0.0896, + "epoch": 3.2630328234070993, + "grad_norm": 1.1044933795928955, + "learning_rate": 6.7369671765929014e-06, + "loss": 0.09, "step": 21970 }, { - "epoch": 1.6322590227238973, - "grad_norm": 1.9803194999694824, - "learning_rate": 2.020644586365662e-05, - "loss": 0.0978, + "epoch": 3.2645180454477947, + "grad_norm": 1.1576274633407593, + "learning_rate": 6.735481954552207e-06, + "loss": 0.0607, "step": 21980 }, { - "epoch": 1.6330016337442448, - "grad_norm": 2.566403865814209, - "learning_rate": 2.020199019753453e-05, - "loss": 0.0794, + "epoch": 3.2660032674884896, + "grad_norm": 1.0158040523529053, + "learning_rate": 6.733996732511511e-06, + "loss": 0.0657, "step": 21990 }, { - "epoch": 1.6337442447645922, - "grad_norm": 1.7836480140686035, - "learning_rate": 2.0197534531412446e-05, - "loss": 0.0688, + "epoch": 3.2674884895291845, + "grad_norm": 0.6219592094421387, + "learning_rate": 6.7325115104708164e-06, + "loss": 0.052, "step": 22000 }, { - "epoch": 1.6344868557849397, - "grad_norm": 1.82455575466156, - "learning_rate": 2.0193078865290364e-05, - "loss": 0.0448, + "epoch": 3.26897371156988, + "grad_norm": 1.1890630722045898, + "learning_rate": 6.731026288430122e-06, + "loss": 0.0553, "step": 22010 }, { - "epoch": 1.6352294668052874, - "grad_norm": 2.849226713180542, - "learning_rate": 2.0188623199168276e-05, - "loss": 0.0601, + "epoch": 3.2704589336105747, + "grad_norm": 1.2006758451461792, + "learning_rate": 6.729541066389425e-06, + "loss": 0.0468, "step": 22020 }, { - "epoch": 1.635972077825635, - "grad_norm": 2.3754022121429443, - "learning_rate": 2.018416753304619e-05, - "loss": 0.0588, + "epoch": 3.27194415565127, + "grad_norm": 0.7339828610420227, + "learning_rate": 6.728055844348731e-06, + "loss": 0.0567, "step": 22030 }, { - "epoch": 1.6367146888459825, - "grad_norm": 3.1693973541259766, - "learning_rate": 2.0179711866924106e-05, - "loss": 0.0812, + "epoch": 3.273429377691965, + "grad_norm": 0.8527355790138245, + "learning_rate": 6.726570622308035e-06, + "loss": 0.058, "step": 22040 }, { - "epoch": 1.63745729986633, - "grad_norm": 1.5568816661834717, - "learning_rate": 2.017525620080202e-05, - "loss": 0.0859, + "epoch": 3.27491459973266, + "grad_norm": 0.9126393795013428, + "learning_rate": 6.72508540026734e-06, + "loss": 0.0603, "step": 22050 }, { - "epoch": 1.6381999108866776, - "grad_norm": 1.8013701438903809, - "learning_rate": 2.0170800534679936e-05, - "loss": 0.1001, + "epoch": 3.276399821773355, + "grad_norm": 0.3882531523704529, + "learning_rate": 6.723600178226646e-06, + "loss": 0.0692, "step": 22060 }, { - "epoch": 1.638942521907025, - "grad_norm": 1.7124766111373901, - "learning_rate": 2.0166344868557847e-05, - "loss": 0.0779, + "epoch": 3.27788504381405, + "grad_norm": 0.45046180486679077, + "learning_rate": 6.72211495618595e-06, + "loss": 0.0676, "step": 22070 }, { - "epoch": 1.6396851329273727, - "grad_norm": 3.0362048149108887, - "learning_rate": 2.0161889202435766e-05, - "loss": 0.0592, + "epoch": 3.2793702658547454, + "grad_norm": 1.058706283569336, + "learning_rate": 6.720629734145255e-06, + "loss": 0.0747, "step": 22080 }, { - "epoch": 1.6404277439477202, - "grad_norm": 3.313502788543701, - "learning_rate": 2.015743353631368e-05, - "loss": 0.0933, + "epoch": 3.2808554878954403, + "grad_norm": 1.137387990951538, + "learning_rate": 6.7191445121045606e-06, + "loss": 0.0847, "step": 22090 }, { - "epoch": 1.6411703549680676, - "grad_norm": 1.0095113515853882, - "learning_rate": 2.0152977870191592e-05, - "loss": 0.0747, + "epoch": 3.2823407099361352, + "grad_norm": 1.0290052890777588, + "learning_rate": 6.717659290063865e-06, + "loss": 0.0638, "step": 22100 }, { - "epoch": 1.6419129659884153, - "grad_norm": 1.5468275547027588, - "learning_rate": 2.014852220406951e-05, - "loss": 0.0409, + "epoch": 3.2838259319768306, + "grad_norm": 0.8567383885383606, + "learning_rate": 6.71617406802317e-06, + "loss": 0.0608, "step": 22110 }, { - "epoch": 1.642655577008763, - "grad_norm": 4.015323162078857, - "learning_rate": 2.0144066537947422e-05, - "loss": 0.0768, + "epoch": 3.2853111540175255, + "grad_norm": 0.39815622568130493, + "learning_rate": 6.714688845982475e-06, + "loss": 0.0598, "step": 22120 }, { - "epoch": 1.6433981880291104, - "grad_norm": 2.2448365688323975, - "learning_rate": 2.0139610871825337e-05, - "loss": 0.059, + "epoch": 3.286796376058221, + "grad_norm": 0.8857595920562744, + "learning_rate": 6.71320362394178e-06, + "loss": 0.0543, "step": 22130 }, { - "epoch": 1.6441407990494579, - "grad_norm": 1.0703582763671875, - "learning_rate": 2.0135155205703252e-05, - "loss": 0.0744, + "epoch": 3.2882815980989157, + "grad_norm": 0.857346773147583, + "learning_rate": 6.711718401901085e-06, + "loss": 0.059, "step": 22140 }, { - "epoch": 1.6448834100698053, - "grad_norm": 1.5378329753875732, - "learning_rate": 2.0130699539581167e-05, - "loss": 0.0652, + "epoch": 3.2897668201396106, + "grad_norm": 0.6323465704917908, + "learning_rate": 6.710233179860389e-06, + "loss": 0.0602, "step": 22150 }, { - "epoch": 1.645626021090153, - "grad_norm": 3.0120882987976074, - "learning_rate": 2.0126243873459082e-05, - "loss": 0.0799, + "epoch": 3.291252042180306, + "grad_norm": 0.7687992453575134, + "learning_rate": 6.708747957819694e-06, + "loss": 0.0593, "step": 22160 }, { - "epoch": 1.6463686321105007, - "grad_norm": 2.719409227371216, - "learning_rate": 2.0121788207336997e-05, - "loss": 0.0785, + "epoch": 3.292737264221001, + "grad_norm": 1.292677879333496, + "learning_rate": 6.707262735779e-06, + "loss": 0.0614, "step": 22170 }, { - "epoch": 1.6471112431308481, - "grad_norm": 2.3582966327667236, - "learning_rate": 2.0117332541214912e-05, - "loss": 0.0735, + "epoch": 3.2942224862616962, + "grad_norm": 1.0926405191421509, + "learning_rate": 6.705777513738304e-06, + "loss": 0.0496, "step": 22180 }, { - "epoch": 1.6478538541511956, - "grad_norm": 1.0670444965362549, - "learning_rate": 2.0112876875092827e-05, - "loss": 0.0701, + "epoch": 3.295707708302391, + "grad_norm": 1.2428966760635376, + "learning_rate": 6.704292291697609e-06, + "loss": 0.0719, "step": 22190 }, { - "epoch": 1.648596465171543, - "grad_norm": 2.2394518852233887, - "learning_rate": 2.0108421208970742e-05, - "loss": 0.07, + "epoch": 3.2971929303430865, + "grad_norm": 0.4689485430717468, + "learning_rate": 6.7028070696569144e-06, + "loss": 0.0553, "step": 22200 }, { - "epoch": 1.6493390761918907, - "grad_norm": 1.6190416812896729, - "learning_rate": 2.0103965542848657e-05, - "loss": 0.0538, + "epoch": 3.2986781523837814, + "grad_norm": 0.30755752325057983, + "learning_rate": 6.701321847616219e-06, + "loss": 0.0652, "step": 22210 }, { - "epoch": 1.6500816872122384, - "grad_norm": 3.418266773223877, - "learning_rate": 2.0099509876726572e-05, - "loss": 0.078, + "epoch": 3.3001633744244763, + "grad_norm": 1.099134087562561, + "learning_rate": 6.699836625575524e-06, + "loss": 0.0828, "step": 22220 }, { - "epoch": 1.6508242982325858, - "grad_norm": 1.8200223445892334, - "learning_rate": 2.0095054210604484e-05, - "loss": 0.0464, + "epoch": 3.3016485964651716, + "grad_norm": 0.6286141872406006, + "learning_rate": 6.6983514035348286e-06, + "loss": 0.0654, "step": 22230 }, { - "epoch": 1.6515669092529333, - "grad_norm": 1.1762034893035889, - "learning_rate": 2.00905985444824e-05, - "loss": 0.1071, + "epoch": 3.3031338185058665, + "grad_norm": 1.0803701877593994, + "learning_rate": 6.696866181494134e-06, + "loss": 0.0606, "step": 22240 }, { - "epoch": 1.6523095202732807, - "grad_norm": 2.370851755142212, - "learning_rate": 2.0086142878360317e-05, - "loss": 0.072, + "epoch": 3.304619040546562, + "grad_norm": 1.0683226585388184, + "learning_rate": 6.695380959453439e-06, + "loss": 0.0687, "step": 22250 }, { - "epoch": 1.6530521312936284, - "grad_norm": 1.7569416761398315, - "learning_rate": 2.008168721223823e-05, - "loss": 0.0923, + "epoch": 3.3061042625872568, + "grad_norm": 0.9039521813392639, + "learning_rate": 6.6938957374127436e-06, + "loss": 0.0662, "step": 22260 }, { - "epoch": 1.653794742313976, - "grad_norm": 1.144127368927002, - "learning_rate": 2.0077231546116144e-05, - "loss": 0.0771, + "epoch": 3.307589484627952, + "grad_norm": 1.0247334241867065, + "learning_rate": 6.692410515372049e-06, + "loss": 0.0781, "step": 22270 }, { - "epoch": 1.6545373533343235, - "grad_norm": 2.696286201477051, - "learning_rate": 2.0072775879994062e-05, - "loss": 0.0989, + "epoch": 3.309074706668647, + "grad_norm": 0.925523042678833, + "learning_rate": 6.690925293331354e-06, + "loss": 0.051, "step": 22280 }, { - "epoch": 1.655279964354671, - "grad_norm": 1.715278148651123, - "learning_rate": 2.0068320213871974e-05, - "loss": 0.0781, + "epoch": 3.310559928709342, + "grad_norm": 1.3613653182983398, + "learning_rate": 6.6894400712906586e-06, + "loss": 0.0791, "step": 22290 }, { - "epoch": 1.6560225753750184, - "grad_norm": 0.7108889818191528, - "learning_rate": 2.006386454774989e-05, - "loss": 0.0512, + "epoch": 3.3120451507500372, + "grad_norm": 1.0534887313842773, + "learning_rate": 6.687954849249964e-06, + "loss": 0.0755, "step": 22300 }, { - "epoch": 1.656765186395366, - "grad_norm": 1.54939603805542, - "learning_rate": 2.0059408881627804e-05, - "loss": 0.073, + "epoch": 3.313530372790732, + "grad_norm": 1.1597150564193726, + "learning_rate": 6.686469627209269e-06, + "loss": 0.0752, "step": 22310 }, { - "epoch": 1.6575077974157137, - "grad_norm": 1.0879472494125366, - "learning_rate": 2.005495321550572e-05, - "loss": 0.0884, + "epoch": 3.3150155948314275, + "grad_norm": 0.4005168676376343, + "learning_rate": 6.684984405168573e-06, + "loss": 0.0749, "step": 22320 }, { - "epoch": 1.6582504084360612, - "grad_norm": 0.8523910641670227, - "learning_rate": 2.0050497549383634e-05, - "loss": 0.0762, + "epoch": 3.3165008168721224, + "grad_norm": 0.8335085511207581, + "learning_rate": 6.683499183127878e-06, + "loss": 0.0635, "step": 22330 }, { - "epoch": 1.6589930194564086, - "grad_norm": 0.9075714945793152, - "learning_rate": 2.0046041883261546e-05, - "loss": 0.0792, + "epoch": 3.3179860389128173, + "grad_norm": 1.0074352025985718, + "learning_rate": 6.682013961087182e-06, + "loss": 0.0525, "step": 22340 }, { - "epoch": 1.6597356304767563, - "grad_norm": 1.5962119102478027, - "learning_rate": 2.0041586217139464e-05, - "loss": 0.0665, + "epoch": 3.3194712609535126, + "grad_norm": 1.2929524183273315, + "learning_rate": 6.680528739046488e-06, + "loss": 0.0658, "step": 22350 }, { - "epoch": 1.6604782414971038, - "grad_norm": 2.9406886100769043, - "learning_rate": 2.003713055101738e-05, - "loss": 0.0658, + "epoch": 3.3209564829942075, + "grad_norm": 1.0960065126419067, + "learning_rate": 6.679043517005793e-06, + "loss": 0.0688, "step": 22360 }, { - "epoch": 1.6612208525174514, - "grad_norm": 1.0619057416915894, - "learning_rate": 2.003267488489529e-05, - "loss": 0.0719, + "epoch": 3.322441705034903, + "grad_norm": 0.9017845392227173, + "learning_rate": 6.677558294965097e-06, + "loss": 0.063, "step": 22370 }, { - "epoch": 1.661963463537799, - "grad_norm": 1.0932631492614746, - "learning_rate": 2.0028219218773206e-05, - "loss": 0.0747, + "epoch": 3.323926927075598, + "grad_norm": 0.459738165140152, + "learning_rate": 6.676073072924403e-06, + "loss": 0.0863, "step": 22380 }, { - "epoch": 1.6627060745581463, - "grad_norm": 2.542506217956543, - "learning_rate": 2.0023763552651124e-05, - "loss": 0.067, + "epoch": 3.3254121491162927, + "grad_norm": 1.2496856451034546, + "learning_rate": 6.674587850883708e-06, + "loss": 0.0712, "step": 22390 }, { - "epoch": 1.663448685578494, - "grad_norm": 2.6380186080932617, - "learning_rate": 2.0019307886529036e-05, - "loss": 0.033, + "epoch": 3.326897371156988, + "grad_norm": 1.0878329277038574, + "learning_rate": 6.673102628843012e-06, + "loss": 0.0709, "step": 22400 }, { - "epoch": 1.6641912965988417, - "grad_norm": 1.5135997533798218, - "learning_rate": 2.001485222040695e-05, - "loss": 0.0786, + "epoch": 3.328382593197683, + "grad_norm": 0.8148536086082458, + "learning_rate": 6.671617406802318e-06, + "loss": 0.0757, "step": 22410 }, { - "epoch": 1.6649339076191891, - "grad_norm": 0.8384225368499756, - "learning_rate": 2.001039655428487e-05, - "loss": 0.084, + "epoch": 3.3298678152383783, + "grad_norm": 1.1004198789596558, + "learning_rate": 6.670132184761623e-06, + "loss": 0.0645, "step": 22420 }, { - "epoch": 1.6656765186395366, - "grad_norm": 2.0017759799957275, - "learning_rate": 2.000594088816278e-05, - "loss": 0.0913, + "epoch": 3.331353037279073, + "grad_norm": 0.932357132434845, + "learning_rate": 6.668646962720927e-06, + "loss": 0.0554, "step": 22430 }, { - "epoch": 1.666419129659884, - "grad_norm": 2.2100701332092285, - "learning_rate": 2.0001485222040696e-05, - "loss": 0.0485, + "epoch": 3.332838259319768, + "grad_norm": 0.9160596132278442, + "learning_rate": 6.667161740680233e-06, + "loss": 0.0582, "step": 22440 }, { - "epoch": 1.6671617406802317, - "grad_norm": 3.30169939994812, - "learning_rate": 1.999702955591861e-05, - "loss": 0.0674, + "epoch": 3.3343234813604634, + "grad_norm": 0.7866541147232056, + "learning_rate": 6.665676518639536e-06, + "loss": 0.077, "step": 22450 }, { - "epoch": 1.6679043517005794, - "grad_norm": 0.9708051085472107, - "learning_rate": 1.9992573889796525e-05, - "loss": 0.0807, + "epoch": 3.3358087034011583, + "grad_norm": 0.6644066572189331, + "learning_rate": 6.664191296598842e-06, + "loss": 0.0705, "step": 22460 }, { - "epoch": 1.6686469627209268, - "grad_norm": 0.9182741045951843, - "learning_rate": 1.998811822367444e-05, - "loss": 0.0432, + "epoch": 3.3372939254418537, + "grad_norm": 0.7443733811378479, + "learning_rate": 6.662706074558148e-06, + "loss": 0.0603, "step": 22470 }, { - "epoch": 1.6693895737412743, - "grad_norm": 4.127451419830322, - "learning_rate": 1.9983662557552352e-05, - "loss": 0.0857, + "epoch": 3.3387791474825486, + "grad_norm": 0.8200944662094116, + "learning_rate": 6.661220852517451e-06, + "loss": 0.0663, "step": 22480 }, { - "epoch": 1.6701321847616217, - "grad_norm": 1.7990877628326416, - "learning_rate": 1.997920689143027e-05, - "loss": 0.0993, + "epoch": 3.340264369523244, + "grad_norm": 0.42802894115448, + "learning_rate": 6.6597356304767565e-06, + "loss": 0.0498, "step": 22490 }, { - "epoch": 1.6708747957819694, - "grad_norm": 3.319918155670166, - "learning_rate": 1.9974751225308185e-05, - "loss": 0.0492, + "epoch": 3.341749591563939, + "grad_norm": 0.8416561484336853, + "learning_rate": 6.658250408436062e-06, + "loss": 0.0595, "step": 22500 }, { - "epoch": 1.671617406802317, - "grad_norm": 1.4016786813735962, - "learning_rate": 1.9970295559186097e-05, - "loss": 0.0593, + "epoch": 3.3432348136046337, + "grad_norm": 0.5720903277397156, + "learning_rate": 6.656765186395366e-06, + "loss": 0.0639, "step": 22510 }, { - "epoch": 1.6723600178226645, - "grad_norm": 3.1249544620513916, - "learning_rate": 1.9965839893064015e-05, - "loss": 0.1065, + "epoch": 3.344720035645329, + "grad_norm": 0.6999571919441223, + "learning_rate": 6.6552799643546715e-06, + "loss": 0.0479, "step": 22520 }, { - "epoch": 1.673102628843012, - "grad_norm": 1.5883194208145142, - "learning_rate": 1.9961384226941927e-05, - "loss": 0.083, + "epoch": 3.346205257686024, + "grad_norm": 1.437461018562317, + "learning_rate": 6.653794742313976e-06, + "loss": 0.0571, "step": 22530 }, { - "epoch": 1.6738452398633594, - "grad_norm": 0.8119624257087708, - "learning_rate": 1.9956928560819842e-05, - "loss": 0.0768, + "epoch": 3.3476904797267193, + "grad_norm": 0.9364057779312134, + "learning_rate": 6.652309520273281e-06, + "loss": 0.066, "step": 22540 }, { - "epoch": 1.674587850883707, - "grad_norm": 1.9466767311096191, - "learning_rate": 1.9952472894697757e-05, - "loss": 0.0688, + "epoch": 3.349175701767414, + "grad_norm": 0.8423236608505249, + "learning_rate": 6.6508242982325865e-06, + "loss": 0.0669, "step": 22550 }, { - "epoch": 1.6753304619040548, - "grad_norm": 1.9473903179168701, - "learning_rate": 1.9948017228575672e-05, - "loss": 0.0631, + "epoch": 3.3506609238081095, + "grad_norm": 0.8278792500495911, + "learning_rate": 6.649339076191891e-06, + "loss": 0.0747, "step": 22560 }, { - "epoch": 1.6760730729244022, - "grad_norm": 1.485198974609375, - "learning_rate": 1.9943561562453587e-05, - "loss": 0.0685, + "epoch": 3.3521461458488044, + "grad_norm": 0.6401219367980957, + "learning_rate": 6.647853854151196e-06, + "loss": 0.0752, "step": 22570 }, { - "epoch": 1.6768156839447497, - "grad_norm": 1.3554059267044067, - "learning_rate": 1.9939105896331502e-05, - "loss": 0.0596, + "epoch": 3.3536313678894993, + "grad_norm": 0.652182400226593, + "learning_rate": 6.6463686321105015e-06, + "loss": 0.0773, "step": 22580 }, { - "epoch": 1.6775582949650971, - "grad_norm": 2.8601107597351074, - "learning_rate": 1.9934650230209417e-05, - "loss": 0.0897, + "epoch": 3.3551165899301947, + "grad_norm": 0.4512597322463989, + "learning_rate": 6.644883410069806e-06, + "loss": 0.0624, "step": 22590 }, { - "epoch": 1.6783009059854448, - "grad_norm": 0.8527280688285828, - "learning_rate": 1.9930194564087332e-05, - "loss": 0.0749, + "epoch": 3.3566018119708896, + "grad_norm": 0.7210500836372375, + "learning_rate": 6.643398188029111e-06, + "loss": 0.0687, "step": 22600 }, { - "epoch": 1.6790435170057925, - "grad_norm": 1.9120954275131226, - "learning_rate": 1.9925738897965247e-05, - "loss": 0.0733, + "epoch": 3.358087034011585, + "grad_norm": 0.9175443649291992, + "learning_rate": 6.6419129659884165e-06, + "loss": 0.0716, "step": 22610 }, { - "epoch": 1.67978612802614, - "grad_norm": 0.8848724365234375, - "learning_rate": 1.9921283231843162e-05, - "loss": 0.047, + "epoch": 3.35957225605228, + "grad_norm": 0.7475764155387878, + "learning_rate": 6.64042774394772e-06, + "loss": 0.0603, "step": 22620 }, { - "epoch": 1.6805287390464874, - "grad_norm": 1.2848988771438599, - "learning_rate": 1.9916827565721077e-05, - "loss": 0.0584, + "epoch": 3.3610574780929747, + "grad_norm": 1.225459337234497, + "learning_rate": 6.638942521907026e-06, + "loss": 0.0497, "step": 22630 }, { - "epoch": 1.681271350066835, - "grad_norm": 1.2438756227493286, - "learning_rate": 1.991237189959899e-05, - "loss": 0.0784, + "epoch": 3.36254270013367, + "grad_norm": 0.9790806174278259, + "learning_rate": 6.63745729986633e-06, + "loss": 0.0666, "step": 22640 }, { - "epoch": 1.6820139610871825, - "grad_norm": 0.8794949054718018, - "learning_rate": 1.9907916233476904e-05, - "loss": 0.0759, + "epoch": 3.364027922174365, + "grad_norm": 1.1390143632888794, + "learning_rate": 6.635972077825635e-06, + "loss": 0.0543, "step": 22650 }, { - "epoch": 1.6827565721075302, - "grad_norm": 2.005244255065918, - "learning_rate": 1.9903460567354822e-05, - "loss": 0.0558, + "epoch": 3.3655131442150603, + "grad_norm": 1.215271234512329, + "learning_rate": 6.63448685578494e-06, + "loss": 0.0598, "step": 22660 }, { - "epoch": 1.6834991831278776, - "grad_norm": 1.2207728624343872, - "learning_rate": 1.9899004901232734e-05, - "loss": 0.0891, + "epoch": 3.366998366255755, + "grad_norm": 0.884739875793457, + "learning_rate": 6.633001633744245e-06, + "loss": 0.075, "step": 22670 }, { - "epoch": 1.684241794148225, - "grad_norm": 1.019566297531128, - "learning_rate": 1.989454923511065e-05, - "loss": 0.0762, + "epoch": 3.36848358829645, + "grad_norm": 0.5074040293693542, + "learning_rate": 6.63151641170355e-06, + "loss": 0.0495, "step": 22680 }, { - "epoch": 1.6849844051685727, - "grad_norm": 1.250605583190918, - "learning_rate": 1.9890093568988567e-05, - "loss": 0.0728, + "epoch": 3.3699688103371455, + "grad_norm": 1.5649079084396362, + "learning_rate": 6.630031189662855e-06, + "loss": 0.0591, "step": 22690 }, { - "epoch": 1.6857270161889204, - "grad_norm": 2.422374963760376, - "learning_rate": 1.988563790286648e-05, - "loss": 0.0747, + "epoch": 3.3714540323778404, + "grad_norm": 1.051287293434143, + "learning_rate": 6.62854596762216e-06, + "loss": 0.072, "step": 22700 }, { - "epoch": 1.6864696272092679, - "grad_norm": 2.8228814601898193, - "learning_rate": 1.9881182236744394e-05, - "loss": 0.0741, + "epoch": 3.3729392544185357, + "grad_norm": 0.6803810000419617, + "learning_rate": 6.627060745581465e-06, + "loss": 0.0678, "step": 22710 }, { - "epoch": 1.6872122382296153, - "grad_norm": 2.183687448501587, - "learning_rate": 1.987672657062231e-05, - "loss": 0.0659, + "epoch": 3.3744244764592306, + "grad_norm": 0.5742778778076172, + "learning_rate": 6.62557552354077e-06, + "loss": 0.0443, "step": 22720 }, { - "epoch": 1.6879548492499628, - "grad_norm": 1.0389907360076904, - "learning_rate": 1.9872270904500224e-05, - "loss": 0.0901, + "epoch": 3.3759096984999255, + "grad_norm": 0.5578451752662659, + "learning_rate": 6.624090301500075e-06, + "loss": 0.0693, "step": 22730 }, { - "epoch": 1.6886974602703104, - "grad_norm": 2.4531607627868652, - "learning_rate": 1.986781523837814e-05, - "loss": 0.0963, + "epoch": 3.377394920540621, + "grad_norm": 0.9065197706222534, + "learning_rate": 6.62260507945938e-06, + "loss": 0.0615, "step": 22740 }, { - "epoch": 1.689440071290658, - "grad_norm": 1.1364638805389404, - "learning_rate": 1.986335957225605e-05, - "loss": 0.0745, + "epoch": 3.3788801425813157, + "grad_norm": 0.6993559002876282, + "learning_rate": 6.6211198574186845e-06, + "loss": 0.0793, "step": 22750 }, { - "epoch": 1.6901826823110055, - "grad_norm": 1.6292158365249634, - "learning_rate": 1.985890390613397e-05, - "loss": 0.0886, + "epoch": 3.380365364622011, + "grad_norm": 1.2062517404556274, + "learning_rate": 6.61963463537799e-06, + "loss": 0.0769, "step": 22760 }, { - "epoch": 1.690925293331353, - "grad_norm": 2.255054473876953, - "learning_rate": 1.9854448240011884e-05, - "loss": 0.0885, + "epoch": 3.381850586662706, + "grad_norm": 0.5047938823699951, + "learning_rate": 6.618149413337295e-06, + "loss": 0.0767, "step": 22770 }, { - "epoch": 1.6916679043517004, - "grad_norm": 0.5803804993629456, - "learning_rate": 1.9849992573889795e-05, - "loss": 0.0325, + "epoch": 3.3833358087034013, + "grad_norm": 0.7543712854385376, + "learning_rate": 6.616664191296599e-06, + "loss": 0.0595, "step": 22780 }, { - "epoch": 1.6924105153720481, - "grad_norm": 1.7144925594329834, - "learning_rate": 1.984553690776771e-05, - "loss": 0.0705, + "epoch": 3.3848210307440962, + "grad_norm": 1.2753219604492188, + "learning_rate": 6.615178969255904e-06, + "loss": 0.0673, "step": 22790 }, { - "epoch": 1.6931531263923958, - "grad_norm": 0.6633053421974182, - "learning_rate": 1.984108124164563e-05, - "loss": 0.0524, + "epoch": 3.386306252784791, + "grad_norm": 1.725051760673523, + "learning_rate": 6.613693747215209e-06, + "loss": 0.0711, "step": 22800 }, { - "epoch": 1.6938957374127432, - "grad_norm": 3.752182960510254, - "learning_rate": 1.983662557552354e-05, - "loss": 0.0732, + "epoch": 3.3877914748254865, + "grad_norm": 0.7960348129272461, + "learning_rate": 6.612208525174514e-06, + "loss": 0.0603, "step": 22810 }, { - "epoch": 1.6946383484330907, - "grad_norm": 1.711698055267334, - "learning_rate": 1.9832169909401455e-05, - "loss": 0.0639, + "epoch": 3.3892766968661814, + "grad_norm": 0.9733383655548096, + "learning_rate": 6.610723303133819e-06, + "loss": 0.0635, "step": 22820 }, { - "epoch": 1.6953809594534381, - "grad_norm": 3.3594610691070557, - "learning_rate": 1.9827714243279374e-05, - "loss": 0.0828, + "epoch": 3.3907619189068767, + "grad_norm": 0.8336113095283508, + "learning_rate": 6.609238081093124e-06, + "loss": 0.053, "step": 22830 }, { - "epoch": 1.6961235704737858, - "grad_norm": 2.337766647338867, - "learning_rate": 1.9823258577157285e-05, - "loss": 0.0718, + "epoch": 3.3922471409475716, + "grad_norm": 0.8100163340568542, + "learning_rate": 6.607752859052429e-06, + "loss": 0.0821, "step": 22840 }, { - "epoch": 1.6968661814941335, - "grad_norm": 0.9109551310539246, - "learning_rate": 1.98188029110352e-05, - "loss": 0.0757, + "epoch": 3.393732362988267, + "grad_norm": 0.5547587275505066, + "learning_rate": 6.606267637011734e-06, + "loss": 0.0632, "step": 22850 }, { - "epoch": 1.697608792514481, - "grad_norm": 2.4265153408050537, - "learning_rate": 1.9814347244913115e-05, - "loss": 0.1019, + "epoch": 3.395217585028962, + "grad_norm": 0.4523991048336029, + "learning_rate": 6.604782414971038e-06, + "loss": 0.0573, "step": 22860 }, { - "epoch": 1.6983514035348284, - "grad_norm": 2.5311357975006104, - "learning_rate": 1.980989157879103e-05, - "loss": 0.07, + "epoch": 3.3967028070696568, + "grad_norm": 1.2900429964065552, + "learning_rate": 6.603297192930344e-06, + "loss": 0.0624, "step": 22870 }, { - "epoch": 1.6990940145551758, - "grad_norm": 1.0674959421157837, - "learning_rate": 1.9805435912668945e-05, - "loss": 0.0914, + "epoch": 3.398188029110352, + "grad_norm": 0.8760151267051697, + "learning_rate": 6.601811970889649e-06, + "loss": 0.0708, "step": 22880 }, { - "epoch": 1.6998366255755235, - "grad_norm": 1.6489328145980835, - "learning_rate": 1.9800980246546857e-05, - "loss": 0.0893, + "epoch": 3.399673251151047, + "grad_norm": 1.0388017892837524, + "learning_rate": 6.600326748848953e-06, + "loss": 0.0669, "step": 22890 }, { - "epoch": 1.7005792365958712, - "grad_norm": 1.366485595703125, - "learning_rate": 1.9796524580424775e-05, - "loss": 0.0594, + "epoch": 3.4011584731917424, + "grad_norm": 1.0040520429611206, + "learning_rate": 6.598841526808259e-06, + "loss": 0.0545, "step": 22900 }, { - "epoch": 1.7013218476162186, - "grad_norm": 1.1169344186782837, - "learning_rate": 1.979206891430269e-05, - "loss": 0.0928, + "epoch": 3.4026436952324373, + "grad_norm": 0.8709238767623901, + "learning_rate": 6.597356304767564e-06, + "loss": 0.0546, "step": 22910 }, { - "epoch": 1.702064458636566, - "grad_norm": 0.7352683544158936, - "learning_rate": 1.97876132481806e-05, - "loss": 0.0778, + "epoch": 3.404128917273132, + "grad_norm": 0.9482215642929077, + "learning_rate": 6.595871082726868e-06, + "loss": 0.0597, "step": 22920 }, { - "epoch": 1.7028070696569138, - "grad_norm": 1.1200909614562988, - "learning_rate": 1.978315758205852e-05, - "loss": 0.0808, + "epoch": 3.4056141393138275, + "grad_norm": 0.9480280876159668, + "learning_rate": 6.594385860686174e-06, + "loss": 0.0628, "step": 22930 }, { - "epoch": 1.7035496806772612, - "grad_norm": 1.3726412057876587, - "learning_rate": 1.9778701915936435e-05, - "loss": 0.0678, + "epoch": 3.4070993613545224, + "grad_norm": 0.3669753074645996, + "learning_rate": 6.592900638645479e-06, + "loss": 0.0636, "step": 22940 }, { - "epoch": 1.7042922916976089, - "grad_norm": 2.1558868885040283, - "learning_rate": 1.9774246249814347e-05, - "loss": 0.0842, + "epoch": 3.4085845833952177, + "grad_norm": 0.4074847400188446, + "learning_rate": 6.5914154166047825e-06, + "loss": 0.0455, "step": 22950 }, { - "epoch": 1.7050349027179563, - "grad_norm": 0.9559635519981384, - "learning_rate": 1.976979058369226e-05, - "loss": 0.0813, + "epoch": 3.4100698054359126, + "grad_norm": 0.976457417011261, + "learning_rate": 6.589930194564088e-06, + "loss": 0.0708, "step": 22960 }, { - "epoch": 1.7057775137383038, - "grad_norm": 0.5382719039916992, - "learning_rate": 1.9765334917570177e-05, - "loss": 0.0669, + "epoch": 3.4115550274766075, + "grad_norm": 0.8218967318534851, + "learning_rate": 6.588444972523392e-06, + "loss": 0.0435, "step": 22970 }, { - "epoch": 1.7065201247586514, - "grad_norm": 1.2556627988815308, - "learning_rate": 1.976087925144809e-05, - "loss": 0.0981, + "epoch": 3.413040249517303, + "grad_norm": 0.7639546990394592, + "learning_rate": 6.5869597504826975e-06, + "loss": 0.0596, "step": 22980 }, { - "epoch": 1.7072627357789991, - "grad_norm": 1.57675039768219, - "learning_rate": 1.9756423585326007e-05, - "loss": 0.0876, + "epoch": 3.414525471557998, + "grad_norm": 0.5619062781333923, + "learning_rate": 6.585474528442003e-06, + "loss": 0.0872, "step": 22990 }, { - "epoch": 1.7080053467993466, - "grad_norm": 1.055188536643982, - "learning_rate": 1.975196791920392e-05, - "loss": 0.0983, + "epoch": 3.416010693598693, + "grad_norm": 1.0580952167510986, + "learning_rate": 6.583989306401307e-06, + "loss": 0.0723, "step": 23000 }, { - "epoch": 1.708747957819694, - "grad_norm": 0.611940324306488, - "learning_rate": 1.9747512253081837e-05, - "loss": 0.0705, + "epoch": 3.417495915639388, + "grad_norm": 0.6083091497421265, + "learning_rate": 6.5825040843606125e-06, + "loss": 0.058, "step": 23010 }, { - "epoch": 1.7094905688400415, - "grad_norm": 2.1072449684143066, - "learning_rate": 1.974305658695975e-05, - "loss": 0.072, + "epoch": 3.418981137680083, + "grad_norm": 0.5163406133651733, + "learning_rate": 6.581018862319918e-06, + "loss": 0.0649, "step": 23020 }, { - "epoch": 1.7102331798603891, - "grad_norm": 1.6510035991668701, - "learning_rate": 1.9738600920837667e-05, - "loss": 0.0522, + "epoch": 3.4204663597207783, + "grad_norm": 1.1454685926437378, + "learning_rate": 6.579533640279222e-06, + "loss": 0.0502, "step": 23030 }, { - "epoch": 1.7109757908807368, - "grad_norm": 0.40311571955680847, - "learning_rate": 1.973414525471558e-05, - "loss": 0.0883, + "epoch": 3.421951581761473, + "grad_norm": 0.774257481098175, + "learning_rate": 6.5780484182385275e-06, + "loss": 0.0803, "step": 23040 }, { - "epoch": 1.7117184019010843, - "grad_norm": 3.123772144317627, - "learning_rate": 1.9729689588593493e-05, - "loss": 0.0727, + "epoch": 3.4234368038021685, + "grad_norm": 0.5684376955032349, + "learning_rate": 6.576563196197832e-06, + "loss": 0.0679, "step": 23050 }, { - "epoch": 1.7124610129214317, - "grad_norm": 0.9838127493858337, - "learning_rate": 1.9725233922471408e-05, - "loss": 0.0601, + "epoch": 3.4249220258428634, + "grad_norm": 1.2006176710128784, + "learning_rate": 6.575077974157137e-06, + "loss": 0.0824, "step": 23060 }, { - "epoch": 1.7132036239417792, - "grad_norm": 0.6052844524383545, - "learning_rate": 1.9720778256349327e-05, - "loss": 0.0905, + "epoch": 3.4264072478835588, + "grad_norm": 0.6941164135932922, + "learning_rate": 6.5735927521164425e-06, + "loss": 0.0684, "step": 23070 }, { - "epoch": 1.7139462349621268, - "grad_norm": 2.6029303073883057, - "learning_rate": 1.9716322590227238e-05, - "loss": 0.0854, + "epoch": 3.4278924699242537, + "grad_norm": 0.8499606847763062, + "learning_rate": 6.572107530075746e-06, + "loss": 0.069, "step": 23080 }, { - "epoch": 1.7146888459824745, - "grad_norm": 1.6953434944152832, - "learning_rate": 1.9711866924105153e-05, - "loss": 0.0739, + "epoch": 3.4293776919649486, + "grad_norm": 0.8829616904258728, + "learning_rate": 6.570622308035051e-06, + "loss": 0.0552, "step": 23090 }, { - "epoch": 1.715431457002822, - "grad_norm": 1.7296435832977295, - "learning_rate": 1.970741125798307e-05, - "loss": 0.0666, + "epoch": 3.430862914005644, + "grad_norm": 0.6436765789985657, + "learning_rate": 6.5691370859943575e-06, + "loss": 0.0682, "step": 23100 }, { - "epoch": 1.7161740680231694, - "grad_norm": 1.8964383602142334, - "learning_rate": 1.9702955591860983e-05, - "loss": 0.0687, + "epoch": 3.432348136046339, + "grad_norm": 0.8526592254638672, + "learning_rate": 6.567651863953661e-06, + "loss": 0.0473, "step": 23110 }, { - "epoch": 1.7169166790435169, - "grad_norm": 1.0528844594955444, - "learning_rate": 1.9698499925738898e-05, - "loss": 0.0555, + "epoch": 3.433833358087034, + "grad_norm": 0.9360619187355042, + "learning_rate": 6.566166641912966e-06, + "loss": 0.0771, "step": 23120 }, { - "epoch": 1.7176592900638645, - "grad_norm": 2.4928388595581055, - "learning_rate": 1.9694044259616813e-05, - "loss": 0.0738, + "epoch": 3.435318580127729, + "grad_norm": 1.5167564153671265, + "learning_rate": 6.564681419872272e-06, + "loss": 0.0585, "step": 23130 }, { - "epoch": 1.7184019010842122, - "grad_norm": 2.2617714405059814, - "learning_rate": 1.9689588593494728e-05, - "loss": 0.1023, + "epoch": 3.4368038021684244, + "grad_norm": 0.6640611886978149, + "learning_rate": 6.563196197831576e-06, + "loss": 0.0646, "step": 23140 }, { - "epoch": 1.7191445121045597, - "grad_norm": 1.0434247255325317, - "learning_rate": 1.9685132927372643e-05, - "loss": 0.0923, + "epoch": 3.4382890242091193, + "grad_norm": 0.8414837121963501, + "learning_rate": 6.561710975790881e-06, + "loss": 0.0584, "step": 23150 }, { - "epoch": 1.719887123124907, - "grad_norm": 0.6594432592391968, - "learning_rate": 1.9680677261250555e-05, - "loss": 0.0626, + "epoch": 3.439774246249814, + "grad_norm": 0.364753395318985, + "learning_rate": 6.560225753750186e-06, + "loss": 0.054, "step": 23160 }, { - "epoch": 1.7206297341452546, - "grad_norm": 2.8370988368988037, - "learning_rate": 1.9676221595128473e-05, - "loss": 0.093, + "epoch": 3.4412594682905095, + "grad_norm": 0.7285709977149963, + "learning_rate": 6.558740531709491e-06, + "loss": 0.0586, "step": 23170 }, { - "epoch": 1.7213723451656022, - "grad_norm": 0.7767960429191589, - "learning_rate": 1.9671765929006388e-05, - "loss": 0.0637, + "epoch": 3.4427446903312044, + "grad_norm": 0.9097562432289124, + "learning_rate": 6.557255309668796e-06, + "loss": 0.0407, "step": 23180 }, { - "epoch": 1.72211495618595, - "grad_norm": 1.8710448741912842, - "learning_rate": 1.96673102628843e-05, - "loss": 0.0675, + "epoch": 3.4442299123719, + "grad_norm": 0.6811128258705139, + "learning_rate": 6.555770087628101e-06, + "loss": 0.0655, "step": 23190 }, { - "epoch": 1.7228575672062973, - "grad_norm": 3.039166212081909, - "learning_rate": 1.9662854596762215e-05, - "loss": 0.0883, + "epoch": 3.4457151344125947, + "grad_norm": 0.30362212657928467, + "learning_rate": 6.554284865587406e-06, + "loss": 0.0798, "step": 23200 }, { - "epoch": 1.7236001782266448, - "grad_norm": 4.762219429016113, - "learning_rate": 1.9658398930640133e-05, - "loss": 0.0519, + "epoch": 3.4472003564532896, + "grad_norm": 0.7332984209060669, + "learning_rate": 6.552799643546711e-06, + "loss": 0.0654, "step": 23210 }, { - "epoch": 1.7243427892469925, - "grad_norm": 1.641481876373291, - "learning_rate": 1.9653943264518045e-05, - "loss": 0.0918, + "epoch": 3.448685578493985, + "grad_norm": 1.3603813648223877, + "learning_rate": 6.551314421506016e-06, + "loss": 0.0624, "step": 23220 }, { - "epoch": 1.72508540026734, - "grad_norm": 0.6783468127250671, - "learning_rate": 1.964948759839596e-05, - "loss": 0.0757, + "epoch": 3.45017080053468, + "grad_norm": 0.3083489239215851, + "learning_rate": 6.549829199465321e-06, + "loss": 0.0615, "step": 23230 }, { - "epoch": 1.7258280112876876, - "grad_norm": 1.0476303100585938, - "learning_rate": 1.9645031932273878e-05, - "loss": 0.0607, + "epoch": 3.451656022575375, + "grad_norm": 1.117027759552002, + "learning_rate": 6.548343977424626e-06, + "loss": 0.0636, "step": 23240 }, { - "epoch": 1.726570622308035, - "grad_norm": 1.5306792259216309, - "learning_rate": 1.964057626615179e-05, - "loss": 0.0644, + "epoch": 3.45314124461607, + "grad_norm": 0.9651148319244385, + "learning_rate": 6.54685875538393e-06, + "loss": 0.0591, "step": 23250 }, { - "epoch": 1.7273132333283825, - "grad_norm": 0.8044191598892212, - "learning_rate": 1.9636120600029705e-05, - "loss": 0.0881, + "epoch": 3.454626466656765, + "grad_norm": 1.4626349210739136, + "learning_rate": 6.545373533343235e-06, + "loss": 0.0519, "step": 23260 }, { - "epoch": 1.7280558443487302, - "grad_norm": 1.202543020248413, - "learning_rate": 1.963166493390762e-05, - "loss": 0.0665, + "epoch": 3.4561116886974603, + "grad_norm": 0.6410413384437561, + "learning_rate": 6.54388831130254e-06, + "loss": 0.0643, "step": 23270 }, { - "epoch": 1.7287984553690778, - "grad_norm": 2.2589240074157715, - "learning_rate": 1.9627209267785535e-05, - "loss": 0.0831, + "epoch": 3.4575969107381552, + "grad_norm": 1.3073314428329468, + "learning_rate": 6.542403089261845e-06, + "loss": 0.0811, "step": 23280 }, { - "epoch": 1.7295410663894253, - "grad_norm": 2.0191476345062256, - "learning_rate": 1.962275360166345e-05, - "loss": 0.0765, + "epoch": 3.4590821327788506, + "grad_norm": 0.36089351773262024, + "learning_rate": 6.54091786722115e-06, + "loss": 0.0532, "step": 23290 }, { - "epoch": 1.7302836774097727, - "grad_norm": 2.8185505867004395, - "learning_rate": 1.961829793554136e-05, - "loss": 0.0791, + "epoch": 3.4605673548195455, + "grad_norm": 1.0440473556518555, + "learning_rate": 6.539432645180455e-06, + "loss": 0.0503, "step": 23300 }, { - "epoch": 1.7310262884301202, - "grad_norm": 1.082022786140442, - "learning_rate": 1.961384226941928e-05, - "loss": 0.0978, + "epoch": 3.4620525768602404, + "grad_norm": 0.4468558728694916, + "learning_rate": 6.53794742313976e-06, + "loss": 0.0672, "step": 23310 }, { - "epoch": 1.7317688994504679, - "grad_norm": 1.9204188585281372, - "learning_rate": 1.9609386603297195e-05, - "loss": 0.0893, + "epoch": 3.4635377989009357, + "grad_norm": 1.0561190843582153, + "learning_rate": 6.536462201099065e-06, + "loss": 0.065, "step": 23320 }, { - "epoch": 1.7325115104708155, - "grad_norm": 1.1445153951644897, - "learning_rate": 1.9604930937175106e-05, - "loss": 0.0547, + "epoch": 3.4650230209416306, + "grad_norm": 1.0147300958633423, + "learning_rate": 6.53497697905837e-06, + "loss": 0.0623, "step": 23330 }, { - "epoch": 1.733254121491163, - "grad_norm": 0.6673332452774048, - "learning_rate": 1.9600475271053025e-05, - "loss": 0.0447, + "epoch": 3.466508242982326, + "grad_norm": 1.0339702367782593, + "learning_rate": 6.533491757017675e-06, + "loss": 0.0625, "step": 23340 }, { - "epoch": 1.7339967325115104, - "grad_norm": 4.474247455596924, - "learning_rate": 1.959601960493094e-05, - "loss": 0.11, + "epoch": 3.467993465023021, + "grad_norm": 1.0387835502624512, + "learning_rate": 6.53200653497698e-06, + "loss": 0.0713, "step": 23350 }, { - "epoch": 1.7347393435318579, - "grad_norm": 4.08743143081665, - "learning_rate": 1.959156393880885e-05, - "loss": 0.0967, + "epoch": 3.469478687063716, + "grad_norm": 0.9874463081359863, + "learning_rate": 6.530521312936285e-06, + "loss": 0.0698, "step": 23360 }, { - "epoch": 1.7354819545522056, - "grad_norm": 1.5097987651824951, - "learning_rate": 1.9587108272686766e-05, - "loss": 0.0715, + "epoch": 3.470963909104411, + "grad_norm": 0.5879045724868774, + "learning_rate": 6.52903609089559e-06, + "loss": 0.053, "step": 23370 }, { - "epoch": 1.7362245655725532, - "grad_norm": 2.6313939094543457, - "learning_rate": 1.958265260656468e-05, - "loss": 0.0836, + "epoch": 3.472449131145106, + "grad_norm": 0.4100988805294037, + "learning_rate": 6.5275508688548935e-06, + "loss": 0.0655, "step": 23380 }, { - "epoch": 1.7369671765929007, - "grad_norm": 0.9366971850395203, - "learning_rate": 1.9578196940442596e-05, - "loss": 0.0911, + "epoch": 3.4739343531858013, + "grad_norm": 1.068902611732483, + "learning_rate": 6.5260656468142e-06, + "loss": 0.0691, "step": 23390 }, { - "epoch": 1.7377097876132481, - "grad_norm": 0.614687979221344, - "learning_rate": 1.957374127432051e-05, - "loss": 0.0654, + "epoch": 3.4754195752264962, + "grad_norm": 1.1891417503356934, + "learning_rate": 6.524580424773505e-06, + "loss": 0.0615, "step": 23400 }, { - "epoch": 1.7384523986335956, - "grad_norm": 1.9698867797851562, - "learning_rate": 1.9569285608198426e-05, - "loss": 0.0922, + "epoch": 3.4769047972671916, + "grad_norm": 0.6840189695358276, + "learning_rate": 6.5230952027328085e-06, + "loss": 0.0698, "step": 23410 }, { - "epoch": 1.7391950096539432, - "grad_norm": 2.608386754989624, - "learning_rate": 1.956482994207634e-05, - "loss": 0.0963, + "epoch": 3.4783900193078865, + "grad_norm": 0.9377675652503967, + "learning_rate": 6.521609980692114e-06, + "loss": 0.0745, "step": 23420 }, { - "epoch": 1.739937620674291, - "grad_norm": 3.6688835620880127, - "learning_rate": 1.9560374275954256e-05, - "loss": 0.0771, + "epoch": 3.479875241348582, + "grad_norm": 1.2862908840179443, + "learning_rate": 6.520124758651419e-06, + "loss": 0.0667, "step": 23430 }, { - "epoch": 1.7406802316946384, - "grad_norm": 2.29097318649292, - "learning_rate": 1.955591860983217e-05, - "loss": 0.0723, + "epoch": 3.4813604633892767, + "grad_norm": 0.8998900055885315, + "learning_rate": 6.5186395366107235e-06, + "loss": 0.0741, "step": 23440 }, { - "epoch": 1.7414228427149858, - "grad_norm": 1.2678636312484741, - "learning_rate": 1.9551462943710086e-05, - "loss": 0.0709, + "epoch": 3.4828456854299716, + "grad_norm": 1.0825127363204956, + "learning_rate": 6.517154314570029e-06, + "loss": 0.0732, "step": 23450 }, { - "epoch": 1.7421654537353333, - "grad_norm": 4.124483108520508, - "learning_rate": 1.9547007277588e-05, - "loss": 0.089, + "epoch": 3.484330907470667, + "grad_norm": 0.8313888311386108, + "learning_rate": 6.515669092529334e-06, + "loss": 0.0722, "step": 23460 }, { - "epoch": 1.742908064755681, - "grad_norm": 0.28768646717071533, - "learning_rate": 1.9542551611465913e-05, - "loss": 0.0933, + "epoch": 3.485816129511362, + "grad_norm": 0.6264531016349792, + "learning_rate": 6.5141838704886384e-06, + "loss": 0.0719, "step": 23470 }, { - "epoch": 1.7436506757760286, - "grad_norm": 0.71043860912323, - "learning_rate": 1.953809594534383e-05, - "loss": 0.0806, + "epoch": 3.4873013515520572, + "grad_norm": 0.6491361856460571, + "learning_rate": 6.512698648447944e-06, + "loss": 0.0572, "step": 23480 }, { - "epoch": 1.744393286796376, - "grad_norm": 1.6145790815353394, - "learning_rate": 1.9533640279221743e-05, - "loss": 0.0659, + "epoch": 3.488786573592752, + "grad_norm": 0.5687041878700256, + "learning_rate": 6.511213426407248e-06, + "loss": 0.0584, "step": 23490 }, { - "epoch": 1.7451358978167235, - "grad_norm": 1.6282793283462524, - "learning_rate": 1.9529184613099658e-05, - "loss": 0.1154, + "epoch": 3.490271795633447, + "grad_norm": 0.7308996319770813, + "learning_rate": 6.5097282043665534e-06, + "loss": 0.064, "step": 23500 }, { - "epoch": 1.7458785088370712, - "grad_norm": 1.4098920822143555, - "learning_rate": 1.9524728946977576e-05, - "loss": 0.073, + "epoch": 3.4917570176741424, + "grad_norm": 0.7591047883033752, + "learning_rate": 6.508242982325859e-06, + "loss": 0.0808, "step": 23510 }, { - "epoch": 1.7466211198574186, - "grad_norm": 2.4502289295196533, - "learning_rate": 1.9520273280855488e-05, - "loss": 0.0688, + "epoch": 3.4932422397148373, + "grad_norm": 1.3734276294708252, + "learning_rate": 6.506757760285163e-06, + "loss": 0.0553, "step": 23520 }, { - "epoch": 1.7473637308777663, - "grad_norm": 1.5324982404708862, - "learning_rate": 1.9515817614733403e-05, - "loss": 0.0789, + "epoch": 3.4947274617555326, + "grad_norm": 0.7951934337615967, + "learning_rate": 6.5052725382444684e-06, + "loss": 0.0522, "step": 23530 }, { - "epoch": 1.7481063418981138, - "grad_norm": 1.1918566226959229, - "learning_rate": 1.9511361948611318e-05, - "loss": 0.0879, + "epoch": 3.4962126837962275, + "grad_norm": 1.0081889629364014, + "learning_rate": 6.503787316203774e-06, + "loss": 0.0606, "step": 23540 }, { - "epoch": 1.7488489529184612, - "grad_norm": 1.8699147701263428, - "learning_rate": 1.9506906282489233e-05, - "loss": 0.0677, + "epoch": 3.4976979058369224, + "grad_norm": 0.7433433532714844, + "learning_rate": 6.502302094163077e-06, + "loss": 0.0499, "step": 23550 }, { - "epoch": 1.7495915639388089, - "grad_norm": 1.7043718099594116, - "learning_rate": 1.9502450616367148e-05, - "loss": 0.0751, + "epoch": 3.4991831278776178, + "grad_norm": 0.96690434217453, + "learning_rate": 6.500816872122383e-06, + "loss": 0.0702, "step": 23560 }, { - "epoch": 1.7503341749591566, - "grad_norm": 1.2603180408477783, - "learning_rate": 1.949799495024506e-05, - "loss": 0.0724, + "epoch": 3.5006683499183127, + "grad_norm": 0.7072582244873047, + "learning_rate": 6.499331650081687e-06, + "loss": 0.0641, "step": 23570 }, { - "epoch": 1.751076785979504, - "grad_norm": 0.9619042277336121, - "learning_rate": 1.9493539284122978e-05, - "loss": 0.0815, + "epoch": 3.502153571959008, + "grad_norm": 0.7534715533256531, + "learning_rate": 6.497846428040992e-06, + "loss": 0.0663, "step": 23580 }, { - "epoch": 1.7518193969998515, - "grad_norm": 1.521238923072815, - "learning_rate": 1.9489083618000893e-05, - "loss": 0.0494, + "epoch": 3.503638793999703, + "grad_norm": 0.7846993207931519, + "learning_rate": 6.496361206000298e-06, + "loss": 0.0465, "step": 23590 }, { - "epoch": 1.752562008020199, - "grad_norm": 2.5039453506469727, - "learning_rate": 1.9484627951878804e-05, - "loss": 0.0824, + "epoch": 3.505124016040398, + "grad_norm": 0.8195766806602478, + "learning_rate": 6.494875983959602e-06, + "loss": 0.0579, "step": 23600 }, { - "epoch": 1.7533046190405466, - "grad_norm": 0.793013334274292, - "learning_rate": 1.9480172285756723e-05, - "loss": 0.0529, + "epoch": 3.506609238081093, + "grad_norm": 0.5940980911254883, + "learning_rate": 6.493390761918907e-06, + "loss": 0.054, "step": 23610 }, { - "epoch": 1.7540472300608942, - "grad_norm": 2.6276259422302246, - "learning_rate": 1.9475716619634638e-05, - "loss": 0.0624, + "epoch": 3.508094460121788, + "grad_norm": 0.36276596784591675, + "learning_rate": 6.4919055398782126e-06, + "loss": 0.0522, "step": 23620 }, { - "epoch": 1.7547898410812417, - "grad_norm": 0.7211153507232666, - "learning_rate": 1.947126095351255e-05, - "loss": 0.0859, + "epoch": 3.5095796821624834, + "grad_norm": 0.5541412234306335, + "learning_rate": 6.490420317837517e-06, + "loss": 0.0748, "step": 23630 }, { - "epoch": 1.7555324521015891, - "grad_norm": 0.7055466175079346, - "learning_rate": 1.9466805287390464e-05, - "loss": 0.0813, + "epoch": 3.5110649042031783, + "grad_norm": 1.0632083415985107, + "learning_rate": 6.488935095796822e-06, + "loss": 0.0667, "step": 23640 }, { - "epoch": 1.7562750631219366, - "grad_norm": 1.8342903852462769, - "learning_rate": 1.9462349621268383e-05, - "loss": 0.1125, + "epoch": 3.512550126243873, + "grad_norm": 0.9306591749191284, + "learning_rate": 6.4874498737561276e-06, + "loss": 0.0906, "step": 23650 }, { - "epoch": 1.7570176741422843, - "grad_norm": 1.909565806388855, - "learning_rate": 1.9457893955146294e-05, - "loss": 0.1002, + "epoch": 3.5140353482845685, + "grad_norm": 0.5102431774139404, + "learning_rate": 6.485964651715432e-06, + "loss": 0.0801, "step": 23660 }, { - "epoch": 1.757760285162632, - "grad_norm": 1.755029559135437, - "learning_rate": 1.945343828902421e-05, - "loss": 0.0785, + "epoch": 3.515520570325264, + "grad_norm": 1.087111234664917, + "learning_rate": 6.484479429674737e-06, + "loss": 0.0702, "step": 23670 }, { - "epoch": 1.7585028961829794, - "grad_norm": 1.2598732709884644, - "learning_rate": 1.9448982622902124e-05, - "loss": 0.063, + "epoch": 3.517005792365959, + "grad_norm": 0.9563620686531067, + "learning_rate": 6.482994207634042e-06, + "loss": 0.0557, "step": 23680 }, { - "epoch": 1.7592455072033268, - "grad_norm": 1.9370228052139282, - "learning_rate": 1.944452695678004e-05, - "loss": 0.0481, + "epoch": 3.5184910144066537, + "grad_norm": 0.8023037314414978, + "learning_rate": 6.481508985593347e-06, + "loss": 0.0729, "step": 23690 }, { - "epoch": 1.7599881182236743, - "grad_norm": 0.8600150942802429, - "learning_rate": 1.9440071290657954e-05, - "loss": 0.0818, + "epoch": 3.519976236447349, + "grad_norm": 2.2834434509277344, + "learning_rate": 6.480023763552652e-06, + "loss": 0.0626, "step": 23700 }, { - "epoch": 1.760730729244022, - "grad_norm": 1.0704541206359863, - "learning_rate": 1.9435615624535866e-05, - "loss": 0.0879, + "epoch": 3.521461458488044, + "grad_norm": 0.9204877018928528, + "learning_rate": 6.478538541511956e-06, + "loss": 0.0727, "step": 23710 }, - { - "epoch": 1.7614733402643696, - "grad_norm": 0.8569614291191101, - "learning_rate": 1.9431159958413784e-05, - "loss": 0.0786, + { + "epoch": 3.5229466805287393, + "grad_norm": 0.6565443277359009, + "learning_rate": 6.477053319471261e-06, + "loss": 0.057, "step": 23720 }, { - "epoch": 1.762215951284717, - "grad_norm": 2.5720906257629395, - "learning_rate": 1.94267042922917e-05, - "loss": 0.0942, + "epoch": 3.524431902569434, + "grad_norm": 1.235333800315857, + "learning_rate": 6.475568097430566e-06, + "loss": 0.0834, "step": 23730 }, { - "epoch": 1.7629585623050645, - "grad_norm": 1.6318458318710327, - "learning_rate": 1.942224862616961e-05, - "loss": 0.0819, + "epoch": 3.525917124610129, + "grad_norm": 0.5262104868888855, + "learning_rate": 6.474082875389871e-06, + "loss": 0.0619, "step": 23740 }, { - "epoch": 1.763701173325412, - "grad_norm": 0.5371285080909729, - "learning_rate": 1.941779296004753e-05, - "loss": 0.042, + "epoch": 3.5274023466508244, + "grad_norm": 0.6873980164527893, + "learning_rate": 6.472597653349176e-06, + "loss": 0.0568, "step": 23750 }, { - "epoch": 1.7644437843457597, - "grad_norm": 0.4963701665401459, - "learning_rate": 1.9413337293925444e-05, - "loss": 0.0849, + "epoch": 3.5288875686915193, + "grad_norm": 0.794663667678833, + "learning_rate": 6.471112431308481e-06, + "loss": 0.0679, "step": 23760 }, { - "epoch": 1.7651863953661073, - "grad_norm": 4.991149425506592, - "learning_rate": 1.9408881627803356e-05, - "loss": 0.0797, + "epoch": 3.5303727907322147, + "grad_norm": 0.4290243983268738, + "learning_rate": 6.469627209267786e-06, + "loss": 0.0682, "step": 23770 }, { - "epoch": 1.7659290063864548, - "grad_norm": 2.4451613426208496, - "learning_rate": 1.940442596168127e-05, - "loss": 0.0688, + "epoch": 3.5318580127729096, + "grad_norm": 0.8954272866249084, + "learning_rate": 6.468141987227091e-06, + "loss": 0.0676, "step": 23780 }, { - "epoch": 1.7666716174068022, - "grad_norm": 0.5909414887428284, - "learning_rate": 1.9399970295559186e-05, - "loss": 0.0814, + "epoch": 3.5333432348136045, + "grad_norm": 1.1637274026870728, + "learning_rate": 6.4666567651863956e-06, + "loss": 0.0587, "step": 23790 }, { - "epoch": 1.76741422842715, - "grad_norm": 3.1932735443115234, - "learning_rate": 1.93955146294371e-05, - "loss": 0.0725, + "epoch": 3.5348284568543, + "grad_norm": 1.2074244022369385, + "learning_rate": 6.465171543145701e-06, + "loss": 0.0631, "step": 23800 }, { - "epoch": 1.7681568394474974, - "grad_norm": 1.4078024625778198, - "learning_rate": 1.9391058963315016e-05, - "loss": 0.0917, + "epoch": 3.5363136788949947, + "grad_norm": 1.0475409030914307, + "learning_rate": 6.463686321105006e-06, + "loss": 0.0657, "step": 23810 }, { - "epoch": 1.768899450467845, - "grad_norm": 0.5630704164505005, - "learning_rate": 1.938660329719293e-05, - "loss": 0.049, + "epoch": 3.53779890093569, + "grad_norm": 0.9346685409545898, + "learning_rate": 6.4622010990643106e-06, + "loss": 0.059, "step": 23820 }, { - "epoch": 1.7696420614881925, - "grad_norm": 1.22433602809906, - "learning_rate": 1.9382147631070846e-05, - "loss": 0.0883, + "epoch": 3.539284122976385, + "grad_norm": 1.0918734073638916, + "learning_rate": 6.460715877023616e-06, + "loss": 0.0723, "step": 23830 }, { - "epoch": 1.77038467250854, - "grad_norm": 2.7386889457702637, - "learning_rate": 1.937769196494876e-05, - "loss": 0.0769, + "epoch": 3.54076934501708, + "grad_norm": 0.8081991672515869, + "learning_rate": 6.459230654982921e-06, + "loss": 0.0414, "step": 23840 }, { - "epoch": 1.7711272835288876, - "grad_norm": 0.52625572681427, - "learning_rate": 1.9373236298826676e-05, - "loss": 0.0807, + "epoch": 3.542254567057775, + "grad_norm": 0.7394802570343018, + "learning_rate": 6.457745432942225e-06, + "loss": 0.0583, "step": 23850 }, { - "epoch": 1.7718698945492353, - "grad_norm": 1.0019735097885132, - "learning_rate": 1.936878063270459e-05, - "loss": 0.074, + "epoch": 3.54373978909847, + "grad_norm": 0.40993183851242065, + "learning_rate": 6.456260210901531e-06, + "loss": 0.0516, "step": 23860 }, { - "epoch": 1.7726125055695827, - "grad_norm": 1.6387897729873657, - "learning_rate": 1.9364324966582506e-05, - "loss": 0.0592, + "epoch": 3.5452250111391654, + "grad_norm": 1.0917972326278687, + "learning_rate": 6.454774988860836e-06, + "loss": 0.0727, "step": 23870 }, { - "epoch": 1.7733551165899302, - "grad_norm": 1.7129108905792236, - "learning_rate": 1.9359869300460417e-05, - "loss": 0.101, + "epoch": 3.5467102331798603, + "grad_norm": 0.5183740854263306, + "learning_rate": 6.45328976682014e-06, + "loss": 0.0638, "step": 23880 }, { - "epoch": 1.7740977276102776, - "grad_norm": 2.9050674438476562, - "learning_rate": 1.9355413634338336e-05, - "loss": 0.0965, + "epoch": 3.5481954552205552, + "grad_norm": 0.5838718414306641, + "learning_rate": 6.451804544779445e-06, + "loss": 0.0729, "step": 23890 }, { - "epoch": 1.7748403386306253, - "grad_norm": 2.6938226222991943, - "learning_rate": 1.9350957968216247e-05, - "loss": 0.0713, + "epoch": 3.5496806772612506, + "grad_norm": 1.3712800741195679, + "learning_rate": 6.450319322738749e-06, + "loss": 0.0893, "step": 23900 }, { - "epoch": 1.775582949650973, - "grad_norm": 2.0255374908447266, - "learning_rate": 1.9346502302094162e-05, - "loss": 0.0711, + "epoch": 3.5511658993019455, + "grad_norm": 1.15109384059906, + "learning_rate": 6.448834100698055e-06, + "loss": 0.0761, "step": 23910 }, { - "epoch": 1.7763255606713204, - "grad_norm": 0.8787927627563477, - "learning_rate": 1.934204663597208e-05, - "loss": 0.0717, + "epoch": 3.552651121342641, + "grad_norm": 0.6892343163490295, + "learning_rate": 6.44734887865736e-06, + "loss": 0.058, "step": 23920 }, { - "epoch": 1.7770681716916679, - "grad_norm": 0.6391984224319458, - "learning_rate": 1.9337590969849992e-05, - "loss": 0.0434, + "epoch": 3.5541363433833357, + "grad_norm": 0.5948600172996521, + "learning_rate": 6.445863656616664e-06, + "loss": 0.0765, "step": 23930 }, { - "epoch": 1.7778107827120153, - "grad_norm": 2.4964632987976074, - "learning_rate": 1.9333135303727907e-05, - "loss": 0.0759, + "epoch": 3.5556215654240306, + "grad_norm": 1.3563827276229858, + "learning_rate": 6.44437843457597e-06, + "loss": 0.063, "step": 23940 }, { - "epoch": 1.778553393732363, - "grad_norm": 2.1311395168304443, - "learning_rate": 1.9328679637605822e-05, - "loss": 0.0499, + "epoch": 3.557106787464726, + "grad_norm": 1.584236741065979, + "learning_rate": 6.442893212535275e-06, + "loss": 0.0547, "step": 23950 }, { - "epoch": 1.7792960047527107, - "grad_norm": 3.056412935256958, - "learning_rate": 1.9324223971483737e-05, - "loss": 0.0652, + "epoch": 3.5585920095054213, + "grad_norm": 0.9074768424034119, + "learning_rate": 6.441407990494579e-06, + "loss": 0.0747, "step": 23960 }, { - "epoch": 1.780038615773058, - "grad_norm": 1.9002305269241333, - "learning_rate": 1.9319768305361652e-05, - "loss": 0.0528, + "epoch": 3.560077231546116, + "grad_norm": 0.729647159576416, + "learning_rate": 6.439922768453885e-06, + "loss": 0.068, "step": 23970 }, { - "epoch": 1.7807812267934056, - "grad_norm": 2.583705186843872, - "learning_rate": 1.9315312639239564e-05, - "loss": 0.0692, + "epoch": 3.561562453586811, + "grad_norm": 0.7314901351928711, + "learning_rate": 6.438437546413189e-06, + "loss": 0.0637, "step": 23980 }, { - "epoch": 1.781523837813753, - "grad_norm": 2.8918917179107666, - "learning_rate": 1.9310856973117482e-05, - "loss": 0.0405, + "epoch": 3.5630476756275065, + "grad_norm": 0.5402231812477112, + "learning_rate": 6.436952324372494e-06, + "loss": 0.0805, "step": 23990 }, { - "epoch": 1.7822664488341007, - "grad_norm": 2.4750471115112305, - "learning_rate": 1.9306401306995397e-05, - "loss": 0.0704, + "epoch": 3.5645328976682014, + "grad_norm": 0.49268588423728943, + "learning_rate": 6.4354671023318e-06, + "loss": 0.0752, "step": 24000 }, { - "epoch": 1.7830090598544484, - "grad_norm": 0.7255248427391052, - "learning_rate": 1.930194564087331e-05, - "loss": 0.0657, + "epoch": 3.5660181197088967, + "grad_norm": 0.5215333700180054, + "learning_rate": 6.433981880291103e-06, + "loss": 0.0638, "step": 24010 }, { - "epoch": 1.7837516708747958, - "grad_norm": 1.313336968421936, - "learning_rate": 1.9297489974751227e-05, - "loss": 0.0548, + "epoch": 3.5675033417495916, + "grad_norm": 0.7803683280944824, + "learning_rate": 6.4324966582504085e-06, + "loss": 0.0471, "step": 24020 }, { - "epoch": 1.7844942818951433, - "grad_norm": 1.0988185405731201, - "learning_rate": 1.9293034308629142e-05, - "loss": 0.0681, + "epoch": 3.5689885637902865, + "grad_norm": 0.8906722068786621, + "learning_rate": 6.431011436209714e-06, + "loss": 0.0708, "step": 24030 }, { - "epoch": 1.7852368929154907, - "grad_norm": 1.2511615753173828, - "learning_rate": 1.9288578642507054e-05, - "loss": 0.1, + "epoch": 3.570473785830982, + "grad_norm": 1.0567264556884766, + "learning_rate": 6.429526214169018e-06, + "loss": 0.0658, "step": 24040 }, { - "epoch": 1.7859795039358384, - "grad_norm": 0.9137929677963257, - "learning_rate": 1.928412297638497e-05, - "loss": 0.0922, + "epoch": 3.5719590078716768, + "grad_norm": 0.6956542134284973, + "learning_rate": 6.4280409921283235e-06, + "loss": 0.0808, "step": 24050 }, { - "epoch": 1.786722114956186, - "grad_norm": 1.0579396486282349, - "learning_rate": 1.9279667310262887e-05, - "loss": 0.0649, + "epoch": 3.573444229912372, + "grad_norm": 0.4545905590057373, + "learning_rate": 6.426555770087629e-06, + "loss": 0.0548, "step": 24060 }, { - "epoch": 1.7874647259765335, - "grad_norm": 2.3197665214538574, - "learning_rate": 1.92752116441408e-05, - "loss": 0.0646, + "epoch": 3.574929451953067, + "grad_norm": 0.5176657438278198, + "learning_rate": 6.425070548046933e-06, + "loss": 0.0424, "step": 24070 }, { - "epoch": 1.788207336996881, - "grad_norm": 0.7259221076965332, - "learning_rate": 1.9270755978018714e-05, - "loss": 0.0642, + "epoch": 3.576414673993762, + "grad_norm": 0.5687969326972961, + "learning_rate": 6.4235853260062385e-06, + "loss": 0.0619, "step": 24080 }, { - "epoch": 1.7889499480172286, - "grad_norm": 1.1063398122787476, - "learning_rate": 1.926630031189663e-05, - "loss": 0.077, + "epoch": 3.5778998960344572, + "grad_norm": 0.5443336367607117, + "learning_rate": 6.422100103965543e-06, + "loss": 0.06, "step": 24090 }, { - "epoch": 1.789692559037576, - "grad_norm": 0.8561720848083496, - "learning_rate": 1.9261844645774544e-05, - "loss": 0.0799, + "epoch": 3.579385118075152, + "grad_norm": 0.5380187034606934, + "learning_rate": 6.420614881924848e-06, + "loss": 0.0522, "step": 24100 }, { - "epoch": 1.7904351700579237, - "grad_norm": 2.2343969345092773, - "learning_rate": 1.925738897965246e-05, - "loss": 0.0798, + "epoch": 3.5808703401158475, + "grad_norm": 1.4146056175231934, + "learning_rate": 6.4191296598841535e-06, + "loss": 0.0713, "step": 24110 }, { - "epoch": 1.7911777810782712, - "grad_norm": 0.8606523275375366, - "learning_rate": 1.925293331353037e-05, - "loss": 0.0636, + "epoch": 3.5823555621565424, + "grad_norm": 1.016329288482666, + "learning_rate": 6.417644437843458e-06, + "loss": 0.0527, "step": 24120 }, { - "epoch": 1.7919203920986186, - "grad_norm": 1.764185905456543, - "learning_rate": 1.924847764740829e-05, - "loss": 0.072, + "epoch": 3.5838407841972373, + "grad_norm": 0.656102180480957, + "learning_rate": 6.416159215802763e-06, + "loss": 0.0422, "step": 24130 }, { - "epoch": 1.7926630031189663, - "grad_norm": 2.313272714614868, - "learning_rate": 1.9244021981286204e-05, - "loss": 0.0701, + "epoch": 3.5853260062379326, + "grad_norm": 0.3345576822757721, + "learning_rate": 6.4146739937620685e-06, + "loss": 0.0593, "step": 24140 }, { - "epoch": 1.793405614139314, - "grad_norm": 0.961453914642334, - "learning_rate": 1.9239566315164115e-05, - "loss": 0.0845, + "epoch": 3.5868112282786275, + "grad_norm": 0.9102980494499207, + "learning_rate": 6.413188771721373e-06, + "loss": 0.0616, "step": 24150 }, { - "epoch": 1.7941482251596614, - "grad_norm": 0.6591196060180664, - "learning_rate": 1.9235110649042034e-05, - "loss": 0.0578, + "epoch": 3.588296450319323, + "grad_norm": 0.9360125660896301, + "learning_rate": 6.411703549680678e-06, + "loss": 0.0526, "step": 24160 }, { - "epoch": 1.7948908361800089, - "grad_norm": 2.050917148590088, - "learning_rate": 1.923065498291995e-05, - "loss": 0.07, + "epoch": 3.5897816723600178, + "grad_norm": 1.1276384592056274, + "learning_rate": 6.4102183276399835e-06, + "loss": 0.066, "step": 24170 }, { - "epoch": 1.7956334472003563, - "grad_norm": 1.651291847229004, - "learning_rate": 1.922619931679786e-05, - "loss": 0.1065, + "epoch": 3.5912668944007127, + "grad_norm": 0.8606460690498352, + "learning_rate": 6.408733105599287e-06, + "loss": 0.075, "step": 24180 }, { - "epoch": 1.796376058220704, - "grad_norm": 1.6536940336227417, - "learning_rate": 1.9221743650675775e-05, - "loss": 0.0675, + "epoch": 3.592752116441408, + "grad_norm": 0.9443903565406799, + "learning_rate": 6.407247883558592e-06, + "loss": 0.0618, "step": 24190 }, { - "epoch": 1.7971186692410517, - "grad_norm": 1.4677518606185913, - "learning_rate": 1.921728798455369e-05, - "loss": 0.0531, + "epoch": 3.594237338482103, + "grad_norm": 0.37735462188720703, + "learning_rate": 6.405762661517897e-06, + "loss": 0.0596, "step": 24200 }, { - "epoch": 1.7978612802613991, - "grad_norm": 0.6976707577705383, - "learning_rate": 1.9212832318431605e-05, - "loss": 0.0458, + "epoch": 3.5957225605227983, + "grad_norm": 0.828709602355957, + "learning_rate": 6.404277439477202e-06, + "loss": 0.051, "step": 24210 }, { - "epoch": 1.7986038912817466, - "grad_norm": 2.0088629722595215, - "learning_rate": 1.920837665230952e-05, - "loss": 0.0756, + "epoch": 3.597207782563493, + "grad_norm": 0.9393916130065918, + "learning_rate": 6.402792217436507e-06, + "loss": 0.0636, "step": 24220 }, { - "epoch": 1.799346502302094, - "grad_norm": 1.7095671892166138, - "learning_rate": 1.9203920986187435e-05, - "loss": 0.0709, + "epoch": 3.598693004604188, + "grad_norm": 1.130899429321289, + "learning_rate": 6.401306995395812e-06, + "loss": 0.0716, "step": 24230 }, { - "epoch": 1.8000891133224417, - "grad_norm": 1.893115758895874, - "learning_rate": 1.919946532006535e-05, - "loss": 0.0676, + "epoch": 3.6001782266448834, + "grad_norm": 0.730121374130249, + "learning_rate": 6.399821773355117e-06, + "loss": 0.0686, "step": 24240 }, { - "epoch": 1.8008317243427894, - "grad_norm": 2.080127716064453, - "learning_rate": 1.9195009653943265e-05, - "loss": 0.0533, + "epoch": 3.6016634486855788, + "grad_norm": 1.095799446105957, + "learning_rate": 6.398336551314422e-06, + "loss": 0.0568, "step": 24250 }, { - "epoch": 1.8015743353631368, - "grad_norm": 1.7303770780563354, - "learning_rate": 1.919055398782118e-05, - "loss": 0.0741, + "epoch": 3.6031486707262737, + "grad_norm": 1.1297565698623657, + "learning_rate": 6.396851329273727e-06, + "loss": 0.0611, "step": 24260 }, { - "epoch": 1.8023169463834843, - "grad_norm": 1.3288146257400513, - "learning_rate": 1.9186098321699095e-05, - "loss": 0.0812, + "epoch": 3.6046338927669686, + "grad_norm": 1.272932529449463, + "learning_rate": 6.395366107233032e-06, + "loss": 0.0617, "step": 24270 }, { - "epoch": 1.8030595574038317, - "grad_norm": 2.545034408569336, - "learning_rate": 1.918164265557701e-05, - "loss": 0.0959, + "epoch": 3.606119114807664, + "grad_norm": 1.3354920148849487, + "learning_rate": 6.393880885192337e-06, + "loss": 0.0724, "step": 24280 }, { - "epoch": 1.8038021684241794, - "grad_norm": 2.7964491844177246, - "learning_rate": 1.9177186989454922e-05, - "loss": 0.0649, + "epoch": 3.607604336848359, + "grad_norm": 0.8653399348258972, + "learning_rate": 6.392395663151642e-06, + "loss": 0.072, "step": 24290 }, { - "epoch": 1.804544779444527, - "grad_norm": 1.9604982137680054, - "learning_rate": 1.917273132333284e-05, - "loss": 0.0733, + "epoch": 3.609089558889054, + "grad_norm": 0.9461974501609802, + "learning_rate": 6.390910441110947e-06, + "loss": 0.0675, "step": 24300 }, { - "epoch": 1.8052873904648745, - "grad_norm": 2.917266607284546, - "learning_rate": 1.9168275657210752e-05, - "loss": 0.0903, + "epoch": 3.610574780929749, + "grad_norm": 0.8618866205215454, + "learning_rate": 6.389425219070251e-06, + "loss": 0.0591, "step": 24310 }, { - "epoch": 1.806030001485222, - "grad_norm": 2.3615570068359375, - "learning_rate": 1.9163819991088667e-05, - "loss": 0.065, + "epoch": 3.612060002970444, + "grad_norm": 0.4727649390697479, + "learning_rate": 6.387939997029556e-06, + "loss": 0.0763, "step": 24320 }, { - "epoch": 1.8067726125055694, - "grad_norm": 1.7897320985794067, - "learning_rate": 1.9159364324966585e-05, - "loss": 0.086, + "epoch": 3.6135452250111393, + "grad_norm": 1.1720751523971558, + "learning_rate": 6.386454774988862e-06, + "loss": 0.0532, "step": 24330 }, { - "epoch": 1.807515223525917, - "grad_norm": 1.5693241357803345, - "learning_rate": 1.9154908658844497e-05, - "loss": 0.0876, + "epoch": 3.615030447051834, + "grad_norm": 0.5803553462028503, + "learning_rate": 6.384969552948166e-06, + "loss": 0.0632, "step": 24340 }, { - "epoch": 1.8082578345462648, - "grad_norm": 1.0281766653060913, - "learning_rate": 1.9150452992722412e-05, - "loss": 0.0841, + "epoch": 3.6165156690925295, + "grad_norm": 0.7322667241096497, + "learning_rate": 6.383484330907471e-06, + "loss": 0.0493, "step": 24350 }, { - "epoch": 1.8090004455666122, - "grad_norm": 1.0320008993148804, - "learning_rate": 1.9145997326600327e-05, - "loss": 0.0852, + "epoch": 3.6180008911332244, + "grad_norm": 0.9475997090339661, + "learning_rate": 6.381999108866776e-06, + "loss": 0.0617, "step": 24360 }, { - "epoch": 1.8097430565869597, - "grad_norm": 0.7011651396751404, - "learning_rate": 1.9141541660478242e-05, - "loss": 0.0549, + "epoch": 3.6194861131739193, + "grad_norm": 0.8482145667076111, + "learning_rate": 6.380513886826081e-06, + "loss": 0.0771, "step": 24370 }, { - "epoch": 1.8104856676073073, - "grad_norm": 2.5355286598205566, - "learning_rate": 1.9137085994356157e-05, - "loss": 0.0809, + "epoch": 3.6209713352146147, + "grad_norm": 0.8984361290931702, + "learning_rate": 6.379028664785386e-06, + "loss": 0.0663, "step": 24380 }, { - "epoch": 1.8112282786276548, - "grad_norm": 1.371387243270874, - "learning_rate": 1.9132630328234072e-05, - "loss": 0.0568, + "epoch": 3.6224565572553096, + "grad_norm": 0.8019669651985168, + "learning_rate": 6.377543442744691e-06, + "loss": 0.0582, "step": 24390 }, { - "epoch": 1.8119708896480025, - "grad_norm": 1.14397132396698, - "learning_rate": 1.9128174662111987e-05, - "loss": 0.08, + "epoch": 3.623941779296005, + "grad_norm": 0.3070809543132782, + "learning_rate": 6.376058220703996e-06, + "loss": 0.0809, "step": 24400 }, { - "epoch": 1.81271350066835, - "grad_norm": 0.4540915787220001, - "learning_rate": 1.9123718995989902e-05, - "loss": 0.0759, + "epoch": 3.6254270013367, + "grad_norm": 0.7207964062690735, + "learning_rate": 6.374572998663301e-06, + "loss": 0.0469, "step": 24410 }, { - "epoch": 1.8134561116886974, - "grad_norm": 0.6488813757896423, - "learning_rate": 1.9119263329867814e-05, - "loss": 0.0805, + "epoch": 3.6269122233773947, + "grad_norm": 0.755179226398468, + "learning_rate": 6.373087776622605e-06, + "loss": 0.0554, "step": 24420 }, { - "epoch": 1.814198722709045, - "grad_norm": 2.155545949935913, - "learning_rate": 1.9114807663745732e-05, - "loss": 0.06, + "epoch": 3.62839744541809, + "grad_norm": 0.5037167072296143, + "learning_rate": 6.371602554581911e-06, + "loss": 0.0734, "step": 24430 }, { - "epoch": 1.8149413337293927, - "grad_norm": 2.4752142429351807, - "learning_rate": 1.9110351997623647e-05, - "loss": 0.0823, + "epoch": 3.629882667458785, + "grad_norm": 0.5806566476821899, + "learning_rate": 6.370117332541216e-06, + "loss": 0.0608, "step": 24440 }, { - "epoch": 1.8156839447497402, - "grad_norm": 1.4653511047363281, - "learning_rate": 1.910589633150156e-05, - "loss": 0.0863, + "epoch": 3.6313678894994803, + "grad_norm": 1.1839208602905273, + "learning_rate": 6.36863211050052e-06, + "loss": 0.0766, "step": 24450 }, { - "epoch": 1.8164265557700876, - "grad_norm": 1.1317205429077148, - "learning_rate": 1.9101440665379474e-05, - "loss": 0.0554, + "epoch": 3.632853111540175, + "grad_norm": 0.4069625437259674, + "learning_rate": 6.367146888459826e-06, + "loss": 0.0609, "step": 24460 }, { - "epoch": 1.817169166790435, - "grad_norm": 1.3294923305511475, - "learning_rate": 1.9096984999257392e-05, - "loss": 0.0619, + "epoch": 3.63433833358087, + "grad_norm": 0.9564564824104309, + "learning_rate": 6.365661666419131e-06, + "loss": 0.0662, "step": 24470 }, { - "epoch": 1.8179117778107827, - "grad_norm": 2.8235435485839844, - "learning_rate": 1.9092529333135304e-05, - "loss": 0.0769, + "epoch": 3.6358235556215655, + "grad_norm": 0.5861666202545166, + "learning_rate": 6.3641764443784345e-06, + "loss": 0.0597, "step": 24480 }, { - "epoch": 1.8186543888311304, - "grad_norm": 0.2612536549568176, - "learning_rate": 1.908807366701322e-05, - "loss": 0.0782, + "epoch": 3.6373087776622604, + "grad_norm": 1.0000677108764648, + "learning_rate": 6.36269122233774e-06, + "loss": 0.0735, "step": 24490 }, { - "epoch": 1.8193969998514778, - "grad_norm": 2.1067092418670654, - "learning_rate": 1.9083618000891133e-05, - "loss": 0.0641, + "epoch": 3.6387939997029557, + "grad_norm": 0.9394515156745911, + "learning_rate": 6.361206000297044e-06, + "loss": 0.0565, "step": 24500 }, { - "epoch": 1.8201396108718253, - "grad_norm": 1.507546067237854, - "learning_rate": 1.907916233476905e-05, - "loss": 0.0673, + "epoch": 3.6402792217436506, + "grad_norm": 0.7486881017684937, + "learning_rate": 6.3597207782563495e-06, + "loss": 0.068, "step": 24510 }, { - "epoch": 1.8208822218921727, - "grad_norm": 1.1641823053359985, - "learning_rate": 1.9074706668646963e-05, - "loss": 0.0735, + "epoch": 3.6417644437843455, + "grad_norm": 0.7040836215019226, + "learning_rate": 6.358235556215655e-06, + "loss": 0.0592, "step": 24520 }, { - "epoch": 1.8216248329125204, - "grad_norm": 1.6520384550094604, - "learning_rate": 1.9070251002524875e-05, - "loss": 0.0714, + "epoch": 3.643249665825041, + "grad_norm": 1.6331576108932495, + "learning_rate": 6.356750334174959e-06, + "loss": 0.0806, "step": 24530 }, { - "epoch": 1.822367443932868, - "grad_norm": 1.908644676208496, - "learning_rate": 1.9065795336402793e-05, - "loss": 0.0808, + "epoch": 3.644734887865736, + "grad_norm": 0.8690558075904846, + "learning_rate": 6.3552651121342645e-06, + "loss": 0.0699, "step": 24540 }, { - "epoch": 1.8231100549532155, - "grad_norm": 1.606765866279602, - "learning_rate": 1.906133967028071e-05, - "loss": 0.0944, + "epoch": 3.646220109906431, + "grad_norm": 0.4748252332210541, + "learning_rate": 6.35377989009357e-06, + "loss": 0.0452, "step": 24550 }, { - "epoch": 1.823852665973563, - "grad_norm": 1.8764336109161377, - "learning_rate": 1.905688400415862e-05, - "loss": 0.105, + "epoch": 3.647705331947126, + "grad_norm": 0.7027367949485779, + "learning_rate": 6.352294668052874e-06, + "loss": 0.0549, "step": 24560 }, { - "epoch": 1.8245952769939104, - "grad_norm": 1.009965419769287, - "learning_rate": 1.905242833803654e-05, - "loss": 0.039, + "epoch": 3.6491905539878213, + "grad_norm": 0.8721972703933716, + "learning_rate": 6.3508094460121795e-06, + "loss": 0.0716, "step": 24570 }, { - "epoch": 1.8253378880142581, - "grad_norm": 1.2469267845153809, - "learning_rate": 1.9047972671914453e-05, - "loss": 0.0539, + "epoch": 3.6506757760285162, + "grad_norm": 0.5169384479522705, + "learning_rate": 6.349324223971485e-06, + "loss": 0.0529, "step": 24580 }, { - "epoch": 1.8260804990346058, - "grad_norm": 2.306645393371582, - "learning_rate": 1.9043517005792365e-05, - "loss": 0.0849, + "epoch": 3.6521609980692116, + "grad_norm": 0.9389125108718872, + "learning_rate": 6.347839001930789e-06, + "loss": 0.0567, "step": 24590 }, { - "epoch": 1.8268231100549532, - "grad_norm": 2.0731003284454346, - "learning_rate": 1.903906133967028e-05, - "loss": 0.082, + "epoch": 3.6536462201099065, + "grad_norm": 0.5857188701629639, + "learning_rate": 6.3463537798900945e-06, + "loss": 0.0657, "step": 24600 }, { - "epoch": 1.8275657210753007, - "grad_norm": 2.53838849067688, - "learning_rate": 1.9034605673548195e-05, - "loss": 0.0964, + "epoch": 3.6551314421506014, + "grad_norm": 1.0817548036575317, + "learning_rate": 6.344868557849398e-06, + "loss": 0.0675, "step": 24610 }, { - "epoch": 1.8283083320956481, - "grad_norm": 0.8338577747344971, - "learning_rate": 1.903015000742611e-05, - "loss": 0.0842, + "epoch": 3.6566166641912967, + "grad_norm": 0.20750167965888977, + "learning_rate": 6.343383335808704e-06, + "loss": 0.0729, "step": 24620 }, { - "epoch": 1.8290509431159958, - "grad_norm": 0.40346524119377136, - "learning_rate": 1.9025694341304025e-05, - "loss": 0.0786, + "epoch": 3.6581018862319916, + "grad_norm": 0.8091875910758972, + "learning_rate": 6.3418981137680095e-06, + "loss": 0.077, "step": 24630 }, { - "epoch": 1.8297935541363435, - "grad_norm": 3.7028415203094482, - "learning_rate": 1.902123867518194e-05, - "loss": 0.0967, + "epoch": 3.659587108272687, + "grad_norm": 1.0680159330368042, + "learning_rate": 6.340412891727313e-06, + "loss": 0.0823, "step": 24640 }, { - "epoch": 1.830536165156691, - "grad_norm": 1.1305569410324097, - "learning_rate": 1.9016783009059855e-05, - "loss": 0.0596, + "epoch": 3.661072330313382, + "grad_norm": 0.6819460988044739, + "learning_rate": 6.338927669686618e-06, + "loss": 0.0765, "step": 24650 }, { - "epoch": 1.8312787761770384, - "grad_norm": 1.63505220413208, - "learning_rate": 1.901232734293777e-05, - "loss": 0.0985, + "epoch": 3.6625575523540768, + "grad_norm": 0.5348358750343323, + "learning_rate": 6.337442447645924e-06, + "loss": 0.0616, "step": 24660 }, { - "epoch": 1.832021387197386, - "grad_norm": 1.5454314947128296, - "learning_rate": 1.9007871676815685e-05, - "loss": 0.0677, + "epoch": 3.664042774394772, + "grad_norm": 0.5871132016181946, + "learning_rate": 6.335957225605228e-06, + "loss": 0.0572, "step": 24670 }, { - "epoch": 1.8327639982177335, - "grad_norm": 1.4274262189865112, - "learning_rate": 1.90034160106936e-05, - "loss": 0.0681, + "epoch": 3.665527996435467, + "grad_norm": 1.407729983329773, + "learning_rate": 6.334472003564533e-06, + "loss": 0.0809, "step": 24680 }, { - "epoch": 1.8335066092380812, - "grad_norm": 0.888826847076416, - "learning_rate": 1.8998960344571515e-05, - "loss": 0.0922, + "epoch": 3.6670132184761624, + "grad_norm": 1.2429784536361694, + "learning_rate": 6.332986781523839e-06, + "loss": 0.077, "step": 24690 }, { - "epoch": 1.8342492202584286, - "grad_norm": 0.6009930968284607, - "learning_rate": 1.8994504678449427e-05, - "loss": 0.08, + "epoch": 3.6684984405168573, + "grad_norm": 0.5828059911727905, + "learning_rate": 6.331501559483143e-06, + "loss": 0.0546, "step": 24700 }, { - "epoch": 1.834991831278776, - "grad_norm": 1.8012096881866455, - "learning_rate": 1.8990049012327345e-05, - "loss": 0.0726, + "epoch": 3.669983662557552, + "grad_norm": 0.8209594488143921, + "learning_rate": 6.330016337442448e-06, + "loss": 0.0577, "step": 24710 }, { - "epoch": 1.8357344422991237, - "grad_norm": 2.0512189865112305, - "learning_rate": 1.8985593346205257e-05, - "loss": 0.089, + "epoch": 3.6714688845982475, + "grad_norm": 0.982406497001648, + "learning_rate": 6.328531115401753e-06, + "loss": 0.048, "step": 24720 }, { - "epoch": 1.8364770533194714, - "grad_norm": 1.3309962749481201, - "learning_rate": 1.898113768008317e-05, - "loss": 0.042, + "epoch": 3.6729541066389424, + "grad_norm": 0.6951733827590942, + "learning_rate": 6.327045893361058e-06, + "loss": 0.0392, "step": 24730 }, { - "epoch": 1.8372196643398189, - "grad_norm": 2.834207057952881, - "learning_rate": 1.897668201396109e-05, - "loss": 0.0774, + "epoch": 3.6744393286796377, + "grad_norm": 0.7873642444610596, + "learning_rate": 6.325560671320363e-06, + "loss": 0.0755, "step": 24740 }, { - "epoch": 1.8379622753601663, - "grad_norm": 0.6929059028625488, - "learning_rate": 1.8972226347839e-05, - "loss": 0.0601, + "epoch": 3.6759245507203326, + "grad_norm": 0.5620073080062866, + "learning_rate": 6.324075449279668e-06, + "loss": 0.0514, "step": 24750 }, { - "epoch": 1.8387048863805138, - "grad_norm": 2.1518747806549072, - "learning_rate": 1.8967770681716917e-05, - "loss": 0.0647, + "epoch": 3.6774097727610275, + "grad_norm": 0.9963449835777283, + "learning_rate": 6.322590227238973e-06, + "loss": 0.0903, "step": 24760 }, { - "epoch": 1.8394474974008614, - "grad_norm": 1.8092671632766724, - "learning_rate": 1.896331501559483e-05, - "loss": 0.0596, + "epoch": 3.678894994801723, + "grad_norm": 0.5815417766571045, + "learning_rate": 6.321105005198278e-06, + "loss": 0.0651, "step": 24770 }, { - "epoch": 1.8401901084212091, - "grad_norm": 1.499583125114441, - "learning_rate": 1.8958859349472747e-05, - "loss": 0.0625, + "epoch": 3.680380216842418, + "grad_norm": 0.6440846920013428, + "learning_rate": 6.319619783157582e-06, + "loss": 0.0617, "step": 24780 }, { - "epoch": 1.8409327194415566, - "grad_norm": 1.1313135623931885, - "learning_rate": 1.895440368335066e-05, - "loss": 0.0761, + "epoch": 3.681865438883113, + "grad_norm": 1.67220139503479, + "learning_rate": 6.318134561116888e-06, + "loss": 0.0821, "step": 24790 }, { - "epoch": 1.841675330461904, - "grad_norm": 3.332144260406494, - "learning_rate": 1.8949948017228577e-05, - "loss": 0.1059, + "epoch": 3.683350660923808, + "grad_norm": 0.5515984296798706, + "learning_rate": 6.316649339076193e-06, + "loss": 0.0559, "step": 24800 }, { - "epoch": 1.8424179414822515, - "grad_norm": 1.193617820739746, - "learning_rate": 1.894549235110649e-05, - "loss": 0.0897, + "epoch": 3.684835882964503, + "grad_norm": 0.6719434857368469, + "learning_rate": 6.315164117035497e-06, + "loss": 0.048, "step": 24810 }, { - "epoch": 1.8431605525025991, - "grad_norm": 1.0771690607070923, - "learning_rate": 1.8941036684984407e-05, - "loss": 0.0709, + "epoch": 3.6863211050051983, + "grad_norm": 0.9868667721748352, + "learning_rate": 6.313678894994802e-06, + "loss": 0.0769, "step": 24820 }, { - "epoch": 1.8439031635229468, - "grad_norm": 2.919710874557495, - "learning_rate": 1.8936581018862318e-05, - "loss": 0.0729, + "epoch": 3.6878063270458936, + "grad_norm": 0.6065626740455627, + "learning_rate": 6.312193672954107e-06, + "loss": 0.0749, "step": 24830 }, { - "epoch": 1.8446457745432943, - "grad_norm": 1.2123440504074097, - "learning_rate": 1.8932125352740237e-05, - "loss": 0.0586, + "epoch": 3.6892915490865885, + "grad_norm": 0.5721997618675232, + "learning_rate": 6.310708450913412e-06, + "loss": 0.0541, "step": 24840 }, { - "epoch": 1.8453883855636417, - "grad_norm": 2.8051271438598633, - "learning_rate": 1.892766968661815e-05, - "loss": 0.1141, + "epoch": 3.6907767711272834, + "grad_norm": 0.5374056100845337, + "learning_rate": 6.309223228872717e-06, + "loss": 0.058, "step": 24850 }, { - "epoch": 1.8461309965839892, - "grad_norm": 1.1732330322265625, - "learning_rate": 1.8923214020496063e-05, - "loss": 0.0702, + "epoch": 3.6922619931679788, + "grad_norm": 0.7645929455757141, + "learning_rate": 6.307738006832022e-06, + "loss": 0.0908, "step": 24860 }, { - "epoch": 1.8468736076043368, - "grad_norm": 1.468248963356018, - "learning_rate": 1.8918758354373978e-05, - "loss": 0.0755, + "epoch": 3.6937472152086737, + "grad_norm": 0.4349495470523834, + "learning_rate": 6.306252784791327e-06, + "loss": 0.0519, "step": 24870 }, { - "epoch": 1.8476162186246845, - "grad_norm": 1.1212787628173828, - "learning_rate": 1.8914302688251896e-05, - "loss": 0.0733, + "epoch": 3.695232437249369, + "grad_norm": 0.8713021278381348, + "learning_rate": 6.304767562750632e-06, + "loss": 0.0636, "step": 24880 }, { - "epoch": 1.848358829645032, - "grad_norm": 1.3834295272827148, - "learning_rate": 1.8909847022129808e-05, - "loss": 0.0582, + "epoch": 3.696717659290064, + "grad_norm": 1.642775535583496, + "learning_rate": 6.303282340709937e-06, + "loss": 0.0604, "step": 24890 }, { - "epoch": 1.8491014406653794, - "grad_norm": 1.426999807357788, - "learning_rate": 1.8905391356007723e-05, - "loss": 0.0763, + "epoch": 3.698202881330759, + "grad_norm": 1.0799657106399536, + "learning_rate": 6.301797118669242e-06, + "loss": 0.0684, "step": 24900 }, { - "epoch": 1.8498440516857269, - "grad_norm": 1.5629621744155884, - "learning_rate": 1.890093568988564e-05, - "loss": 0.0593, + "epoch": 3.699688103371454, + "grad_norm": 1.1717464923858643, + "learning_rate": 6.300311896628547e-06, + "loss": 0.0724, "step": 24910 }, { - "epoch": 1.8505866627060745, - "grad_norm": 1.6968507766723633, - "learning_rate": 1.8896480023763553e-05, - "loss": 0.1121, + "epoch": 3.701173325412149, + "grad_norm": 0.6183547973632812, + "learning_rate": 6.298826674587852e-06, + "loss": 0.0655, "step": 24920 }, { - "epoch": 1.8513292737264222, - "grad_norm": 1.8145391941070557, - "learning_rate": 1.8892024357641468e-05, - "loss": 0.0594, + "epoch": 3.7026585474528444, + "grad_norm": 1.1880210638046265, + "learning_rate": 6.297341452547157e-06, + "loss": 0.0579, "step": 24930 }, { - "epoch": 1.8520718847467696, - "grad_norm": 2.452359914779663, - "learning_rate": 1.888756869151938e-05, - "loss": 0.0626, + "epoch": 3.7041437694935393, + "grad_norm": 0.7819348573684692, + "learning_rate": 6.2958562305064605e-06, + "loss": 0.0859, "step": 24940 }, { - "epoch": 1.852814495767117, - "grad_norm": 0.7949011921882629, - "learning_rate": 1.8883113025397298e-05, - "loss": 0.0816, + "epoch": 3.705628991534234, + "grad_norm": 0.2400822639465332, + "learning_rate": 6.294371008465766e-06, + "loss": 0.0696, "step": 24950 }, { - "epoch": 1.8535571067874648, - "grad_norm": 1.0505071878433228, - "learning_rate": 1.8878657359275213e-05, - "loss": 0.0874, + "epoch": 3.7071142135749295, + "grad_norm": 0.9348524212837219, + "learning_rate": 6.292885786425071e-06, + "loss": 0.0821, "step": 24960 }, { - "epoch": 1.8542997178078122, - "grad_norm": 1.7781516313552856, - "learning_rate": 1.8874201693153125e-05, - "loss": 0.0677, + "epoch": 3.7085994356156244, + "grad_norm": 0.6501826047897339, + "learning_rate": 6.2914005643843755e-06, + "loss": 0.0694, "step": 24970 }, { - "epoch": 1.85504232882816, - "grad_norm": 1.9626563787460327, - "learning_rate": 1.8869746027031043e-05, - "loss": 0.0692, + "epoch": 3.71008465765632, + "grad_norm": 0.9616579413414001, + "learning_rate": 6.289915342343681e-06, + "loss": 0.0566, "step": 24980 }, { - "epoch": 1.8557849398485073, - "grad_norm": 0.7430385947227478, - "learning_rate": 1.8865290360908958e-05, - "loss": 0.0372, + "epoch": 3.7115698796970147, + "grad_norm": 0.6568595170974731, + "learning_rate": 6.288430120302986e-06, + "loss": 0.0726, "step": 24990 }, { - "epoch": 1.8565275508688548, - "grad_norm": 2.076448440551758, - "learning_rate": 1.886083469478687e-05, - "loss": 0.0702, + "epoch": 3.7130551017377096, + "grad_norm": 0.4263269305229187, + "learning_rate": 6.2869448982622904e-06, + "loss": 0.0647, "step": 25000 }, { - "epoch": 1.8572701618892025, - "grad_norm": 1.5963934659957886, - "learning_rate": 1.8856379028664788e-05, - "loss": 0.0672, + "epoch": 3.714540323778405, + "grad_norm": 0.9449558258056641, + "learning_rate": 6.285459676221596e-06, + "loss": 0.0471, "step": 25010 }, { - "epoch": 1.8580127729095501, - "grad_norm": 2.167839765548706, - "learning_rate": 1.88519233625427e-05, - "loss": 0.0642, + "epoch": 3.7160255458191, + "grad_norm": 0.7658859491348267, + "learning_rate": 6.2839744541809e-06, + "loss": 0.0626, "step": 25020 }, { - "epoch": 1.8587553839298976, - "grad_norm": 1.1535776853561401, - "learning_rate": 1.8847467696420615e-05, - "loss": 0.1037, + "epoch": 3.717510767859795, + "grad_norm": 1.5539830923080444, + "learning_rate": 6.2824892321402054e-06, + "loss": 0.0663, "step": 25030 }, { - "epoch": 1.859497994950245, - "grad_norm": 0.5450434684753418, - "learning_rate": 1.884301203029853e-05, - "loss": 0.0663, + "epoch": 3.71899598990049, + "grad_norm": 1.5915319919586182, + "learning_rate": 6.281004010099511e-06, + "loss": 0.0729, "step": 25040 }, { - "epoch": 1.8602406059705925, - "grad_norm": 1.2037556171417236, - "learning_rate": 1.8838556364176445e-05, - "loss": 0.0856, + "epoch": 3.720481211941185, + "grad_norm": 0.37841907143592834, + "learning_rate": 6.279518788058815e-06, + "loss": 0.0725, "step": 25050 }, { - "epoch": 1.8609832169909402, - "grad_norm": 1.5233439207077026, - "learning_rate": 1.883410069805436e-05, - "loss": 0.0765, + "epoch": 3.7219664339818803, + "grad_norm": 1.779516339302063, + "learning_rate": 6.2780335660181204e-06, + "loss": 0.0609, "step": 25060 }, { - "epoch": 1.8617258280112878, - "grad_norm": 1.9642084836959839, - "learning_rate": 1.8829645031932275e-05, - "loss": 0.0701, + "epoch": 3.723451656022575, + "grad_norm": 0.7953817844390869, + "learning_rate": 6.276548343977426e-06, + "loss": 0.0695, "step": 25070 }, { - "epoch": 1.8624684390316353, - "grad_norm": 4.366384983062744, - "learning_rate": 1.882518936581019e-05, - "loss": 0.0879, + "epoch": 3.7249368780632706, + "grad_norm": 0.8810813426971436, + "learning_rate": 6.27506312193673e-06, + "loss": 0.0714, "step": 25080 }, { - "epoch": 1.8632110500519827, - "grad_norm": 1.4830248355865479, - "learning_rate": 1.8820733699688105e-05, - "loss": 0.0753, + "epoch": 3.7264221001039655, + "grad_norm": 1.2131332159042358, + "learning_rate": 6.2735778998960354e-06, + "loss": 0.0642, "step": 25090 }, { - "epoch": 1.8639536610723302, - "grad_norm": 0.9438735842704773, - "learning_rate": 1.881627803356602e-05, - "loss": 0.0621, + "epoch": 3.7279073221446604, + "grad_norm": 1.101547360420227, + "learning_rate": 6.272092677855341e-06, + "loss": 0.0596, "step": 25100 }, { - "epoch": 1.8646962720926779, - "grad_norm": 1.960681438446045, - "learning_rate": 1.881182236744393e-05, - "loss": 0.102, + "epoch": 3.7293925441853557, + "grad_norm": 0.7129486203193665, + "learning_rate": 6.270607455814644e-06, + "loss": 0.0804, "step": 25110 }, { - "epoch": 1.8654388831130255, - "grad_norm": 0.6813110709190369, - "learning_rate": 1.880736670132185e-05, - "loss": 0.0515, + "epoch": 3.730877766226051, + "grad_norm": 0.33758345246315, + "learning_rate": 6.26912223377395e-06, + "loss": 0.0565, "step": 25120 }, { - "epoch": 1.866181494133373, - "grad_norm": 2.467663049697876, - "learning_rate": 1.880291103519976e-05, - "loss": 0.0824, + "epoch": 3.732362988266746, + "grad_norm": 0.6880179047584534, + "learning_rate": 6.267637011733254e-06, + "loss": 0.0703, "step": 25130 }, { - "epoch": 1.8669241051537204, - "grad_norm": 0.4229584336280823, - "learning_rate": 1.8798455369077676e-05, - "loss": 0.0507, + "epoch": 3.733848210307441, + "grad_norm": 0.6616323590278625, + "learning_rate": 6.266151789692559e-06, + "loss": 0.0585, "step": 25140 }, { - "epoch": 1.8676667161740679, - "grad_norm": 1.0609776973724365, - "learning_rate": 1.8793999702955595e-05, - "loss": 0.0637, + "epoch": 3.735333432348136, + "grad_norm": 0.4032650887966156, + "learning_rate": 6.2646665676518646e-06, + "loss": 0.0564, "step": 25150 }, { - "epoch": 1.8684093271944155, - "grad_norm": 1.177756905555725, - "learning_rate": 1.8789544036833506e-05, - "loss": 0.0679, + "epoch": 3.736818654388831, + "grad_norm": 0.6478084325790405, + "learning_rate": 6.263181345611169e-06, + "loss": 0.0631, "step": 25160 }, { - "epoch": 1.8691519382147632, - "grad_norm": 1.098395586013794, - "learning_rate": 1.878508837071142e-05, - "loss": 0.0546, + "epoch": 3.7383038764295264, + "grad_norm": 0.5424610376358032, + "learning_rate": 6.261696123570474e-06, + "loss": 0.0668, "step": 25170 }, { - "epoch": 1.8698945492351107, - "grad_norm": 0.5572521686553955, - "learning_rate": 1.8780632704589336e-05, - "loss": 0.0842, + "epoch": 3.7397890984702213, + "grad_norm": 0.5494372844696045, + "learning_rate": 6.2602109015297796e-06, + "loss": 0.075, "step": 25180 }, { - "epoch": 1.8706371602554581, - "grad_norm": 1.3587162494659424, - "learning_rate": 1.877617703846725e-05, - "loss": 0.0654, + "epoch": 3.7412743205109162, + "grad_norm": 0.7170408964157104, + "learning_rate": 6.258725679489084e-06, + "loss": 0.0684, "step": 25190 }, { - "epoch": 1.8713797712758056, - "grad_norm": 4.091668605804443, - "learning_rate": 1.8771721372345166e-05, - "loss": 0.1023, + "epoch": 3.7427595425516116, + "grad_norm": 1.0184485912322998, + "learning_rate": 6.257240457448389e-06, + "loss": 0.0612, "step": 25200 }, { - "epoch": 1.8721223822961532, - "grad_norm": 2.425302267074585, - "learning_rate": 1.876726570622308e-05, - "loss": 0.0869, + "epoch": 3.7442447645923065, + "grad_norm": 1.0246021747589111, + "learning_rate": 6.2557552354076946e-06, + "loss": 0.0739, "step": 25210 }, { - "epoch": 1.872864993316501, - "grad_norm": 0.6244649291038513, - "learning_rate": 1.8762810040100996e-05, - "loss": 0.0898, + "epoch": 3.745729986633002, + "grad_norm": 0.57315993309021, + "learning_rate": 6.254270013366999e-06, + "loss": 0.056, "step": 25220 }, { - "epoch": 1.8736076043368484, - "grad_norm": 0.962476372718811, - "learning_rate": 1.875835437397891e-05, - "loss": 0.0778, + "epoch": 3.7472152086736967, + "grad_norm": 1.1567462682724, + "learning_rate": 6.252784791326304e-06, + "loss": 0.0506, "step": 25230 }, { - "epoch": 1.8743502153571958, - "grad_norm": 3.0341858863830566, - "learning_rate": 1.8753898707856823e-05, - "loss": 0.0679, + "epoch": 3.7487004307143916, + "grad_norm": 0.8292871117591858, + "learning_rate": 6.251299569285608e-06, + "loss": 0.074, "step": 25240 }, { - "epoch": 1.8750928263775435, - "grad_norm": 1.5342912673950195, - "learning_rate": 1.874944304173474e-05, - "loss": 0.0834, + "epoch": 3.750185652755087, + "grad_norm": 0.37577736377716064, + "learning_rate": 6.249814347244913e-06, + "loss": 0.0611, "step": 25250 }, { - "epoch": 1.875835437397891, - "grad_norm": 2.197174549102783, - "learning_rate": 1.8744987375612656e-05, - "loss": 0.0582, + "epoch": 3.751670874795782, + "grad_norm": 0.8664168119430542, + "learning_rate": 6.248329125204219e-06, + "loss": 0.0701, "step": 25260 }, { - "epoch": 1.8765780484182386, - "grad_norm": 1.7695233821868896, - "learning_rate": 1.8740531709490568e-05, - "loss": 0.0797, + "epoch": 3.753156096836477, + "grad_norm": 0.5670032501220703, + "learning_rate": 6.246843903163523e-06, + "loss": 0.0905, "step": 25270 }, { - "epoch": 1.877320659438586, - "grad_norm": 1.5914796590805054, - "learning_rate": 1.8736076043368483e-05, - "loss": 0.089, + "epoch": 3.754641318877172, + "grad_norm": 1.0470136404037476, + "learning_rate": 6.245358681122828e-06, + "loss": 0.0677, "step": 25280 }, { - "epoch": 1.8780632704589335, - "grad_norm": 3.123690128326416, - "learning_rate": 1.87316203772464e-05, - "loss": 0.069, + "epoch": 3.756126540917867, + "grad_norm": 0.6110045313835144, + "learning_rate": 6.243873459082133e-06, + "loss": 0.0588, "step": 25290 }, { - "epoch": 1.8788058814792812, - "grad_norm": 2.2254252433776855, - "learning_rate": 1.8727164711124313e-05, - "loss": 0.1029, + "epoch": 3.7576117629585624, + "grad_norm": 0.8389524221420288, + "learning_rate": 6.242388237041438e-06, + "loss": 0.0741, "step": 25300 }, { - "epoch": 1.8795484924996289, - "grad_norm": 1.571058750152588, - "learning_rate": 1.8722709045002228e-05, - "loss": 0.0705, + "epoch": 3.7590969849992573, + "grad_norm": 1.4117416143417358, + "learning_rate": 6.240903015000743e-06, + "loss": 0.0738, "step": 25310 }, { - "epoch": 1.8802911035199763, - "grad_norm": 1.3334083557128906, - "learning_rate": 1.8718253378880146e-05, - "loss": 0.0961, + "epoch": 3.7605822070399526, + "grad_norm": 0.7682881355285645, + "learning_rate": 6.239417792960048e-06, + "loss": 0.0679, "step": 25320 }, { - "epoch": 1.8810337145403238, - "grad_norm": 3.364617109298706, - "learning_rate": 1.8713797712758058e-05, - "loss": 0.0876, + "epoch": 3.7620674290806475, + "grad_norm": 0.460290789604187, + "learning_rate": 6.237932570919353e-06, + "loss": 0.0719, "step": 25330 }, { - "epoch": 1.8817763255606712, - "grad_norm": 0.7864534258842468, - "learning_rate": 1.8709342046635973e-05, - "loss": 0.0615, + "epoch": 3.7635526511213424, + "grad_norm": 0.7808231711387634, + "learning_rate": 6.236447348878658e-06, + "loss": 0.072, "step": 25340 }, { - "epoch": 1.8825189365810189, - "grad_norm": 2.1435587406158447, - "learning_rate": 1.8704886380513884e-05, - "loss": 0.0666, + "epoch": 3.7650378731620378, + "grad_norm": 0.7019503712654114, + "learning_rate": 6.2349621268379626e-06, + "loss": 0.0669, "step": 25350 }, { - "epoch": 1.8832615476013665, - "grad_norm": 1.164170503616333, - "learning_rate": 1.8700430714391803e-05, - "loss": 0.0619, + "epoch": 3.7665230952027327, + "grad_norm": 0.4001297354698181, + "learning_rate": 6.233476904797268e-06, + "loss": 0.0469, "step": 25360 }, { - "epoch": 1.884004158621714, - "grad_norm": 2.059136390686035, - "learning_rate": 1.8695975048269718e-05, - "loss": 0.0787, + "epoch": 3.768008317243428, + "grad_norm": 0.43783921003341675, + "learning_rate": 6.231991682756573e-06, + "loss": 0.0635, "step": 25370 }, { - "epoch": 1.8847467696420614, - "grad_norm": 0.4554833173751831, - "learning_rate": 1.869151938214763e-05, - "loss": 0.0513, + "epoch": 3.769493539284123, + "grad_norm": 2.0623133182525635, + "learning_rate": 6.2305064607158776e-06, + "loss": 0.0614, "step": 25380 }, { - "epoch": 1.885489380662409, - "grad_norm": 0.9922448396682739, - "learning_rate": 1.8687063716025548e-05, - "loss": 0.0634, + "epoch": 3.770978761324818, + "grad_norm": 0.8735755681991577, + "learning_rate": 6.229021238675183e-06, + "loss": 0.0732, "step": 25390 }, { - "epoch": 1.8862319916827566, - "grad_norm": 1.9916400909423828, - "learning_rate": 1.8682608049903463e-05, - "loss": 0.0695, + "epoch": 3.772463983365513, + "grad_norm": 0.5950528979301453, + "learning_rate": 6.227536016634488e-06, + "loss": 0.0434, "step": 25400 }, { - "epoch": 1.8869746027031042, - "grad_norm": 0.31023046374320984, - "learning_rate": 1.8678152383781374e-05, - "loss": 0.0799, + "epoch": 3.7739492054062085, + "grad_norm": 1.145756483078003, + "learning_rate": 6.226050794593792e-06, + "loss": 0.0663, "step": 25410 }, { - "epoch": 1.8877172137234517, - "grad_norm": 1.627617597579956, - "learning_rate": 1.8673696717659293e-05, - "loss": 0.0685, + "epoch": 3.7754344274469034, + "grad_norm": 0.9675944447517395, + "learning_rate": 6.224565572553097e-06, + "loss": 0.051, "step": 25420 }, { - "epoch": 1.8884598247437991, - "grad_norm": 1.0137081146240234, - "learning_rate": 1.8669241051537208e-05, - "loss": 0.0887, + "epoch": 3.7769196494875983, + "grad_norm": 0.4051510989665985, + "learning_rate": 6.223080350512402e-06, + "loss": 0.0643, "step": 25430 }, { - "epoch": 1.8892024357641466, - "grad_norm": 1.8060729503631592, - "learning_rate": 1.866478538541512e-05, - "loss": 0.0443, + "epoch": 3.7784048715282936, + "grad_norm": 1.5336406230926514, + "learning_rate": 6.221595128471707e-06, + "loss": 0.0639, "step": 25440 }, { - "epoch": 1.8899450467844943, - "grad_norm": 1.671414852142334, - "learning_rate": 1.8660329719293034e-05, - "loss": 0.078, + "epoch": 3.7798900935689885, + "grad_norm": 1.1730071306228638, + "learning_rate": 6.220109906431012e-06, + "loss": 0.0547, "step": 25450 }, { - "epoch": 1.890687657804842, - "grad_norm": 1.3879966735839844, - "learning_rate": 1.865587405317095e-05, - "loss": 0.0741, + "epoch": 3.781375315609684, + "grad_norm": 0.4078196585178375, + "learning_rate": 6.218624684390316e-06, + "loss": 0.0613, "step": 25460 }, { - "epoch": 1.8914302688251894, - "grad_norm": 2.2087647914886475, - "learning_rate": 1.8651418387048864e-05, - "loss": 0.0602, + "epoch": 3.7828605376503788, + "grad_norm": 0.5529675483703613, + "learning_rate": 6.217139462349622e-06, + "loss": 0.0591, "step": 25470 }, { - "epoch": 1.8921728798455368, - "grad_norm": 1.051397681236267, - "learning_rate": 1.864696272092678e-05, - "loss": 0.0936, + "epoch": 3.7843457596910737, + "grad_norm": 0.8793727159500122, + "learning_rate": 6.215654240308927e-06, + "loss": 0.0599, "step": 25480 }, { - "epoch": 1.8929154908658843, - "grad_norm": 1.432411789894104, - "learning_rate": 1.8642507054804694e-05, - "loss": 0.0838, + "epoch": 3.785830981731769, + "grad_norm": 0.46623972058296204, + "learning_rate": 6.214169018268231e-06, + "loss": 0.0668, "step": 25490 }, { - "epoch": 1.893658101886232, - "grad_norm": 2.1286797523498535, - "learning_rate": 1.863805138868261e-05, - "loss": 0.0594, + "epoch": 3.787316203772464, + "grad_norm": 0.8581152558326721, + "learning_rate": 6.212683796227537e-06, + "loss": 0.0548, "step": 25500 }, { - "epoch": 1.8944007129065796, - "grad_norm": 2.0686354637145996, - "learning_rate": 1.8633595722560524e-05, - "loss": 0.0601, + "epoch": 3.7888014258131593, + "grad_norm": 1.0149903297424316, + "learning_rate": 6.211198574186842e-06, + "loss": 0.0669, "step": 25510 }, { - "epoch": 1.895143323926927, - "grad_norm": 1.1248515844345093, - "learning_rate": 1.8629140056438436e-05, - "loss": 0.0595, + "epoch": 3.790286647853854, + "grad_norm": 0.6500771045684814, + "learning_rate": 6.209713352146146e-06, + "loss": 0.0825, "step": 25520 }, { - "epoch": 1.8958859349472745, - "grad_norm": 1.444861650466919, - "learning_rate": 1.8624684390316354e-05, - "loss": 0.0484, + "epoch": 3.791771869894549, + "grad_norm": 0.389143705368042, + "learning_rate": 6.208228130105452e-06, + "loss": 0.0635, "step": 25530 }, { - "epoch": 1.8966285459676222, - "grad_norm": 0.935176432132721, - "learning_rate": 1.8620228724194266e-05, - "loss": 0.0826, + "epoch": 3.7932570919352444, + "grad_norm": 0.9267625212669373, + "learning_rate": 6.206742908064755e-06, + "loss": 0.0611, "step": 25540 }, { - "epoch": 1.8973711569879697, - "grad_norm": 1.5523897409439087, - "learning_rate": 1.861577305807218e-05, - "loss": 0.0676, + "epoch": 3.7947423139759393, + "grad_norm": 0.9734875559806824, + "learning_rate": 6.205257686024061e-06, + "loss": 0.0596, "step": 25550 }, { - "epoch": 1.8981137680083173, - "grad_norm": 0.7804394364356995, - "learning_rate": 1.86113173919501e-05, - "loss": 0.0848, + "epoch": 3.7962275360166347, + "grad_norm": 1.2023221254348755, + "learning_rate": 6.203772463983367e-06, + "loss": 0.0666, "step": 25560 }, { - "epoch": 1.8988563790286648, - "grad_norm": 2.2378127574920654, - "learning_rate": 1.860686172582801e-05, - "loss": 0.0884, + "epoch": 3.7977127580573296, + "grad_norm": 0.41107192635536194, + "learning_rate": 6.20228724194267e-06, + "loss": 0.0587, "step": 25570 }, { - "epoch": 1.8995989900490122, - "grad_norm": 0.5532150268554688, - "learning_rate": 1.8602406059705926e-05, - "loss": 0.0641, + "epoch": 3.7991979800980245, + "grad_norm": 0.35149720311164856, + "learning_rate": 6.2008020199019755e-06, + "loss": 0.0672, "step": 25580 }, { - "epoch": 1.90034160106936, - "grad_norm": 0.7949833869934082, - "learning_rate": 1.859795039358384e-05, - "loss": 0.0543, + "epoch": 3.80068320213872, + "grad_norm": 0.6217876076698303, + "learning_rate": 6.199316797861281e-06, + "loss": 0.0551, "step": 25590 }, { - "epoch": 1.9010842120897076, - "grad_norm": 1.0134397745132446, - "learning_rate": 1.8593494727461756e-05, - "loss": 0.0702, + "epoch": 3.8021684241794147, + "grad_norm": 0.991555392742157, + "learning_rate": 6.197831575820585e-06, + "loss": 0.0549, "step": 25600 }, { - "epoch": 1.901826823110055, - "grad_norm": 1.708309531211853, - "learning_rate": 1.858903906133967e-05, - "loss": 0.0884, + "epoch": 3.80365364622011, + "grad_norm": 0.6766214370727539, + "learning_rate": 6.1963463537798905e-06, + "loss": 0.0671, "step": 25610 }, { - "epoch": 1.9025694341304025, - "grad_norm": 0.8126017451286316, - "learning_rate": 1.8584583395217586e-05, - "loss": 0.0615, + "epoch": 3.805138868260805, + "grad_norm": 0.981841504573822, + "learning_rate": 6.194861131739196e-06, + "loss": 0.0826, "step": 25620 }, { - "epoch": 1.90331204515075, - "grad_norm": 1.7196837663650513, - "learning_rate": 1.85801277290955e-05, - "loss": 0.0671, + "epoch": 3.8066240903015, + "grad_norm": 0.48015373945236206, + "learning_rate": 6.1933759096985e-06, + "loss": 0.0626, "step": 25630 }, { - "epoch": 1.9040546561710976, - "grad_norm": 2.439037561416626, - "learning_rate": 1.8575672062973416e-05, - "loss": 0.0789, + "epoch": 3.808109312342195, + "grad_norm": 1.0952637195587158, + "learning_rate": 6.1918906876578055e-06, + "loss": 0.0672, "step": 25640 }, { - "epoch": 1.9047972671914453, - "grad_norm": 0.7953950762748718, - "learning_rate": 1.8571216396851327e-05, - "loss": 0.0709, + "epoch": 3.80959453438289, + "grad_norm": 0.9464254379272461, + "learning_rate": 6.19040546561711e-06, + "loss": 0.0747, "step": 25650 }, { - "epoch": 1.9055398782117927, - "grad_norm": 1.4773277044296265, - "learning_rate": 1.8566760730729246e-05, - "loss": 0.0926, + "epoch": 3.8110797564235854, + "grad_norm": 0.8909411430358887, + "learning_rate": 6.188920243576415e-06, + "loss": 0.0625, "step": 25660 }, { - "epoch": 1.9062824892321402, - "grad_norm": 2.0613648891448975, - "learning_rate": 1.856230506460716e-05, - "loss": 0.0926, + "epoch": 3.8125649784642803, + "grad_norm": 0.5844570994377136, + "learning_rate": 6.1874350215357205e-06, + "loss": 0.0595, "step": 25670 }, { - "epoch": 1.9070251002524876, - "grad_norm": 1.0905838012695312, - "learning_rate": 1.8557849398485072e-05, - "loss": 0.0546, + "epoch": 3.8140502005049752, + "grad_norm": 0.5538705587387085, + "learning_rate": 6.185949799495025e-06, + "loss": 0.0721, "step": 25680 }, { - "epoch": 1.9077677112728353, - "grad_norm": 0.6521821618080139, - "learning_rate": 1.8553393732362987e-05, - "loss": 0.0817, + "epoch": 3.8155354225456706, + "grad_norm": 1.0891708135604858, + "learning_rate": 6.18446457745433e-06, + "loss": 0.0582, "step": 25690 }, { - "epoch": 1.908510322293183, - "grad_norm": 1.17433762550354, - "learning_rate": 1.8548938066240906e-05, - "loss": 0.0703, + "epoch": 3.817020644586366, + "grad_norm": 1.3899599313735962, + "learning_rate": 6.1829793554136355e-06, + "loss": 0.066, "step": 25700 }, { - "epoch": 1.9092529333135304, - "grad_norm": 1.6955013275146484, - "learning_rate": 1.8544482400118817e-05, - "loss": 0.0973, + "epoch": 3.818505866627061, + "grad_norm": 1.2233537435531616, + "learning_rate": 6.181494133372939e-06, + "loss": 0.0601, "step": 25710 }, { - "epoch": 1.9099955443338779, - "grad_norm": 2.2255401611328125, - "learning_rate": 1.8540026733996732e-05, - "loss": 0.0678, + "epoch": 3.8199910886677557, + "grad_norm": 1.2534723281860352, + "learning_rate": 6.180008911332244e-06, + "loss": 0.0591, "step": 25720 }, { - "epoch": 1.9107381553542253, - "grad_norm": 1.3039476871490479, - "learning_rate": 1.853557106787465e-05, - "loss": 0.0667, + "epoch": 3.821476310708451, + "grad_norm": 0.7622737288475037, + "learning_rate": 6.1785236892915505e-06, + "loss": 0.0481, "step": 25730 }, { - "epoch": 1.911480766374573, - "grad_norm": 0.46905070543289185, - "learning_rate": 1.8531115401752562e-05, - "loss": 0.0575, + "epoch": 3.822961532749146, + "grad_norm": 0.5585414171218872, + "learning_rate": 6.177038467250854e-06, + "loss": 0.0593, "step": 25740 }, { - "epoch": 1.9122233773949207, - "grad_norm": 0.5787822008132935, - "learning_rate": 1.8526659735630477e-05, - "loss": 0.0515, + "epoch": 3.8244467547898413, + "grad_norm": 0.9674488306045532, + "learning_rate": 6.175553245210159e-06, + "loss": 0.0751, "step": 25750 }, { - "epoch": 1.912965988415268, - "grad_norm": 1.4693628549575806, - "learning_rate": 1.852220406950839e-05, - "loss": 0.0929, + "epoch": 3.825931976830536, + "grad_norm": 0.8868407607078552, + "learning_rate": 6.174068023169464e-06, + "loss": 0.0672, "step": 25760 }, { - "epoch": 1.9137085994356156, - "grad_norm": 1.5744353532791138, - "learning_rate": 1.8517748403386307e-05, - "loss": 0.0589, + "epoch": 3.827417198871231, + "grad_norm": 0.5498301386833191, + "learning_rate": 6.172582801128769e-06, + "loss": 0.0674, "step": 25770 }, { - "epoch": 1.914451210455963, - "grad_norm": 1.8583874702453613, - "learning_rate": 1.8513292737264222e-05, - "loss": 0.0586, + "epoch": 3.8289024209119265, + "grad_norm": 0.7588393092155457, + "learning_rate": 6.171097579088074e-06, + "loss": 0.0926, "step": 25780 }, { - "epoch": 1.9151938214763107, - "grad_norm": 2.5291054248809814, - "learning_rate": 1.8508837071142134e-05, - "loss": 0.0623, + "epoch": 3.8303876429526214, + "grad_norm": 0.8187506794929504, + "learning_rate": 6.169612357047379e-06, + "loss": 0.0668, "step": 25790 }, { - "epoch": 1.9159364324966583, - "grad_norm": 0.9900248050689697, - "learning_rate": 1.8504381405020052e-05, - "loss": 0.0273, + "epoch": 3.8318728649933167, + "grad_norm": 1.0678049325942993, + "learning_rate": 6.168127135006684e-06, + "loss": 0.0522, "step": 25800 }, { - "epoch": 1.9166790435170058, - "grad_norm": 1.1880460977554321, - "learning_rate": 1.8499925738897967e-05, - "loss": 0.0901, + "epoch": 3.8333580870340116, + "grad_norm": 0.4664575755596161, + "learning_rate": 6.166641912965989e-06, + "loss": 0.0593, "step": 25810 }, { - "epoch": 1.9174216545373532, - "grad_norm": 1.0495351552963257, - "learning_rate": 1.849547007277588e-05, - "loss": 0.0592, + "epoch": 3.8348433090747065, + "grad_norm": 0.5488105416297913, + "learning_rate": 6.165156690925294e-06, + "loss": 0.0727, "step": 25820 }, { - "epoch": 1.918164265557701, - "grad_norm": 2.6768858432769775, - "learning_rate": 1.8491014406653797e-05, - "loss": 0.0908, + "epoch": 3.836328531115402, + "grad_norm": 1.337288737297058, + "learning_rate": 6.163671468884599e-06, + "loss": 0.0738, "step": 25830 }, { - "epoch": 1.9189068765780484, - "grad_norm": 1.1184509992599487, - "learning_rate": 1.8486558740531712e-05, - "loss": 0.0594, + "epoch": 3.8378137531560967, + "grad_norm": 0.7804925441741943, + "learning_rate": 6.162186246843904e-06, + "loss": 0.0574, "step": 25840 }, { - "epoch": 1.919649487598396, - "grad_norm": 4.1254401206970215, - "learning_rate": 1.8482103074409624e-05, - "loss": 0.0834, + "epoch": 3.839298975196792, + "grad_norm": 0.784286379814148, + "learning_rate": 6.160701024803209e-06, + "loss": 0.0615, "step": 25850 }, { - "epoch": 1.9203920986187435, - "grad_norm": 0.6216328144073486, - "learning_rate": 1.847764740828754e-05, - "loss": 0.0647, + "epoch": 3.840784197237487, + "grad_norm": 0.9319252371788025, + "learning_rate": 6.159215802762514e-06, + "loss": 0.0705, "step": 25860 }, { - "epoch": 1.921134709639091, - "grad_norm": 2.5887441635131836, - "learning_rate": 1.8473191742165454e-05, - "loss": 0.063, + "epoch": 3.842269419278182, + "grad_norm": 1.4271937608718872, + "learning_rate": 6.157730580721818e-06, + "loss": 0.065, "step": 25870 }, { - "epoch": 1.9218773206594386, - "grad_norm": 1.4074640274047852, - "learning_rate": 1.846873607604337e-05, - "loss": 0.0643, + "epoch": 3.8437546413188772, + "grad_norm": 1.0302149057388306, + "learning_rate": 6.156245358681123e-06, + "loss": 0.0713, "step": 25880 }, { - "epoch": 1.9226199316797863, - "grad_norm": 1.3061528205871582, - "learning_rate": 1.8464280409921284e-05, - "loss": 0.0518, + "epoch": 3.845239863359572, + "grad_norm": 0.7671197056770325, + "learning_rate": 6.154760136640428e-06, + "loss": 0.0585, "step": 25890 }, { - "epoch": 1.9233625427001337, - "grad_norm": 4.136571884155273, - "learning_rate": 1.84598247437992e-05, - "loss": 0.0546, + "epoch": 3.8467250854002675, + "grad_norm": 0.9330741763114929, + "learning_rate": 6.153274914599733e-06, + "loss": 0.0688, "step": 25900 }, { - "epoch": 1.9241051537204812, - "grad_norm": 2.307090997695923, - "learning_rate": 1.8455369077677114e-05, - "loss": 0.0685, + "epoch": 3.8482103074409624, + "grad_norm": 1.258764624595642, + "learning_rate": 6.151789692559038e-06, + "loss": 0.0862, "step": 25910 }, { - "epoch": 1.9248477647408286, - "grad_norm": 1.6441222429275513, - "learning_rate": 1.845091341155503e-05, - "loss": 0.0886, + "epoch": 3.8496955294816573, + "grad_norm": 0.9332870841026306, + "learning_rate": 6.150304470518343e-06, + "loss": 0.0856, "step": 25920 }, { - "epoch": 1.9255903757611763, - "grad_norm": 0.6175203323364258, - "learning_rate": 1.844645774543294e-05, - "loss": 0.0401, + "epoch": 3.8511807515223526, + "grad_norm": 0.8132265210151672, + "learning_rate": 6.148819248477648e-06, + "loss": 0.0626, "step": 25930 }, { - "epoch": 1.926332986781524, - "grad_norm": 0.5794946551322937, - "learning_rate": 1.844200207931086e-05, - "loss": 0.0627, + "epoch": 3.8526659735630475, + "grad_norm": 0.781076192855835, + "learning_rate": 6.147334026436953e-06, + "loss": 0.0516, "step": 25940 }, { - "epoch": 1.9270755978018714, - "grad_norm": 0.15005835890769958, - "learning_rate": 1.843754641318877e-05, - "loss": 0.0369, + "epoch": 3.854151195603743, + "grad_norm": 0.8629059791564941, + "learning_rate": 6.145848804396257e-06, + "loss": 0.0451, "step": 25950 }, { - "epoch": 1.9278182088222189, - "grad_norm": 2.209848403930664, - "learning_rate": 1.8433090747066685e-05, - "loss": 0.0724, + "epoch": 3.8556364176444378, + "grad_norm": 0.7136142253875732, + "learning_rate": 6.144363582355563e-06, + "loss": 0.0697, "step": 25960 }, { - "epoch": 1.9285608198425663, - "grad_norm": 1.4631694555282593, - "learning_rate": 1.8428635080944604e-05, - "loss": 0.0872, + "epoch": 3.8571216396851327, + "grad_norm": 1.0320565700531006, + "learning_rate": 6.142878360314868e-06, + "loss": 0.0697, "step": 25970 }, { - "epoch": 1.929303430862914, - "grad_norm": 1.996635913848877, - "learning_rate": 1.8424179414822515e-05, - "loss": 0.0592, + "epoch": 3.858606861725828, + "grad_norm": 0.40204280614852905, + "learning_rate": 6.141393138274172e-06, + "loss": 0.0508, "step": 25980 }, { - "epoch": 1.9300460418832617, - "grad_norm": 1.6855813264846802, - "learning_rate": 1.841972374870043e-05, - "loss": 0.0902, + "epoch": 3.8600920837665234, + "grad_norm": 0.9110078811645508, + "learning_rate": 6.139907916233478e-06, + "loss": 0.0751, "step": 25990 }, { - "epoch": 1.9307886529036091, - "grad_norm": 1.024795413017273, - "learning_rate": 1.8415268082578345e-05, - "loss": 0.0532, + "epoch": 3.8615773058072183, + "grad_norm": 1.3291436433792114, + "learning_rate": 6.138422694192783e-06, + "loss": 0.0579, "step": 26000 }, { - "epoch": 1.9315312639239566, - "grad_norm": 0.7905906438827515, - "learning_rate": 1.841081241645626e-05, - "loss": 0.0524, + "epoch": 3.863062527847913, + "grad_norm": 1.4911785125732422, + "learning_rate": 6.1369374721520865e-06, + "loss": 0.0796, "step": 26010 }, { - "epoch": 1.932273874944304, - "grad_norm": 1.406113624572754, - "learning_rate": 1.8406356750334175e-05, - "loss": 0.0471, + "epoch": 3.8645477498886085, + "grad_norm": 0.4904058873653412, + "learning_rate": 6.135452250111393e-06, + "loss": 0.0502, "step": 26020 }, { - "epoch": 1.9330164859646517, - "grad_norm": 2.197995185852051, - "learning_rate": 1.840190108421209e-05, - "loss": 0.0518, + "epoch": 3.8660329719293034, + "grad_norm": 0.5274474024772644, + "learning_rate": 6.133967028070698e-06, + "loss": 0.0784, "step": 26030 }, { - "epoch": 1.9337590969849994, - "grad_norm": 0.5076370239257812, - "learning_rate": 1.8397445418090005e-05, - "loss": 0.056, + "epoch": 3.8675181939699987, + "grad_norm": 0.5127277970314026, + "learning_rate": 6.1324818060300015e-06, + "loss": 0.0738, "step": 26040 }, { - "epoch": 1.9345017080053468, - "grad_norm": 0.6380198001861572, - "learning_rate": 1.839298975196792e-05, - "loss": 0.0787, + "epoch": 3.8690034160106936, + "grad_norm": 1.0338975191116333, + "learning_rate": 6.130996583989307e-06, + "loss": 0.0605, "step": 26050 }, { - "epoch": 1.9352443190256943, - "grad_norm": 0.7328625321388245, - "learning_rate": 1.8388534085845832e-05, - "loss": 0.064, + "epoch": 3.8704886380513885, + "grad_norm": 0.818281888961792, + "learning_rate": 6.129511361948611e-06, + "loss": 0.0635, "step": 26060 }, { - "epoch": 1.9359869300460417, - "grad_norm": 1.652590274810791, - "learning_rate": 1.838407841972375e-05, - "loss": 0.0843, + "epoch": 3.871973860092084, + "grad_norm": 0.5960169434547424, + "learning_rate": 6.1280261399079165e-06, + "loss": 0.0827, "step": 26070 }, { - "epoch": 1.9367295410663894, - "grad_norm": 3.7497518062591553, - "learning_rate": 1.8379622753601665e-05, - "loss": 0.0686, + "epoch": 3.873459082132779, + "grad_norm": 1.0408084392547607, + "learning_rate": 6.126540917867222e-06, + "loss": 0.0479, "step": 26080 }, { - "epoch": 1.937472152086737, - "grad_norm": 0.787777304649353, - "learning_rate": 1.8375167087479577e-05, - "loss": 0.1322, + "epoch": 3.874944304173474, + "grad_norm": 1.4356499910354614, + "learning_rate": 6.125055695826526e-06, + "loss": 0.07, "step": 26090 }, { - "epoch": 1.9382147631070845, - "grad_norm": 2.129948616027832, - "learning_rate": 1.8370711421357492e-05, - "loss": 0.0428, + "epoch": 3.876429526214169, + "grad_norm": 0.6846652626991272, + "learning_rate": 6.1235704737858315e-06, + "loss": 0.0552, "step": 26100 }, { - "epoch": 1.938957374127432, - "grad_norm": 3.463418960571289, - "learning_rate": 1.836625575523541e-05, - "loss": 0.0811, + "epoch": 3.877914748254864, + "grad_norm": 0.6793112754821777, + "learning_rate": 6.122085251745137e-06, + "loss": 0.0655, "step": 26110 }, { - "epoch": 1.9396999851477796, - "grad_norm": 0.8304588794708252, - "learning_rate": 1.8361800089113322e-05, - "loss": 0.0744, + "epoch": 3.8793999702955593, + "grad_norm": 0.874123215675354, + "learning_rate": 6.120600029704441e-06, + "loss": 0.0608, "step": 26120 }, { - "epoch": 1.940442596168127, - "grad_norm": 0.47356998920440674, - "learning_rate": 1.8357344422991237e-05, - "loss": 0.0558, + "epoch": 3.880885192336254, + "grad_norm": 1.1491456031799316, + "learning_rate": 6.1191148076637465e-06, + "loss": 0.0554, "step": 26130 }, { - "epoch": 1.9411852071884748, - "grad_norm": 1.1219099760055542, - "learning_rate": 1.8352888756869155e-05, - "loss": 0.0752, + "epoch": 3.8823704143769495, + "grad_norm": 0.9040345549583435, + "learning_rate": 6.117629585623052e-06, + "loss": 0.0579, "step": 26140 }, { - "epoch": 1.9419278182088222, - "grad_norm": 1.6041889190673828, - "learning_rate": 1.8348433090747067e-05, - "loss": 0.0737, + "epoch": 3.8838556364176444, + "grad_norm": 0.6160182952880859, + "learning_rate": 6.116144363582356e-06, + "loss": 0.0575, "step": 26150 }, { - "epoch": 1.9426704292291697, - "grad_norm": 0.80296790599823, - "learning_rate": 1.8343977424624982e-05, - "loss": 0.074, + "epoch": 3.8853408584583393, + "grad_norm": 1.2270127534866333, + "learning_rate": 6.1146591415416615e-06, + "loss": 0.0522, "step": 26160 }, { - "epoch": 1.9434130402495173, - "grad_norm": 2.8766558170318604, - "learning_rate": 1.8339521758502893e-05, - "loss": 0.0874, + "epoch": 3.8868260804990347, + "grad_norm": 0.9320999979972839, + "learning_rate": 6.113173919500965e-06, + "loss": 0.0774, "step": 26170 }, { - "epoch": 1.944155651269865, - "grad_norm": 0.7357403635978699, - "learning_rate": 1.8335066092380812e-05, - "loss": 0.0765, + "epoch": 3.8883113025397296, + "grad_norm": 0.8286004066467285, + "learning_rate": 6.11168869746027e-06, + "loss": 0.0503, "step": 26180 }, { - "epoch": 1.9448982622902125, - "grad_norm": 2.0172839164733887, - "learning_rate": 1.8330610426258727e-05, - "loss": 0.0734, + "epoch": 3.889796524580425, + "grad_norm": 0.9038022756576538, + "learning_rate": 6.110203475419576e-06, + "loss": 0.0749, "step": 26190 }, { - "epoch": 1.94564087331056, - "grad_norm": 1.475164771080017, - "learning_rate": 1.832615476013664e-05, - "loss": 0.092, + "epoch": 3.89128174662112, + "grad_norm": 0.6100136041641235, + "learning_rate": 6.10871825337888e-06, + "loss": 0.0528, "step": 26200 }, { - "epoch": 1.9463834843309074, - "grad_norm": 0.9614417552947998, - "learning_rate": 1.8321699094014557e-05, - "loss": 0.0785, + "epoch": 3.8927669686618147, + "grad_norm": 1.1859416961669922, + "learning_rate": 6.107233031338185e-06, + "loss": 0.0621, "step": 26210 }, { - "epoch": 1.947126095351255, - "grad_norm": 2.393979549407959, - "learning_rate": 1.8317243427892472e-05, - "loss": 0.0762, + "epoch": 3.89425219070251, + "grad_norm": 0.968455970287323, + "learning_rate": 6.105747809297491e-06, + "loss": 0.0569, "step": 26220 }, { - "epoch": 1.9478687063716027, - "grad_norm": 2.239128589630127, - "learning_rate": 1.8312787761770383e-05, - "loss": 0.0603, + "epoch": 3.895737412743205, + "grad_norm": 1.1066442728042603, + "learning_rate": 6.104262587256795e-06, + "loss": 0.0631, "step": 26230 }, { - "epoch": 1.9486113173919501, - "grad_norm": 0.7804839015007019, - "learning_rate": 1.8308332095648302e-05, - "loss": 0.0607, + "epoch": 3.8972226347839003, + "grad_norm": 0.8778436779975891, + "learning_rate": 6.1027773652161e-06, + "loss": 0.0619, "step": 26240 }, { - "epoch": 1.9493539284122976, - "grad_norm": 1.400680422782898, - "learning_rate": 1.8303876429526217e-05, - "loss": 0.0694, + "epoch": 3.898707856824595, + "grad_norm": 0.9055481553077698, + "learning_rate": 6.101292143175406e-06, + "loss": 0.0501, "step": 26250 }, { - "epoch": 1.950096539432645, - "grad_norm": 1.6606298685073853, - "learning_rate": 1.829942076340413e-05, - "loss": 0.0692, + "epoch": 3.90019307886529, + "grad_norm": 0.7718532681465149, + "learning_rate": 6.09980692113471e-06, + "loss": 0.0688, "step": 26260 }, { - "epoch": 1.9508391504529927, - "grad_norm": 1.0099084377288818, - "learning_rate": 1.8294965097282043e-05, - "loss": 0.0588, + "epoch": 3.9016783009059854, + "grad_norm": 0.8786125183105469, + "learning_rate": 6.098321699094015e-06, + "loss": 0.0654, "step": 26270 }, { - "epoch": 1.9515817614733404, - "grad_norm": 1.2126892805099487, - "learning_rate": 1.829050943115996e-05, - "loss": 0.0827, + "epoch": 3.903163522946681, + "grad_norm": 0.44965681433677673, + "learning_rate": 6.09683647705332e-06, + "loss": 0.0662, "step": 26280 }, { - "epoch": 1.9523243724936878, - "grad_norm": 2.0662033557891846, - "learning_rate": 1.8286053765037873e-05, - "loss": 0.057, + "epoch": 3.9046487449873757, + "grad_norm": 0.6755138039588928, + "learning_rate": 6.095351255012625e-06, + "loss": 0.0705, "step": 26290 }, { - "epoch": 1.9530669835140353, - "grad_norm": 1.9331927299499512, - "learning_rate": 1.828159809891579e-05, - "loss": 0.073, + "epoch": 3.9061339670280706, + "grad_norm": 1.2151280641555786, + "learning_rate": 6.09386603297193e-06, + "loss": 0.0818, "step": 26300 }, { - "epoch": 1.9538095945343827, - "grad_norm": 2.7068533897399902, - "learning_rate": 1.8277142432793703e-05, - "loss": 0.0848, + "epoch": 3.907619189068766, + "grad_norm": 0.6397557258605957, + "learning_rate": 6.092380810931235e-06, + "loss": 0.0674, "step": 26310 }, { - "epoch": 1.9545522055547304, - "grad_norm": 0.645256519317627, - "learning_rate": 1.827268676667162e-05, - "loss": 0.0731, + "epoch": 3.909104411109461, + "grad_norm": 1.2610584497451782, + "learning_rate": 6.09089558889054e-06, + "loss": 0.0693, "step": 26320 }, { - "epoch": 1.955294816575078, - "grad_norm": 0.46004560589790344, - "learning_rate": 1.8268231100549533e-05, - "loss": 0.0556, + "epoch": 3.910589633150156, + "grad_norm": 0.6538786292076111, + "learning_rate": 6.089410366849845e-06, + "loss": 0.0702, "step": 26330 }, { - "epoch": 1.9560374275954255, - "grad_norm": 2.7907826900482178, - "learning_rate": 1.8263775434427445e-05, - "loss": 0.076, + "epoch": 3.912074855190851, + "grad_norm": 0.6008743643760681, + "learning_rate": 6.087925144809149e-06, + "loss": 0.0579, "step": 26340 }, { - "epoch": 1.956780038615773, - "grad_norm": 0.48506757616996765, - "learning_rate": 1.8259319768305363e-05, - "loss": 0.0669, + "epoch": 3.913560077231546, + "grad_norm": 0.9000145792961121, + "learning_rate": 6.086439922768454e-06, + "loss": 0.0758, "step": 26350 }, { - "epoch": 1.9575226496361204, - "grad_norm": 2.199068546295166, - "learning_rate": 1.825486410218328e-05, - "loss": 0.0996, + "epoch": 3.9150452992722413, + "grad_norm": 0.43842408061027527, + "learning_rate": 6.0849547007277595e-06, + "loss": 0.0753, "step": 26360 }, { - "epoch": 1.958265260656468, - "grad_norm": 1.1990214586257935, - "learning_rate": 1.825040843606119e-05, - "loss": 0.0453, + "epoch": 3.916530521312936, + "grad_norm": 0.8281410932540894, + "learning_rate": 6.083469478687064e-06, + "loss": 0.0577, "step": 26370 }, { - "epoch": 1.9590078716768158, - "grad_norm": 1.2882795333862305, - "learning_rate": 1.824595276993911e-05, - "loss": 0.064, + "epoch": 3.9180157433536316, + "grad_norm": 0.7675429582595825, + "learning_rate": 6.081984256646369e-06, + "loss": 0.0612, "step": 26380 }, { - "epoch": 1.9597504826971632, - "grad_norm": 0.6526997089385986, - "learning_rate": 1.824149710381702e-05, - "loss": 0.0817, + "epoch": 3.9195009653943265, + "grad_norm": 0.4206182658672333, + "learning_rate": 6.080499034605674e-06, + "loss": 0.0548, "step": 26390 }, { - "epoch": 1.9604930937175107, - "grad_norm": 0.8225412368774414, - "learning_rate": 1.8237041437694935e-05, - "loss": 0.0611, + "epoch": 3.9209861874350214, + "grad_norm": 0.7839239239692688, + "learning_rate": 6.079013812564979e-06, + "loss": 0.0602, "step": 26400 }, { - "epoch": 1.9612357047378584, - "grad_norm": 0.5846786499023438, - "learning_rate": 1.8232585771572853e-05, - "loss": 0.0877, + "epoch": 3.9224714094757167, + "grad_norm": 0.8620349168777466, + "learning_rate": 6.077528590524284e-06, + "loss": 0.0631, "step": 26410 }, { - "epoch": 1.9619783157582058, - "grad_norm": 3.2029032707214355, - "learning_rate": 1.8228130105450765e-05, - "loss": 0.0773, + "epoch": 3.9239566315164116, + "grad_norm": 0.6361775994300842, + "learning_rate": 6.076043368483589e-06, + "loss": 0.055, "step": 26420 }, { - "epoch": 1.9627209267785535, - "grad_norm": 1.3183096647262573, - "learning_rate": 1.822367443932868e-05, - "loss": 0.052, + "epoch": 3.925441853557107, + "grad_norm": 0.4495631158351898, + "learning_rate": 6.074558146442894e-06, + "loss": 0.053, "step": 26430 }, { - "epoch": 1.963463537798901, - "grad_norm": 0.731730043888092, - "learning_rate": 1.8219218773206595e-05, - "loss": 0.081, + "epoch": 3.926927075597802, + "grad_norm": 0.644170343875885, + "learning_rate": 6.073072924402199e-06, + "loss": 0.0694, "step": 26440 }, { - "epoch": 1.9642061488192484, - "grad_norm": 1.890268087387085, - "learning_rate": 1.821476310708451e-05, - "loss": 0.0529, + "epoch": 3.9284122976384968, + "grad_norm": 0.987910270690918, + "learning_rate": 6.071587702361504e-06, + "loss": 0.0593, "step": 26450 }, { - "epoch": 1.964948759839596, - "grad_norm": 1.2270501852035522, - "learning_rate": 1.8210307440962425e-05, - "loss": 0.0869, + "epoch": 3.929897519679192, + "grad_norm": 1.5340524911880493, + "learning_rate": 6.070102480320809e-06, + "loss": 0.0793, "step": 26460 }, { - "epoch": 1.9656913708599437, - "grad_norm": 0.9441844820976257, - "learning_rate": 1.8205851774840337e-05, - "loss": 0.0552, + "epoch": 3.931382741719887, + "grad_norm": 0.858149528503418, + "learning_rate": 6.0686172582801125e-06, + "loss": 0.0695, "step": 26470 }, { - "epoch": 1.9664339818802912, - "grad_norm": 1.5903592109680176, - "learning_rate": 1.8201396108718255e-05, - "loss": 0.0693, + "epoch": 3.9328679637605823, + "grad_norm": 0.4782363474369049, + "learning_rate": 6.067132036239418e-06, + "loss": 0.0689, "step": 26480 }, { - "epoch": 1.9671765929006386, - "grad_norm": 3.176476001739502, - "learning_rate": 1.819694044259617e-05, - "loss": 0.0863, + "epoch": 3.9343531858012772, + "grad_norm": 1.1806187629699707, + "learning_rate": 6.065646814198724e-06, + "loss": 0.0556, "step": 26490 }, { - "epoch": 1.967919203920986, - "grad_norm": 0.41649898886680603, - "learning_rate": 1.819248477647408e-05, - "loss": 0.0417, + "epoch": 3.935838407841972, + "grad_norm": 0.8682441711425781, + "learning_rate": 6.0641615921580275e-06, + "loss": 0.064, "step": 26500 }, { - "epoch": 1.9686618149413337, - "grad_norm": 1.182588815689087, - "learning_rate": 1.8188029110351997e-05, - "loss": 0.0692, + "epoch": 3.9373236298826675, + "grad_norm": 0.6038737297058105, + "learning_rate": 6.062676370117333e-06, + "loss": 0.0628, "step": 26510 }, { - "epoch": 1.9694044259616814, - "grad_norm": 1.535315990447998, - "learning_rate": 1.8183573444229915e-05, - "loss": 0.056, + "epoch": 3.9388088519233624, + "grad_norm": 0.9265310764312744, + "learning_rate": 6.061191148076638e-06, + "loss": 0.0579, "step": 26520 }, { - "epoch": 1.9701470369820289, - "grad_norm": 0.7827800512313843, - "learning_rate": 1.8179117778107826e-05, - "loss": 0.0994, + "epoch": 3.9402940739640577, + "grad_norm": 1.1472411155700684, + "learning_rate": 6.0597059260359424e-06, + "loss": 0.0435, "step": 26530 }, { - "epoch": 1.9708896480023763, - "grad_norm": 1.4649193286895752, - "learning_rate": 1.817466211198574e-05, - "loss": 0.0624, + "epoch": 3.9417792960047526, + "grad_norm": 0.5956252217292786, + "learning_rate": 6.058220703995248e-06, + "loss": 0.0651, "step": 26540 }, { - "epoch": 1.9716322590227238, - "grad_norm": 2.715514898300171, - "learning_rate": 1.817020644586366e-05, - "loss": 0.0686, + "epoch": 3.9432645180454475, + "grad_norm": 1.012588381767273, + "learning_rate": 6.056735481954553e-06, + "loss": 0.0659, "step": 26550 }, { - "epoch": 1.9723748700430714, - "grad_norm": 2.133049964904785, - "learning_rate": 1.816575077974157e-05, - "loss": 0.0645, + "epoch": 3.944749740086143, + "grad_norm": 0.8449077606201172, + "learning_rate": 6.0552502599138574e-06, + "loss": 0.0476, "step": 26560 }, { - "epoch": 1.973117481063419, - "grad_norm": 1.230670690536499, - "learning_rate": 1.8161295113619486e-05, - "loss": 0.0873, + "epoch": 3.946234962126838, + "grad_norm": 1.4247989654541016, + "learning_rate": 6.053765037873163e-06, + "loss": 0.0605, "step": 26570 }, { - "epoch": 1.9738600920837666, - "grad_norm": 1.5158164501190186, - "learning_rate": 1.8156839447497398e-05, - "loss": 0.0826, + "epoch": 3.947720184167533, + "grad_norm": 0.7127845883369446, + "learning_rate": 6.052279815832467e-06, + "loss": 0.0606, "step": 26580 }, { - "epoch": 1.974602703104114, - "grad_norm": 2.380052089691162, - "learning_rate": 1.8152383781375316e-05, - "loss": 0.0839, + "epoch": 3.949205406208228, + "grad_norm": 0.822958767414093, + "learning_rate": 6.0507945937917724e-06, + "loss": 0.0768, "step": 26590 }, { - "epoch": 1.9753453141244615, - "grad_norm": 0.9779214262962341, - "learning_rate": 1.814792811525323e-05, - "loss": 0.1189, + "epoch": 3.9506906282489234, + "grad_norm": 1.0008773803710938, + "learning_rate": 6.049309371751078e-06, + "loss": 0.066, "step": 26600 }, { - "epoch": 1.9760879251448091, - "grad_norm": 0.7717707753181458, - "learning_rate": 1.8143472449131143e-05, - "loss": 0.0651, + "epoch": 3.9521758502896183, + "grad_norm": 0.5226492881774902, + "learning_rate": 6.047824149710382e-06, + "loss": 0.0507, "step": 26610 }, { - "epoch": 1.9768305361651568, - "grad_norm": 1.0977226495742798, - "learning_rate": 1.813901678300906e-05, - "loss": 0.052, + "epoch": 3.9536610723303136, + "grad_norm": 0.5270611047744751, + "learning_rate": 6.0463389276696874e-06, + "loss": 0.0596, "step": 26620 }, { - "epoch": 1.9775731471855043, - "grad_norm": 1.963529348373413, - "learning_rate": 1.8134561116886976e-05, - "loss": 0.06, + "epoch": 3.9551462943710085, + "grad_norm": 0.9116948843002319, + "learning_rate": 6.044853705628993e-06, + "loss": 0.0562, "step": 26630 }, { - "epoch": 1.9783157582058517, - "grad_norm": 0.4046013653278351, - "learning_rate": 1.8130105450764888e-05, - "loss": 0.0571, + "epoch": 3.9566315164117034, + "grad_norm": 0.8059374690055847, + "learning_rate": 6.043368483588296e-06, + "loss": 0.0457, "step": 26640 }, { - "epoch": 1.9790583692261992, - "grad_norm": 1.8156684637069702, - "learning_rate": 1.8125649784642806e-05, - "loss": 0.0798, + "epoch": 3.9581167384523988, + "grad_norm": 0.9144610166549683, + "learning_rate": 6.041883261547602e-06, + "loss": 0.0628, "step": 26650 }, { - "epoch": 1.9798009802465468, - "grad_norm": 0.9352402687072754, - "learning_rate": 1.812119411852072e-05, - "loss": 0.0706, + "epoch": 3.9596019604930937, + "grad_norm": 0.5333276987075806, + "learning_rate": 6.040398039506908e-06, + "loss": 0.0497, "step": 26660 }, { - "epoch": 1.9805435912668945, - "grad_norm": 2.6892099380493164, - "learning_rate": 1.8116738452398633e-05, - "loss": 0.0716, + "epoch": 3.961087182533789, + "grad_norm": 1.1043226718902588, + "learning_rate": 6.038912817466211e-06, + "loss": 0.0534, "step": 26670 }, { - "epoch": 1.981286202287242, - "grad_norm": 1.3051759004592896, - "learning_rate": 1.8112282786276548e-05, - "loss": 0.0775, + "epoch": 3.962572404574484, + "grad_norm": 0.7918885350227356, + "learning_rate": 6.0374275954255166e-06, + "loss": 0.062, "step": 26680 }, { - "epoch": 1.9820288133075894, - "grad_norm": 0.3577052056789398, - "learning_rate": 1.8107827120154463e-05, - "loss": 0.0396, + "epoch": 3.964057626615179, + "grad_norm": 1.52443528175354, + "learning_rate": 6.035942373384821e-06, + "loss": 0.0582, "step": 26690 }, { - "epoch": 1.982771424327937, - "grad_norm": 1.0343585014343262, - "learning_rate": 1.8103371454032378e-05, - "loss": 0.064, + "epoch": 3.965542848655874, + "grad_norm": 0.8226548433303833, + "learning_rate": 6.034457151344126e-06, + "loss": 0.0812, "step": 26700 }, { - "epoch": 1.9835140353482845, - "grad_norm": 2.462855100631714, - "learning_rate": 1.8098915787910293e-05, - "loss": 0.0882, + "epoch": 3.967028070696569, + "grad_norm": 0.8545466661453247, + "learning_rate": 6.0329719293034316e-06, + "loss": 0.0554, "step": 26710 }, { - "epoch": 1.9842566463686322, - "grad_norm": 2.172545909881592, - "learning_rate": 1.8094460121788208e-05, - "loss": 0.0716, + "epoch": 3.9685132927372644, + "grad_norm": 0.7938420176506042, + "learning_rate": 6.031486707262736e-06, + "loss": 0.0591, "step": 26720 }, { - "epoch": 1.9849992573889796, - "grad_norm": 1.9032946825027466, - "learning_rate": 1.8090004455666123e-05, - "loss": 0.0691, + "epoch": 3.9699985147779593, + "grad_norm": 1.5926915407180786, + "learning_rate": 6.030001485222041e-06, + "loss": 0.0677, "step": 26730 }, { - "epoch": 1.985741868409327, - "grad_norm": 0.6433393359184265, - "learning_rate": 1.8085548789544038e-05, - "loss": 0.0452, + "epoch": 3.971483736818654, + "grad_norm": 0.5737379789352417, + "learning_rate": 6.0285162631813466e-06, + "loss": 0.0417, "step": 26740 }, { - "epoch": 1.9864844794296748, - "grad_norm": 2.729414939880371, - "learning_rate": 1.808109312342195e-05, - "loss": 0.0704, + "epoch": 3.9729689588593495, + "grad_norm": 0.7223270535469055, + "learning_rate": 6.027031041140651e-06, + "loss": 0.0727, "step": 26750 }, { - "epoch": 1.9872270904500224, - "grad_norm": 1.0757064819335938, - "learning_rate": 1.8076637457299868e-05, - "loss": 0.0853, + "epoch": 3.9744541809000444, + "grad_norm": 0.7630795836448669, + "learning_rate": 6.025545819099956e-06, + "loss": 0.0925, "step": 26760 }, { - "epoch": 1.9879697014703699, - "grad_norm": 0.4232407212257385, - "learning_rate": 1.8072181791177783e-05, - "loss": 0.0757, + "epoch": 3.9759394029407398, + "grad_norm": 1.0278931856155396, + "learning_rate": 6.0240605970592616e-06, + "loss": 0.0719, "step": 26770 }, { - "epoch": 1.9887123124907173, - "grad_norm": 1.7221150398254395, - "learning_rate": 1.8067726125055695e-05, - "loss": 0.0709, + "epoch": 3.9774246249814347, + "grad_norm": 1.7548854351043701, + "learning_rate": 6.022575375018566e-06, + "loss": 0.074, "step": 26780 }, { - "epoch": 1.9894549235110648, - "grad_norm": 1.9497733116149902, - "learning_rate": 1.8063270458933613e-05, - "loss": 0.0886, + "epoch": 3.9789098470221296, + "grad_norm": 0.629660427570343, + "learning_rate": 6.021090152977871e-06, + "loss": 0.0706, "step": 26790 }, { - "epoch": 1.9901975345314125, - "grad_norm": 2.333503484725952, - "learning_rate": 1.8058814792811525e-05, - "loss": 0.0528, + "epoch": 3.980395069062825, + "grad_norm": 0.7200632095336914, + "learning_rate": 6.019604930937175e-06, + "loss": 0.0597, "step": 26800 }, { - "epoch": 1.9909401455517601, - "grad_norm": 0.809281051158905, - "learning_rate": 1.805435912668944e-05, - "loss": 0.0683, + "epoch": 3.98188029110352, + "grad_norm": 0.6580965518951416, + "learning_rate": 6.01811970889648e-06, + "loss": 0.055, "step": 26810 }, { - "epoch": 1.9916827565721076, - "grad_norm": 3.6550369262695312, - "learning_rate": 1.8049903460567358e-05, - "loss": 0.0725, + "epoch": 3.983365513144215, + "grad_norm": 0.9431211352348328, + "learning_rate": 6.016634486855785e-06, + "loss": 0.0658, "step": 26820 }, { - "epoch": 1.992425367592455, - "grad_norm": 2.8974320888519287, - "learning_rate": 1.804544779444527e-05, - "loss": 0.0892, + "epoch": 3.98485073518491, + "grad_norm": 1.132131576538086, + "learning_rate": 6.01514926481509e-06, + "loss": 0.0537, "step": 26830 }, { - "epoch": 1.9931679786128025, - "grad_norm": 0.9259861707687378, - "learning_rate": 1.8040992128323185e-05, - "loss": 0.0665, + "epoch": 3.986335957225605, + "grad_norm": 1.0278289318084717, + "learning_rate": 6.013664042774395e-06, + "loss": 0.0734, "step": 26840 }, { - "epoch": 1.9939105896331502, - "grad_norm": 2.125751495361328, - "learning_rate": 1.80365364622011e-05, - "loss": 0.0634, + "epoch": 3.9878211792663003, + "grad_norm": 0.5455945134162903, + "learning_rate": 6.0121788207337e-06, + "loss": 0.0656, "step": 26850 }, { - "epoch": 1.9946532006534978, - "grad_norm": 1.0825103521347046, - "learning_rate": 1.8032080796079015e-05, - "loss": 0.0708, + "epoch": 3.9893064013069957, + "grad_norm": 0.41397568583488464, + "learning_rate": 6.010693598693005e-06, + "loss": 0.0749, "step": 26860 }, { - "epoch": 1.9953958116738453, - "grad_norm": 1.3801538944244385, - "learning_rate": 1.802762512995693e-05, - "loss": 0.0787, + "epoch": 3.9907916233476906, + "grad_norm": 1.662298560142517, + "learning_rate": 6.00920837665231e-06, + "loss": 0.0708, "step": 26870 }, { - "epoch": 1.9961384226941927, - "grad_norm": 0.7324304580688477, - "learning_rate": 1.8023169463834845e-05, - "loss": 0.0689, + "epoch": 3.9922768453883855, + "grad_norm": 0.9174128174781799, + "learning_rate": 6.007723154611615e-06, + "loss": 0.0524, "step": 26880 }, { - "epoch": 1.9968810337145402, - "grad_norm": 0.9306546449661255, - "learning_rate": 1.801871379771276e-05, - "loss": 0.0895, + "epoch": 3.993762067429081, + "grad_norm": 1.349379539489746, + "learning_rate": 6.00623793257092e-06, + "loss": 0.0649, "step": 26890 }, { - "epoch": 1.9976236447348878, - "grad_norm": 3.5003910064697266, - "learning_rate": 1.8014258131590675e-05, - "loss": 0.057, + "epoch": 3.9952472894697757, + "grad_norm": 0.9090511798858643, + "learning_rate": 6.004752710530225e-06, + "loss": 0.082, "step": 26900 }, { - "epoch": 1.9983662557552355, - "grad_norm": 1.8445905447006226, - "learning_rate": 1.8009802465468586e-05, - "loss": 0.0806, + "epoch": 3.996732511510471, + "grad_norm": 1.657594084739685, + "learning_rate": 6.0032674884895296e-06, + "loss": 0.0625, "step": 26910 }, { - "epoch": 1.999108866775583, - "grad_norm": 2.1891441345214844, - "learning_rate": 1.80053467993465e-05, - "loss": 0.0742, + "epoch": 3.998217733551166, + "grad_norm": 0.5109291672706604, + "learning_rate": 6.001782266448835e-06, + "loss": 0.0517, "step": 26920 }, { - "epoch": 1.9998514777959304, - "grad_norm": 2.6680564880371094, - "learning_rate": 1.800089113322442e-05, - "loss": 0.0612, + "epoch": 3.999702955591861, + "grad_norm": 0.8310036659240723, + "learning_rate": 6.00029704440814e-06, + "loss": 0.0534, "step": 26930 }, { - "epoch": 2.0, - "eval_f1": 0.0, - "eval_loss": 0.05881134420633316, - "eval_runtime": 790.8114, - "eval_samples_per_second": 48.076, - "eval_steps_per_second": 3.006, + "epoch": 4.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.05716124549508095, + "eval_runtime": 211.3669, + "eval_samples_per_second": 179.872, + "eval_steps_per_second": 5.625, "step": 26932 }, { - "epoch": 2.000594088816278, - "grad_norm": 1.1317378282546997, - "learning_rate": 1.799643546710233e-05, - "loss": 0.0591, + "epoch": 4.001188177632556, + "grad_norm": 0.7958173751831055, + "learning_rate": 5.998811822367444e-06, + "loss": 0.0648, "step": 26940 }, { - "epoch": 2.0013366998366258, - "grad_norm": 1.214632272720337, - "learning_rate": 1.7991979800980246e-05, - "loss": 0.0469, + "epoch": 4.0026733996732515, + "grad_norm": 1.0557328462600708, + "learning_rate": 5.99732660032675e-06, + "loss": 0.0706, "step": 26950 }, { - "epoch": 2.002079310856973, - "grad_norm": 0.5876008868217468, - "learning_rate": 1.7987524134858164e-05, - "loss": 0.0565, + "epoch": 4.004158621713946, + "grad_norm": 0.9724434018135071, + "learning_rate": 5.995841378286055e-06, + "loss": 0.0643, "step": 26960 }, { - "epoch": 2.0028219218773207, - "grad_norm": 0.7250917553901672, - "learning_rate": 1.7983068468736076e-05, - "loss": 0.0597, + "epoch": 4.005643843754641, + "grad_norm": 0.3466469645500183, + "learning_rate": 5.994356156245359e-06, + "loss": 0.0644, "step": 26970 }, { - "epoch": 2.003564532897668, - "grad_norm": 1.3954814672470093, - "learning_rate": 1.797861280261399e-05, - "loss": 0.098, + "epoch": 4.007129065795336, + "grad_norm": 1.2057231664657593, + "learning_rate": 5.992870934204664e-06, + "loss": 0.0583, "step": 26980 }, { - "epoch": 2.0043071439180156, - "grad_norm": 0.8022347092628479, - "learning_rate": 1.7974157136491906e-05, - "loss": 0.0634, + "epoch": 4.008614287836031, + "grad_norm": 0.6493140459060669, + "learning_rate": 5.991385712163968e-06, + "loss": 0.054, "step": 26990 }, { - "epoch": 2.0050497549383635, - "grad_norm": 1.9625482559204102, - "learning_rate": 1.796970147036982e-05, - "loss": 0.0774, + "epoch": 4.010099509876727, + "grad_norm": 1.1638193130493164, + "learning_rate": 5.989900490123274e-06, + "loss": 0.0723, "step": 27000 }, { - "epoch": 2.005792365958711, - "grad_norm": 1.0475565195083618, - "learning_rate": 1.7965245804247736e-05, - "loss": 0.0798, + "epoch": 4.011584731917422, + "grad_norm": 1.2872439622879028, + "learning_rate": 5.988415268082579e-06, + "loss": 0.062, "step": 27010 }, { - "epoch": 2.0065349769790584, - "grad_norm": 1.3007713556289673, - "learning_rate": 1.7960790138125648e-05, - "loss": 0.0721, + "epoch": 4.013069953958117, + "grad_norm": 0.550545871257782, + "learning_rate": 5.986930046041883e-06, + "loss": 0.0556, "step": 27020 }, { - "epoch": 2.007277587999406, - "grad_norm": 1.5825034379959106, - "learning_rate": 1.7956334472003566e-05, - "loss": 0.0741, + "epoch": 4.014555175998812, + "grad_norm": 0.6696489453315735, + "learning_rate": 5.985444824001189e-06, + "loss": 0.0567, "step": 27030 }, { - "epoch": 2.0080201990197533, - "grad_norm": 1.4288562536239624, - "learning_rate": 1.795187880588148e-05, - "loss": 0.0606, + "epoch": 4.0160403980395065, + "grad_norm": 0.6009332537651062, + "learning_rate": 5.983959601960494e-06, + "loss": 0.065, "step": 27040 }, { - "epoch": 2.008762810040101, - "grad_norm": 1.6191655397415161, - "learning_rate": 1.7947423139759393e-05, - "loss": 0.0893, + "epoch": 4.017525620080202, + "grad_norm": 0.652777373790741, + "learning_rate": 5.982474379919798e-06, + "loss": 0.0667, "step": 27050 }, { - "epoch": 2.0095054210604486, - "grad_norm": 3.0551598072052, - "learning_rate": 1.794296747363731e-05, - "loss": 0.0609, + "epoch": 4.019010842120897, + "grad_norm": 1.0050688982009888, + "learning_rate": 5.980989157879104e-06, + "loss": 0.0524, "step": 27060 }, { - "epoch": 2.010248032080796, - "grad_norm": 0.9543463587760925, - "learning_rate": 1.7938511807515226e-05, - "loss": 0.0685, + "epoch": 4.020496064161592, + "grad_norm": 0.7190169095993042, + "learning_rate": 5.979503935838409e-06, + "loss": 0.0644, "step": 27070 }, { - "epoch": 2.0109906431011435, - "grad_norm": 1.9315248727798462, - "learning_rate": 1.7934056141393138e-05, - "loss": 0.0581, + "epoch": 4.021981286202287, + "grad_norm": 0.6970319151878357, + "learning_rate": 5.978018713797713e-06, + "loss": 0.0602, "step": 27080 }, { - "epoch": 2.011733254121491, - "grad_norm": 1.0615206956863403, - "learning_rate": 1.7929600475271053e-05, - "loss": 0.0834, + "epoch": 4.023466508242982, + "grad_norm": 0.42795783281326294, + "learning_rate": 5.976533491757019e-06, + "loss": 0.0629, "step": 27090 }, { - "epoch": 2.012475865141839, - "grad_norm": 3.055593729019165, - "learning_rate": 1.7925144809148968e-05, - "loss": 0.0777, + "epoch": 4.024951730283678, + "grad_norm": 0.6158421635627747, + "learning_rate": 5.975048269716322e-06, + "loss": 0.0759, "step": 27100 }, { - "epoch": 2.0132184761621863, - "grad_norm": 0.9650170207023621, - "learning_rate": 1.7920689143026883e-05, - "loss": 0.0563, + "epoch": 4.026436952324373, + "grad_norm": 1.4070188999176025, + "learning_rate": 5.9735630476756275e-06, + "loss": 0.0618, "step": 27110 }, { - "epoch": 2.0139610871825337, - "grad_norm": 1.2124733924865723, - "learning_rate": 1.7916233476904798e-05, - "loss": 0.0975, + "epoch": 4.0279221743650675, + "grad_norm": 0.845020055770874, + "learning_rate": 5.972077825634933e-06, + "loss": 0.0675, "step": 27120 }, { - "epoch": 2.014703698202881, - "grad_norm": 2.321781635284424, - "learning_rate": 1.7911777810782713e-05, - "loss": 0.0769, + "epoch": 4.029407396405762, + "grad_norm": 0.945316731929779, + "learning_rate": 5.970592603594237e-06, + "loss": 0.0632, "step": 27130 }, { - "epoch": 2.015446309223229, - "grad_norm": 1.0764652490615845, - "learning_rate": 1.7907322144660628e-05, - "loss": 0.0526, + "epoch": 4.030892618446458, + "grad_norm": 0.66912442445755, + "learning_rate": 5.9691073815535425e-06, + "loss": 0.0911, "step": 27140 }, { - "epoch": 2.0161889202435765, - "grad_norm": 1.4031742811203003, - "learning_rate": 1.7902866478538543e-05, - "loss": 0.061, + "epoch": 4.032377840487153, + "grad_norm": 0.8626250624656677, + "learning_rate": 5.967622159512848e-06, + "loss": 0.0583, "step": 27150 }, { - "epoch": 2.016931531263924, - "grad_norm": 0.9712595343589783, - "learning_rate": 1.7898410812416454e-05, - "loss": 0.0627, + "epoch": 4.033863062527848, + "grad_norm": 0.6361261606216431, + "learning_rate": 5.966136937472152e-06, + "loss": 0.0744, "step": 27160 }, { - "epoch": 2.0176741422842714, - "grad_norm": 1.150699496269226, - "learning_rate": 1.7893955146294373e-05, - "loss": 0.0839, + "epoch": 4.035348284568543, + "grad_norm": 0.5740983486175537, + "learning_rate": 5.9646517154314575e-06, + "loss": 0.0663, "step": 27170 }, { - "epoch": 2.018416753304619, - "grad_norm": 1.0646690130233765, - "learning_rate": 1.7889499480172288e-05, - "loss": 0.084, + "epoch": 4.036833506609238, + "grad_norm": 1.0047610998153687, + "learning_rate": 5.963166493390763e-06, + "loss": 0.0693, "step": 27180 }, { - "epoch": 2.019159364324967, - "grad_norm": 1.4827255010604858, - "learning_rate": 1.78850438140502e-05, - "loss": 0.0448, + "epoch": 4.038318728649934, + "grad_norm": 0.9337833523750305, + "learning_rate": 5.961681271350067e-06, + "loss": 0.0608, "step": 27190 }, { - "epoch": 2.0199019753453142, - "grad_norm": 1.7980319261550903, - "learning_rate": 1.7880588147928118e-05, - "loss": 0.0918, + "epoch": 4.0398039506906285, + "grad_norm": 0.8169094324111938, + "learning_rate": 5.9601960493093725e-06, + "loss": 0.0591, "step": 27200 }, { - "epoch": 2.0206445863656617, - "grad_norm": 1.5512464046478271, - "learning_rate": 1.787613248180603e-05, - "loss": 0.0685, + "epoch": 4.041289172731323, + "grad_norm": 0.7001012563705444, + "learning_rate": 5.958710827268677e-06, + "loss": 0.0627, "step": 27210 }, { - "epoch": 2.021387197386009, - "grad_norm": 1.1397250890731812, - "learning_rate": 1.7871676815683944e-05, - "loss": 0.0731, + "epoch": 4.042774394772018, + "grad_norm": 0.5708398222923279, + "learning_rate": 5.957225605227982e-06, + "loss": 0.0448, "step": 27220 }, { - "epoch": 2.0221298084063566, - "grad_norm": 2.0807673931121826, - "learning_rate": 1.7867221149561863e-05, - "loss": 0.0851, + "epoch": 4.044259616812713, + "grad_norm": 0.5236473083496094, + "learning_rate": 5.9557403831872875e-06, + "loss": 0.0596, "step": 27230 }, { - "epoch": 2.0228724194267045, - "grad_norm": 3.3200225830078125, - "learning_rate": 1.7862765483439774e-05, - "loss": 0.0615, + "epoch": 4.045744838853409, + "grad_norm": 0.7374882698059082, + "learning_rate": 5.954255161146591e-06, + "loss": 0.0631, "step": 27240 }, { - "epoch": 2.023615030447052, - "grad_norm": 1.3116739988327026, - "learning_rate": 1.785830981731769e-05, - "loss": 0.0569, + "epoch": 4.047230060894104, + "grad_norm": 0.6501604318618774, + "learning_rate": 5.952769939105897e-06, + "loss": 0.0814, "step": 27250 }, { - "epoch": 2.0243576414673994, - "grad_norm": 1.8456593751907349, - "learning_rate": 1.7853854151195604e-05, - "loss": 0.0873, + "epoch": 4.048715282934799, + "grad_norm": 0.6356256604194641, + "learning_rate": 5.9512847170652025e-06, + "loss": 0.0734, "step": 27260 }, { - "epoch": 2.025100252487747, - "grad_norm": 1.7277987003326416, - "learning_rate": 1.784939848507352e-05, - "loss": 0.0576, + "epoch": 4.050200504975494, + "grad_norm": 1.1191198825836182, + "learning_rate": 5.949799495024506e-06, + "loss": 0.0536, "step": 27270 }, { - "epoch": 2.0258428635080943, - "grad_norm": 0.4091399013996124, - "learning_rate": 1.7844942818951434e-05, - "loss": 0.0437, + "epoch": 4.051685727016189, + "grad_norm": 1.2817082405090332, + "learning_rate": 5.948314272983811e-06, + "loss": 0.0651, "step": 27280 }, { - "epoch": 2.026585474528442, - "grad_norm": 3.058016300201416, - "learning_rate": 1.784048715282935e-05, - "loss": 0.0525, + "epoch": 4.053170949056884, + "grad_norm": 0.793324887752533, + "learning_rate": 5.946829050943117e-06, + "loss": 0.0629, "step": 27290 }, { - "epoch": 2.0273280855487896, - "grad_norm": 3.718642234802246, - "learning_rate": 1.7836031486707264e-05, - "loss": 0.0712, + "epoch": 4.054656171097579, + "grad_norm": 0.8034148216247559, + "learning_rate": 5.945343828902421e-06, + "loss": 0.0558, "step": 27300 }, { - "epoch": 2.028070696569137, - "grad_norm": 2.157290458679199, - "learning_rate": 1.783157582058518e-05, - "loss": 0.0617, + "epoch": 4.056141393138274, + "grad_norm": 0.8231066465377808, + "learning_rate": 5.943858606861726e-06, + "loss": 0.0643, "step": 27310 }, { - "epoch": 2.0288133075894845, - "grad_norm": 2.4551494121551514, - "learning_rate": 1.782712015446309e-05, - "loss": 0.0888, + "epoch": 4.057626615178969, + "grad_norm": 0.7635292410850525, + "learning_rate": 5.942373384821031e-06, + "loss": 0.076, "step": 27320 }, { - "epoch": 2.029555918609832, - "grad_norm": 1.5772738456726074, - "learning_rate": 1.7822664488341006e-05, - "loss": 0.0713, + "epoch": 4.059111837219664, + "grad_norm": 0.9660631418228149, + "learning_rate": 5.940888162780336e-06, + "loss": 0.0491, "step": 27330 }, { - "epoch": 2.03029852963018, - "grad_norm": 0.9461155533790588, - "learning_rate": 1.7818208822218924e-05, - "loss": 0.0612, + "epoch": 4.06059705926036, + "grad_norm": 1.2343653440475464, + "learning_rate": 5.939402940739641e-06, + "loss": 0.089, "step": 27340 }, { - "epoch": 2.0310411406505273, - "grad_norm": 1.6049461364746094, - "learning_rate": 1.7813753156096836e-05, - "loss": 0.063, + "epoch": 4.062082281301055, + "grad_norm": 1.1457990407943726, + "learning_rate": 5.937917718698946e-06, + "loss": 0.0544, "step": 27350 }, { - "epoch": 2.0317837516708748, - "grad_norm": 0.41540223360061646, - "learning_rate": 1.780929748997475e-05, - "loss": 0.064, + "epoch": 4.0635675033417495, + "grad_norm": 0.5646716356277466, + "learning_rate": 5.936432496658251e-06, + "loss": 0.0587, "step": 27360 }, { - "epoch": 2.032526362691222, - "grad_norm": 1.894874095916748, - "learning_rate": 1.780484182385267e-05, - "loss": 0.0669, + "epoch": 4.065052725382444, + "grad_norm": 0.5194732546806335, + "learning_rate": 5.934947274617556e-06, + "loss": 0.0533, "step": 27370 }, { - "epoch": 2.0332689737115697, - "grad_norm": 0.4419223964214325, - "learning_rate": 1.780038615773058e-05, - "loss": 0.0402, + "epoch": 4.066537947423139, + "grad_norm": 1.2401492595672607, + "learning_rate": 5.933462052576861e-06, + "loss": 0.0585, "step": 27380 }, { - "epoch": 2.0340115847319176, - "grad_norm": 0.962150514125824, - "learning_rate": 1.7795930491608496e-05, - "loss": 0.0849, + "epoch": 4.068023169463835, + "grad_norm": 0.6900237202644348, + "learning_rate": 5.931976830536166e-06, + "loss": 0.0612, "step": 27390 }, { - "epoch": 2.034754195752265, - "grad_norm": 1.1691762208938599, - "learning_rate": 1.779147482548641e-05, - "loss": 0.0736, + "epoch": 4.06950839150453, + "grad_norm": 1.1152747869491577, + "learning_rate": 5.93049160849547e-06, + "loss": 0.0562, "step": 27400 }, { - "epoch": 2.0354968067726125, - "grad_norm": 1.5563279390335083, - "learning_rate": 1.7787019159364326e-05, - "loss": 0.0569, + "epoch": 4.070993613545225, + "grad_norm": 1.0916218757629395, + "learning_rate": 5.929006386454775e-06, + "loss": 0.0718, "step": 27410 }, { - "epoch": 2.03623941779296, - "grad_norm": 2.58371639251709, - "learning_rate": 1.778256349324224e-05, - "loss": 0.0672, + "epoch": 4.07247883558592, + "grad_norm": 0.3414757549762726, + "learning_rate": 5.927521164414081e-06, + "loss": 0.0556, "step": 27420 }, { - "epoch": 2.0369820288133074, - "grad_norm": 0.5150777101516724, - "learning_rate": 1.7778107827120152e-05, - "loss": 0.0833, + "epoch": 4.073964057626615, + "grad_norm": 0.3224826753139496, + "learning_rate": 5.926035942373385e-06, + "loss": 0.0722, "step": 27430 }, { - "epoch": 2.0377246398336553, - "grad_norm": 2.1422712802886963, - "learning_rate": 1.777365216099807e-05, - "loss": 0.0657, + "epoch": 4.0754492796673105, + "grad_norm": 0.6068642139434814, + "learning_rate": 5.92455072033269e-06, + "loss": 0.0645, "step": 27440 }, { - "epoch": 2.0384672508540027, - "grad_norm": 1.660101294517517, - "learning_rate": 1.7769196494875986e-05, - "loss": 0.0678, + "epoch": 4.076934501708005, + "grad_norm": 0.46677640080451965, + "learning_rate": 5.923065498291995e-06, + "loss": 0.0769, "step": 27450 }, { - "epoch": 2.03920986187435, - "grad_norm": 2.03857159614563, - "learning_rate": 1.7764740828753897e-05, - "loss": 0.0801, + "epoch": 4.0784197237487, + "grad_norm": 0.8329014778137207, + "learning_rate": 5.9215802762513e-06, + "loss": 0.0581, "step": 27460 }, { - "epoch": 2.0399524728946976, - "grad_norm": 1.208047866821289, - "learning_rate": 1.7760285162631816e-05, - "loss": 0.0321, + "epoch": 4.079904945789395, + "grad_norm": 0.8308506011962891, + "learning_rate": 5.920095054210605e-06, + "loss": 0.0844, "step": 27470 }, { - "epoch": 2.0406950839150455, - "grad_norm": 0.9345359206199646, - "learning_rate": 1.775582949650973e-05, - "loss": 0.0667, + "epoch": 4.081390167830091, + "grad_norm": 0.7580461502075195, + "learning_rate": 5.91860983216991e-06, + "loss": 0.0577, "step": 27480 }, { - "epoch": 2.041437694935393, - "grad_norm": 2.416853427886963, - "learning_rate": 1.7751373830387642e-05, - "loss": 0.089, + "epoch": 4.082875389870786, + "grad_norm": 0.7593045234680176, + "learning_rate": 5.917124610129215e-06, + "loss": 0.0696, "step": 27490 }, { - "epoch": 2.0421803059557404, - "grad_norm": 0.6501384377479553, - "learning_rate": 1.7746918164265557e-05, - "loss": 0.0976, + "epoch": 4.084360611911481, + "grad_norm": 1.2238883972167969, + "learning_rate": 5.91563938808852e-06, + "loss": 0.0808, "step": 27500 }, { - "epoch": 2.042922916976088, - "grad_norm": 1.3015940189361572, - "learning_rate": 1.7742462498143472e-05, - "loss": 0.0723, + "epoch": 4.085845833952176, + "grad_norm": 0.7654632329940796, + "learning_rate": 5.914154166047824e-06, + "loss": 0.0598, "step": 27510 }, { - "epoch": 2.0436655279964353, - "grad_norm": 1.5806890726089478, - "learning_rate": 1.7738006832021387e-05, - "loss": 0.0675, + "epoch": 4.087331055992871, + "grad_norm": 1.2114931344985962, + "learning_rate": 5.91266894400713e-06, + "loss": 0.0651, "step": 27520 }, { - "epoch": 2.044408139016783, - "grad_norm": 1.8144307136535645, - "learning_rate": 1.7733551165899302e-05, - "loss": 0.0506, + "epoch": 4.088816278033566, + "grad_norm": 0.4927174746990204, + "learning_rate": 5.911183721966435e-06, + "loss": 0.0679, "step": 27530 }, { - "epoch": 2.0451507500371306, - "grad_norm": 1.104903221130371, - "learning_rate": 1.7729095499777217e-05, - "loss": 0.0819, + "epoch": 4.090301500074261, + "grad_norm": 1.2465150356292725, + "learning_rate": 5.909698499925739e-06, + "loss": 0.0568, "step": 27540 }, { - "epoch": 2.045893361057478, - "grad_norm": 2.584608554840088, - "learning_rate": 1.7724639833655132e-05, - "loss": 0.0699, + "epoch": 4.091786722114956, + "grad_norm": 0.47399216890335083, + "learning_rate": 5.908213277885045e-06, + "loss": 0.0677, "step": 27550 }, { - "epoch": 2.0466359720778255, - "grad_norm": 2.7305595874786377, - "learning_rate": 1.7720184167533047e-05, - "loss": 0.0954, + "epoch": 4.093271944155651, + "grad_norm": 0.7320268750190735, + "learning_rate": 5.90672805584435e-06, + "loss": 0.0872, "step": 27560 }, { - "epoch": 2.047378583098173, - "grad_norm": 1.475791573524475, - "learning_rate": 1.771572850141096e-05, - "loss": 0.0682, + "epoch": 4.094757166196346, + "grad_norm": 0.4289158582687378, + "learning_rate": 5.9052428338036535e-06, + "loss": 0.0718, "step": 27570 }, { - "epoch": 2.048121194118521, - "grad_norm": 0.9141472578048706, - "learning_rate": 1.7711272835288877e-05, - "loss": 0.0583, + "epoch": 4.096242388237042, + "grad_norm": 0.8013712763786316, + "learning_rate": 5.903757611762959e-06, + "loss": 0.0584, "step": 27580 }, { - "epoch": 2.0488638051388683, - "grad_norm": 1.4412683248519897, - "learning_rate": 1.7706817169166792e-05, - "loss": 0.0739, + "epoch": 4.097727610277737, + "grad_norm": 0.7357133030891418, + "learning_rate": 5.902272389722264e-06, + "loss": 0.0568, "step": 27590 }, { - "epoch": 2.049606416159216, - "grad_norm": 2.051802158355713, - "learning_rate": 1.7702361503044704e-05, - "loss": 0.0864, + "epoch": 4.099212832318432, + "grad_norm": 0.7965511083602905, + "learning_rate": 5.9007871676815685e-06, + "loss": 0.0616, "step": 27600 }, { - "epoch": 2.0503490271795632, - "grad_norm": 1.61028254032135, - "learning_rate": 1.7697905836922622e-05, - "loss": 0.0379, + "epoch": 4.1006980543591265, + "grad_norm": 0.9441985487937927, + "learning_rate": 5.899301945640874e-06, + "loss": 0.0695, "step": 27610 }, { - "epoch": 2.0510916381999107, - "grad_norm": 1.2123854160308838, - "learning_rate": 1.7693450170800534e-05, - "loss": 0.0821, + "epoch": 4.102183276399821, + "grad_norm": 0.781085193157196, + "learning_rate": 5.897816723600178e-06, + "loss": 0.0538, "step": 27620 }, { - "epoch": 2.0518342492202586, - "grad_norm": 2.0213537216186523, - "learning_rate": 1.768899450467845e-05, - "loss": 0.0724, + "epoch": 4.103668498440517, + "grad_norm": 0.7707614302635193, + "learning_rate": 5.8963315015594835e-06, + "loss": 0.0568, "step": 27630 }, { - "epoch": 2.052576860240606, - "grad_norm": 1.507546067237854, - "learning_rate": 1.7684538838556367e-05, - "loss": 0.0844, + "epoch": 4.105153720481212, + "grad_norm": 0.6148474216461182, + "learning_rate": 5.894846279518789e-06, + "loss": 0.0533, "step": 27640 }, { - "epoch": 2.0533194712609535, - "grad_norm": 1.3332023620605469, - "learning_rate": 1.768008317243428e-05, - "loss": 0.0679, + "epoch": 4.106638942521907, + "grad_norm": 0.46605074405670166, + "learning_rate": 5.893361057478093e-06, + "loss": 0.0554, "step": 27650 }, { - "epoch": 2.054062082281301, - "grad_norm": 1.5484570264816284, - "learning_rate": 1.7675627506312194e-05, - "loss": 0.0898, + "epoch": 4.108124164562602, + "grad_norm": 1.4958701133728027, + "learning_rate": 5.8918758354373985e-06, + "loss": 0.0696, "step": 27660 }, { - "epoch": 2.0548046933016484, - "grad_norm": 1.267923355102539, - "learning_rate": 1.767117184019011e-05, - "loss": 0.1073, + "epoch": 4.109609386603297, + "grad_norm": 0.6936403512954712, + "learning_rate": 5.890390613396704e-06, + "loss": 0.0623, "step": 27670 }, { - "epoch": 2.0555473043219963, - "grad_norm": 2.02040958404541, - "learning_rate": 1.7666716174068024e-05, - "loss": 0.0747, + "epoch": 4.111094608643993, + "grad_norm": 1.3368117809295654, + "learning_rate": 5.888905391356008e-06, + "loss": 0.0742, "step": 27680 }, { - "epoch": 2.0562899153423437, - "grad_norm": 0.6762095093727112, - "learning_rate": 1.766226050794594e-05, - "loss": 0.0501, + "epoch": 4.1125798306846875, + "grad_norm": 1.0717170238494873, + "learning_rate": 5.8874201693153135e-06, + "loss": 0.0521, "step": 27690 }, { - "epoch": 2.057032526362691, - "grad_norm": 2.8425498008728027, - "learning_rate": 1.7657804841823854e-05, - "loss": 0.0708, + "epoch": 4.114065052725382, + "grad_norm": 1.2059624195098877, + "learning_rate": 5.885934947274619e-06, + "loss": 0.0775, "step": 27700 }, { - "epoch": 2.0577751373830386, - "grad_norm": 2.2198352813720703, - "learning_rate": 1.765334917570177e-05, - "loss": 0.0625, + "epoch": 4.115550274766077, + "grad_norm": 0.5689356923103333, + "learning_rate": 5.884449725233923e-06, + "loss": 0.0597, "step": 27710 }, { - "epoch": 2.0585177484033865, - "grad_norm": 0.9430508613586426, - "learning_rate": 1.7648893509579684e-05, - "loss": 0.0844, + "epoch": 4.117035496806773, + "grad_norm": 1.1349416971206665, + "learning_rate": 5.8829645031932285e-06, + "loss": 0.0588, "step": 27720 }, { - "epoch": 2.059260359423734, - "grad_norm": 3.4453015327453613, - "learning_rate": 1.7644437843457595e-05, - "loss": 0.0821, + "epoch": 4.118520718847468, + "grad_norm": 0.9291647672653198, + "learning_rate": 5.881479281152532e-06, + "loss": 0.0506, "step": 27730 }, { - "epoch": 2.0600029704440814, - "grad_norm": 1.2415636777877808, - "learning_rate": 1.763998217733551e-05, - "loss": 0.0574, + "epoch": 4.120005940888163, + "grad_norm": 0.9654160141944885, + "learning_rate": 5.879994059111837e-06, + "loss": 0.0616, "step": 27740 }, { - "epoch": 2.060745581464429, - "grad_norm": 1.088160753250122, - "learning_rate": 1.763552651121343e-05, - "loss": 0.0639, + "epoch": 4.121491162928858, + "grad_norm": 0.6678839921951294, + "learning_rate": 5.878508837071143e-06, + "loss": 0.0505, "step": 27750 }, { - "epoch": 2.0614881924847763, - "grad_norm": 0.9937611818313599, - "learning_rate": 1.763107084509134e-05, - "loss": 0.0905, + "epoch": 4.122976384969553, + "grad_norm": 0.7414665222167969, + "learning_rate": 5.877023615030447e-06, + "loss": 0.0543, "step": 27760 }, { - "epoch": 2.062230803505124, - "grad_norm": 0.7093477845191956, - "learning_rate": 1.7626615178969255e-05, - "loss": 0.0714, + "epoch": 4.124461607010248, + "grad_norm": 1.0471031665802002, + "learning_rate": 5.875538392989752e-06, + "loss": 0.0598, "step": 27770 }, { - "epoch": 2.0629734145254717, - "grad_norm": 2.274669885635376, - "learning_rate": 1.7622159512847174e-05, - "loss": 0.0692, + "epoch": 4.125946829050943, + "grad_norm": 0.4374360740184784, + "learning_rate": 5.874053170949058e-06, + "loss": 0.0503, "step": 27780 }, { - "epoch": 2.063716025545819, - "grad_norm": 1.7703497409820557, - "learning_rate": 1.7617703846725085e-05, - "loss": 0.0621, + "epoch": 4.127432051091638, + "grad_norm": 1.4006041288375854, + "learning_rate": 5.872567948908362e-06, + "loss": 0.0502, "step": 27790 }, { - "epoch": 2.0644586365661666, - "grad_norm": 0.3393421173095703, - "learning_rate": 1.7613248180603e-05, - "loss": 0.0528, + "epoch": 4.128917273132333, + "grad_norm": 0.6150054335594177, + "learning_rate": 5.871082726867667e-06, + "loss": 0.0639, "step": 27800 }, { - "epoch": 2.065201247586514, - "grad_norm": 1.3570091724395752, - "learning_rate": 1.760879251448092e-05, - "loss": 0.0585, + "epoch": 4.130402495173028, + "grad_norm": 0.6856205463409424, + "learning_rate": 5.869597504826973e-06, + "loss": 0.0635, "step": 27810 }, { - "epoch": 2.065943858606862, - "grad_norm": 1.6953606605529785, - "learning_rate": 1.760433684835883e-05, - "loss": 0.0678, + "epoch": 4.131887717213724, + "grad_norm": 0.5962873101234436, + "learning_rate": 5.868112282786277e-06, + "loss": 0.0538, "step": 27820 }, { - "epoch": 2.0666864696272094, - "grad_norm": 0.6332347393035889, - "learning_rate": 1.7599881182236745e-05, - "loss": 0.0474, + "epoch": 4.133372939254419, + "grad_norm": 1.0894391536712646, + "learning_rate": 5.866627060745582e-06, + "loss": 0.053, "step": 27830 }, { - "epoch": 2.067429080647557, - "grad_norm": 4.120887756347656, - "learning_rate": 1.7595425516114657e-05, - "loss": 0.1038, + "epoch": 4.134858161295114, + "grad_norm": 0.7894336581230164, + "learning_rate": 5.865141838704887e-06, + "loss": 0.0634, "step": 27840 }, { - "epoch": 2.0681716916679043, - "grad_norm": 2.6775243282318115, - "learning_rate": 1.7590969849992575e-05, - "loss": 0.0786, + "epoch": 4.1363433833358085, + "grad_norm": 0.5546037554740906, + "learning_rate": 5.863656616664192e-06, + "loss": 0.057, "step": 27850 }, { - "epoch": 2.0689143026882517, - "grad_norm": 1.4522796869277954, - "learning_rate": 1.758651418387049e-05, - "loss": 0.0603, + "epoch": 4.137828605376503, + "grad_norm": 0.5174780488014221, + "learning_rate": 5.862171394623497e-06, + "loss": 0.0778, "step": 27860 }, { - "epoch": 2.0696569137085996, - "grad_norm": 1.7313041687011719, - "learning_rate": 1.7582058517748402e-05, - "loss": 0.0938, + "epoch": 4.139313827417199, + "grad_norm": 0.5153068900108337, + "learning_rate": 5.860686172582801e-06, + "loss": 0.0575, "step": 27870 }, { - "epoch": 2.070399524728947, - "grad_norm": 0.7014159560203552, - "learning_rate": 1.757760285162632e-05, - "loss": 0.0905, + "epoch": 4.140799049457894, + "grad_norm": 0.936219334602356, + "learning_rate": 5.859200950542106e-06, + "loss": 0.0567, "step": 27880 }, { - "epoch": 2.0711421357492945, - "grad_norm": 0.5226468443870544, - "learning_rate": 1.7573147185504235e-05, - "loss": 0.0669, + "epoch": 4.142284271498589, + "grad_norm": 0.8427369594573975, + "learning_rate": 5.857715728501412e-06, + "loss": 0.0647, "step": 27890 }, { - "epoch": 2.071884746769642, - "grad_norm": 0.4912814795970917, - "learning_rate": 1.7568691519382147e-05, - "loss": 0.0562, + "epoch": 4.143769493539284, + "grad_norm": 0.8495630621910095, + "learning_rate": 5.856230506460716e-06, + "loss": 0.0648, "step": 27900 }, { - "epoch": 2.0726273577899894, - "grad_norm": 0.46344590187072754, - "learning_rate": 1.7564235853260062e-05, - "loss": 0.0722, + "epoch": 4.145254715579979, + "grad_norm": 1.1603466272354126, + "learning_rate": 5.854745284420021e-06, + "loss": 0.0584, "step": 27910 }, { - "epoch": 2.0733699688103373, - "grad_norm": 0.5316863656044006, - "learning_rate": 1.7559780187137977e-05, - "loss": 0.0782, + "epoch": 4.146739937620675, + "grad_norm": 0.9675669074058533, + "learning_rate": 5.853260062379326e-06, + "loss": 0.051, "step": 27920 }, { - "epoch": 2.0741125798306848, - "grad_norm": 1.1357983350753784, - "learning_rate": 1.7555324521015892e-05, - "loss": 0.0679, + "epoch": 4.1482251596613695, + "grad_norm": 0.7146968841552734, + "learning_rate": 5.851774840338631e-06, + "loss": 0.0536, "step": 27930 }, { - "epoch": 2.074855190851032, - "grad_norm": 0.679740309715271, - "learning_rate": 1.7550868854893807e-05, - "loss": 0.0618, + "epoch": 4.149710381702064, + "grad_norm": 0.6189327239990234, + "learning_rate": 5.850289618297936e-06, + "loss": 0.0807, "step": 27940 }, { - "epoch": 2.0755978018713797, - "grad_norm": 1.716489553451538, - "learning_rate": 1.7546413188771722e-05, - "loss": 0.1001, + "epoch": 4.151195603742759, + "grad_norm": 1.1867696046829224, + "learning_rate": 5.848804396257241e-06, + "loss": 0.0652, "step": 27950 }, { - "epoch": 2.076340412891727, - "grad_norm": 1.4694855213165283, - "learning_rate": 1.7541957522649637e-05, - "loss": 0.068, + "epoch": 4.152680825783454, + "grad_norm": 1.0013492107391357, + "learning_rate": 5.847319174216546e-06, + "loss": 0.0767, "step": 27960 }, { - "epoch": 2.077083023912075, - "grad_norm": 2.162365436553955, - "learning_rate": 1.7537501856527552e-05, - "loss": 0.0555, + "epoch": 4.15416604782415, + "grad_norm": 0.9447083473205566, + "learning_rate": 5.845833952175851e-06, + "loss": 0.0527, "step": 27970 }, { - "epoch": 2.0778256349324224, - "grad_norm": 1.0880649089813232, - "learning_rate": 1.7533046190405463e-05, - "loss": 0.0558, + "epoch": 4.155651269864845, + "grad_norm": 0.6696498990058899, + "learning_rate": 5.844348730135156e-06, + "loss": 0.0426, "step": 27980 }, { - "epoch": 2.07856824595277, - "grad_norm": 2.1188676357269287, - "learning_rate": 1.7528590524283382e-05, - "loss": 0.0862, + "epoch": 4.15713649190554, + "grad_norm": 0.5020086169242859, + "learning_rate": 5.842863508094461e-06, + "loss": 0.0635, "step": 27990 }, { - "epoch": 2.0793108569731173, - "grad_norm": 1.4988460540771484, - "learning_rate": 1.7524134858161297e-05, - "loss": 0.0816, + "epoch": 4.158621713946235, + "grad_norm": 0.3135513961315155, + "learning_rate": 5.841378286053766e-06, + "loss": 0.0669, "step": 28000 }, { - "epoch": 2.080053467993465, - "grad_norm": 0.9901124238967896, - "learning_rate": 1.751967919203921e-05, - "loss": 0.0772, + "epoch": 4.16010693598693, + "grad_norm": 0.532892107963562, + "learning_rate": 5.839893064013071e-06, + "loss": 0.0479, "step": 28010 }, { - "epoch": 2.0807960790138127, - "grad_norm": 1.7967792749404907, - "learning_rate": 1.7515223525917127e-05, - "loss": 0.062, + "epoch": 4.161592158027625, + "grad_norm": 0.9354525804519653, + "learning_rate": 5.838407841972376e-06, + "loss": 0.0716, "step": 28020 }, { - "epoch": 2.08153869003416, - "grad_norm": 1.3273664712905884, - "learning_rate": 1.751076785979504e-05, - "loss": 0.0599, + "epoch": 4.16307738006832, + "grad_norm": 0.5970202684402466, + "learning_rate": 5.8369226199316795e-06, + "loss": 0.0754, "step": 28030 }, { - "epoch": 2.0822813010545076, - "grad_norm": 2.504648447036743, - "learning_rate": 1.7506312193672953e-05, - "loss": 0.0437, + "epoch": 4.164562602109015, + "grad_norm": 0.3913607597351074, + "learning_rate": 5.835437397890985e-06, + "loss": 0.0508, "step": 28040 }, { - "epoch": 2.083023912074855, - "grad_norm": 1.477177381515503, - "learning_rate": 1.7501856527550872e-05, - "loss": 0.0927, + "epoch": 4.16604782414971, + "grad_norm": 0.6784725785255432, + "learning_rate": 5.83395217585029e-06, + "loss": 0.065, "step": 28050 }, { - "epoch": 2.083766523095203, - "grad_norm": 1.7123514413833618, - "learning_rate": 1.7497400861428783e-05, - "loss": 0.0999, + "epoch": 4.167533046190406, + "grad_norm": 0.5708526968955994, + "learning_rate": 5.8324669538095944e-06, + "loss": 0.0527, "step": 28060 }, { - "epoch": 2.0845091341155504, - "grad_norm": 0.7901507616043091, - "learning_rate": 1.74929451953067e-05, - "loss": 0.0681, + "epoch": 4.169018268231101, + "grad_norm": 0.23578904569149017, + "learning_rate": 5.8309817317689e-06, + "loss": 0.0572, "step": 28070 }, { - "epoch": 2.085251745135898, - "grad_norm": 0.9315057396888733, - "learning_rate": 1.7488489529184613e-05, - "loss": 0.045, + "epoch": 4.170503490271796, + "grad_norm": 0.5932421088218689, + "learning_rate": 5.829496509728205e-06, + "loss": 0.0754, "step": 28080 }, { - "epoch": 2.0859943561562453, - "grad_norm": 0.80745929479599, - "learning_rate": 1.748403386306253e-05, - "loss": 0.0729, + "epoch": 4.171988712312491, + "grad_norm": 0.3044351041316986, + "learning_rate": 5.8280112876875094e-06, + "loss": 0.0674, "step": 28090 }, { - "epoch": 2.0867369671765927, - "grad_norm": 1.3841748237609863, - "learning_rate": 1.7479578196940443e-05, - "loss": 0.0651, + "epoch": 4.1734739343531855, + "grad_norm": 0.8977261185646057, + "learning_rate": 5.826526065646815e-06, + "loss": 0.0568, "step": 28100 }, { - "epoch": 2.0874795781969406, - "grad_norm": 4.800222873687744, - "learning_rate": 1.747512253081836e-05, - "loss": 0.0764, + "epoch": 4.174959156393881, + "grad_norm": 0.9061443209648132, + "learning_rate": 5.82504084360612e-06, + "loss": 0.0864, "step": 28110 }, { - "epoch": 2.088222189217288, - "grad_norm": 1.6602140665054321, - "learning_rate": 1.7470666864696273e-05, - "loss": 0.0666, + "epoch": 4.176444378434576, + "grad_norm": 0.8516085147857666, + "learning_rate": 5.8235556215654244e-06, + "loss": 0.0659, "step": 28120 }, { - "epoch": 2.0889648002376355, - "grad_norm": 1.5725599527359009, - "learning_rate": 1.7466211198574188e-05, - "loss": 0.0759, + "epoch": 4.177929600475271, + "grad_norm": 0.7346743941307068, + "learning_rate": 5.82207039952473e-06, + "loss": 0.0557, "step": 28130 }, { - "epoch": 2.089707411257983, - "grad_norm": 2.319767713546753, - "learning_rate": 1.74617555324521e-05, - "loss": 0.1133, + "epoch": 4.179414822515966, + "grad_norm": 0.5565342903137207, + "learning_rate": 5.820585177484034e-06, + "loss": 0.0747, "step": 28140 }, { - "epoch": 2.0904500222783304, - "grad_norm": 2.916980266571045, - "learning_rate": 1.7457299866330015e-05, - "loss": 0.0703, + "epoch": 4.180900044556661, + "grad_norm": 1.4357632398605347, + "learning_rate": 5.8190999554433394e-06, + "loss": 0.0723, "step": 28150 }, { - "epoch": 2.0911926332986783, - "grad_norm": 2.074702501296997, - "learning_rate": 1.7452844200207933e-05, - "loss": 0.0736, + "epoch": 4.182385266597357, + "grad_norm": 0.7222462296485901, + "learning_rate": 5.817614733402645e-06, + "loss": 0.0552, "step": 28160 }, { - "epoch": 2.0919352443190258, - "grad_norm": 1.4027667045593262, - "learning_rate": 1.7448388534085845e-05, - "loss": 0.0675, + "epoch": 4.1838704886380516, + "grad_norm": 1.0323468446731567, + "learning_rate": 5.816129511361948e-06, + "loss": 0.0525, "step": 28170 }, { - "epoch": 2.0926778553393732, - "grad_norm": 2.333289861679077, - "learning_rate": 1.744393286796376e-05, - "loss": 0.0573, + "epoch": 4.1853557106787465, + "grad_norm": 0.4670025110244751, + "learning_rate": 5.8146442893212544e-06, + "loss": 0.0661, "step": 28180 }, { - "epoch": 2.0934204663597207, - "grad_norm": 1.7453340291976929, - "learning_rate": 1.7439477201841678e-05, - "loss": 0.0922, + "epoch": 4.186840932719441, + "grad_norm": 1.2583537101745605, + "learning_rate": 5.81315906728056e-06, + "loss": 0.0655, "step": 28190 }, { - "epoch": 2.094163077380068, - "grad_norm": 0.6930978894233704, - "learning_rate": 1.743502153571959e-05, - "loss": 0.0847, + "epoch": 4.188326154760136, + "grad_norm": 0.6650807857513428, + "learning_rate": 5.811673845239863e-06, + "loss": 0.0748, "step": 28200 }, { - "epoch": 2.094905688400416, - "grad_norm": 1.3762452602386475, - "learning_rate": 1.7430565869597505e-05, - "loss": 0.1109, + "epoch": 4.189811376800832, + "grad_norm": 1.1885240077972412, + "learning_rate": 5.8101886231991686e-06, + "loss": 0.0543, "step": 28210 }, { - "epoch": 2.0956482994207635, - "grad_norm": 0.3616078197956085, - "learning_rate": 1.7426110203475423e-05, - "loss": 0.0753, + "epoch": 4.191296598841527, + "grad_norm": 0.7857943773269653, + "learning_rate": 5.808703401158474e-06, + "loss": 0.0827, "step": 28220 }, { - "epoch": 2.096390910441111, - "grad_norm": 2.1487832069396973, - "learning_rate": 1.7421654537353335e-05, - "loss": 0.0828, + "epoch": 4.192781820882222, + "grad_norm": 0.4552346169948578, + "learning_rate": 5.807218179117778e-06, + "loss": 0.0584, "step": 28230 }, { - "epoch": 2.0971335214614584, - "grad_norm": 0.9581325054168701, - "learning_rate": 1.741719887123125e-05, - "loss": 0.0691, + "epoch": 4.194267042922917, + "grad_norm": 0.7268344163894653, + "learning_rate": 5.8057329570770836e-06, + "loss": 0.0529, "step": 28240 }, { - "epoch": 2.097876132481806, - "grad_norm": 2.8964858055114746, - "learning_rate": 1.741274320510916e-05, - "loss": 0.0917, + "epoch": 4.195752264963612, + "grad_norm": 1.4268473386764526, + "learning_rate": 5.804247735036388e-06, + "loss": 0.0554, "step": 28250 }, { - "epoch": 2.0986187435021537, - "grad_norm": 0.576937198638916, - "learning_rate": 1.740828753898708e-05, - "loss": 0.0624, + "epoch": 4.197237487004307, + "grad_norm": 0.6411811709403992, + "learning_rate": 5.802762512995693e-06, + "loss": 0.0548, "step": 28260 }, { - "epoch": 2.099361354522501, - "grad_norm": 1.5471432209014893, - "learning_rate": 1.7403831872864995e-05, - "loss": 0.0873, + "epoch": 4.198722709045002, + "grad_norm": 0.8503941297531128, + "learning_rate": 5.8012772909549986e-06, + "loss": 0.0649, "step": 28270 }, { - "epoch": 2.1001039655428486, - "grad_norm": 1.0015627145767212, - "learning_rate": 1.7399376206742906e-05, - "loss": 0.0765, + "epoch": 4.200207931085697, + "grad_norm": 0.8192022442817688, + "learning_rate": 5.799792068914303e-06, + "loss": 0.0422, "step": 28280 }, { - "epoch": 2.100846576563196, - "grad_norm": 2.5197479724884033, - "learning_rate": 1.7394920540620825e-05, - "loss": 0.0679, + "epoch": 4.201693153126392, + "grad_norm": 0.9261953234672546, + "learning_rate": 5.798306846873608e-06, + "loss": 0.0596, "step": 28290 }, { - "epoch": 2.101589187583544, - "grad_norm": 1.3474407196044922, - "learning_rate": 1.739046487449874e-05, - "loss": 0.0497, + "epoch": 4.203178375167088, + "grad_norm": 0.4752441346645355, + "learning_rate": 5.7968216248329136e-06, + "loss": 0.0722, "step": 28300 }, { - "epoch": 2.1023317986038914, - "grad_norm": 1.8976625204086304, - "learning_rate": 1.738600920837665e-05, - "loss": 0.0885, + "epoch": 4.204663597207783, + "grad_norm": 0.9527250528335571, + "learning_rate": 5.795336402792218e-06, + "loss": 0.0729, "step": 28310 }, { - "epoch": 2.103074409624239, - "grad_norm": 3.648165464401245, - "learning_rate": 1.7381553542254566e-05, - "loss": 0.0964, + "epoch": 4.206148819248478, + "grad_norm": 1.0327355861663818, + "learning_rate": 5.793851180751523e-06, + "loss": 0.0486, "step": 28320 }, { - "epoch": 2.1038170206445863, - "grad_norm": 0.9834181070327759, - "learning_rate": 1.7377097876132485e-05, - "loss": 0.0524, + "epoch": 4.207634041289173, + "grad_norm": 0.6200602054595947, + "learning_rate": 5.7923659587108285e-06, + "loss": 0.0771, "step": 28330 }, { - "epoch": 2.1045596316649338, - "grad_norm": 2.057588815689087, - "learning_rate": 1.7372642210010396e-05, - "loss": 0.0609, + "epoch": 4.2091192633298675, + "grad_norm": 0.8519974946975708, + "learning_rate": 5.790880736670132e-06, + "loss": 0.0919, "step": 28340 }, { - "epoch": 2.1053022426852817, - "grad_norm": 1.9514938592910767, - "learning_rate": 1.736818654388831e-05, - "loss": 0.09, + "epoch": 4.210604485370563, + "grad_norm": 0.9221513271331787, + "learning_rate": 5.789395514629437e-06, + "loss": 0.0481, "step": 28350 }, { - "epoch": 2.106044853705629, - "grad_norm": 0.6397399306297302, - "learning_rate": 1.7363730877766226e-05, - "loss": 0.0805, + "epoch": 4.212089707411258, + "grad_norm": 0.6180176734924316, + "learning_rate": 5.787910292588742e-06, + "loss": 0.0488, "step": 28360 }, { - "epoch": 2.1067874647259766, - "grad_norm": 0.7287691831588745, - "learning_rate": 1.735927521164414e-05, - "loss": 0.0517, + "epoch": 4.213574929451953, + "grad_norm": 0.6846516728401184, + "learning_rate": 5.786425070548047e-06, + "loss": 0.0772, "step": 28370 }, { - "epoch": 2.107530075746324, - "grad_norm": 0.5326383113861084, - "learning_rate": 1.7354819545522056e-05, - "loss": 0.0314, + "epoch": 4.215060151492648, + "grad_norm": 0.6172364950180054, + "learning_rate": 5.784939848507352e-06, + "loss": 0.06, "step": 28380 }, { - "epoch": 2.1082726867666715, - "grad_norm": 0.252254843711853, - "learning_rate": 1.735036387939997e-05, - "loss": 0.0581, + "epoch": 4.216545373533343, + "grad_norm": 0.6358994841575623, + "learning_rate": 5.783454626466657e-06, + "loss": 0.0586, "step": 28390 }, { - "epoch": 2.1090152977870193, - "grad_norm": 3.9282045364379883, - "learning_rate": 1.7345908213277886e-05, - "loss": 0.0943, + "epoch": 4.218030595574039, + "grad_norm": 1.1505266427993774, + "learning_rate": 5.781969404425962e-06, + "loss": 0.0718, "step": 28400 }, { - "epoch": 2.109757908807367, - "grad_norm": 2.1069839000701904, - "learning_rate": 1.73414525471558e-05, - "loss": 0.0704, + "epoch": 4.219515817614734, + "grad_norm": 1.0869107246398926, + "learning_rate": 5.780484182385267e-06, + "loss": 0.0634, "step": 28410 }, { - "epoch": 2.1105005198277142, - "grad_norm": 1.8486924171447754, - "learning_rate": 1.7336996881033713e-05, - "loss": 0.07, + "epoch": 4.2210010396554285, + "grad_norm": 0.9144407510757446, + "learning_rate": 5.778998960344572e-06, + "loss": 0.0774, "step": 28420 }, { - "epoch": 2.1112431308480617, - "grad_norm": 1.9672880172729492, - "learning_rate": 1.733254121491163e-05, - "loss": 0.0827, + "epoch": 4.222486261696123, + "grad_norm": 1.1511156558990479, + "learning_rate": 5.777513738303877e-06, + "loss": 0.0768, "step": 28430 }, { - "epoch": 2.111985741868409, - "grad_norm": 0.7764392495155334, - "learning_rate": 1.7328085548789543e-05, - "loss": 0.056, + "epoch": 4.223971483736818, + "grad_norm": 0.756784200668335, + "learning_rate": 5.7760285162631816e-06, + "loss": 0.0727, "step": 28440 }, { - "epoch": 2.112728352888757, - "grad_norm": 1.0631473064422607, - "learning_rate": 1.7323629882667458e-05, - "loss": 0.0485, + "epoch": 4.225456705777514, + "grad_norm": 0.6658867001533508, + "learning_rate": 5.774543294222487e-06, + "loss": 0.0757, "step": 28450 }, { - "epoch": 2.1134709639091045, - "grad_norm": 1.6349713802337646, - "learning_rate": 1.7319174216545376e-05, - "loss": 0.0778, + "epoch": 4.226941927818209, + "grad_norm": 1.061873197555542, + "learning_rate": 5.773058072181792e-06, + "loss": 0.0628, "step": 28460 }, { - "epoch": 2.114213574929452, - "grad_norm": 2.0537021160125732, - "learning_rate": 1.7314718550423288e-05, - "loss": 0.072, + "epoch": 4.228427149858904, + "grad_norm": 1.0419481992721558, + "learning_rate": 5.7715728501410965e-06, + "loss": 0.0654, "step": 28470 }, { - "epoch": 2.1149561859497994, - "grad_norm": 2.0460641384124756, - "learning_rate": 1.7310262884301203e-05, - "loss": 0.0889, + "epoch": 4.229912371899599, + "grad_norm": 0.7116315364837646, + "learning_rate": 5.770087628100402e-06, + "loss": 0.0696, "step": 28480 }, { - "epoch": 2.115698796970147, - "grad_norm": 1.6083009243011475, - "learning_rate": 1.7305807218179118e-05, - "loss": 0.0685, + "epoch": 4.231397593940294, + "grad_norm": 0.6918877959251404, + "learning_rate": 5.768602406059707e-06, + "loss": 0.0511, "step": 28490 }, { - "epoch": 2.1164414079904947, - "grad_norm": 2.218975305557251, - "learning_rate": 1.7301351552057033e-05, - "loss": 0.0664, + "epoch": 4.2328828159809895, + "grad_norm": 0.6803731322288513, + "learning_rate": 5.767117184019011e-06, + "loss": 0.0639, "step": 28500 }, { - "epoch": 2.117184019010842, - "grad_norm": 1.3092341423034668, - "learning_rate": 1.7296895885934948e-05, - "loss": 0.0673, + "epoch": 4.234368038021684, + "grad_norm": 1.3853286504745483, + "learning_rate": 5.765631961978316e-06, + "loss": 0.0749, "step": 28510 }, { - "epoch": 2.1179266300311896, - "grad_norm": 1.1454136371612549, - "learning_rate": 1.7292440219812863e-05, - "loss": 0.0891, + "epoch": 4.235853260062379, + "grad_norm": 0.3931192457675934, + "learning_rate": 5.764146739937621e-06, + "loss": 0.0612, "step": 28520 }, { - "epoch": 2.118669241051537, - "grad_norm": 0.9033706188201904, - "learning_rate": 1.7287984553690778e-05, - "loss": 0.0671, + "epoch": 4.237338482103074, + "grad_norm": 1.1293772459030151, + "learning_rate": 5.762661517896926e-06, + "loss": 0.0627, "step": 28530 }, { - "epoch": 2.1194118520718845, - "grad_norm": 1.2205688953399658, - "learning_rate": 1.7283528887568693e-05, - "loss": 0.0726, + "epoch": 4.238823704143769, + "grad_norm": 0.9891557693481445, + "learning_rate": 5.761176295856231e-06, + "loss": 0.07, "step": 28540 }, { - "epoch": 2.1201544630922324, - "grad_norm": 1.5144611597061157, - "learning_rate": 1.7279073221446605e-05, - "loss": 0.0493, + "epoch": 4.240308926184465, + "grad_norm": 0.9113637208938599, + "learning_rate": 5.759691073815535e-06, + "loss": 0.0579, "step": 28550 }, { - "epoch": 2.12089707411258, - "grad_norm": 0.9755750894546509, - "learning_rate": 1.727461755532452e-05, - "loss": 0.07, + "epoch": 4.24179414822516, + "grad_norm": 0.3572257161140442, + "learning_rate": 5.758205851774841e-06, + "loss": 0.0655, "step": 28560 }, { - "epoch": 2.1216396851329273, - "grad_norm": 1.5496515035629272, - "learning_rate": 1.7270161889202438e-05, - "loss": 0.0532, + "epoch": 4.243279370265855, + "grad_norm": 0.4821026027202606, + "learning_rate": 5.756720629734146e-06, + "loss": 0.0341, "step": 28570 }, { - "epoch": 2.122382296153275, - "grad_norm": 1.8168680667877197, - "learning_rate": 1.726570622308035e-05, - "loss": 0.0682, + "epoch": 4.24476459230655, + "grad_norm": 0.6576929092407227, + "learning_rate": 5.75523540769345e-06, + "loss": 0.0759, "step": 28580 }, { - "epoch": 2.1231249071736222, - "grad_norm": 1.872641682624817, - "learning_rate": 1.7261250556958264e-05, - "loss": 0.0572, + "epoch": 4.2462498143472445, + "grad_norm": 0.7048593759536743, + "learning_rate": 5.753750185652756e-06, + "loss": 0.0598, "step": 28590 }, { - "epoch": 2.12386751819397, - "grad_norm": 1.2901333570480347, - "learning_rate": 1.7256794890836183e-05, - "loss": 0.0416, + "epoch": 4.24773503638794, + "grad_norm": 0.4831177592277527, + "learning_rate": 5.752264963612061e-06, + "loss": 0.0613, "step": 28600 }, { - "epoch": 2.1246101292143176, - "grad_norm": 1.7562663555145264, - "learning_rate": 1.7252339224714094e-05, - "loss": 0.0695, + "epoch": 4.249220258428635, + "grad_norm": 0.7408661842346191, + "learning_rate": 5.750779741571365e-06, + "loss": 0.0561, "step": 28610 }, { - "epoch": 2.125352740234665, - "grad_norm": 0.7023272514343262, - "learning_rate": 1.724788355859201e-05, - "loss": 0.047, + "epoch": 4.25070548046933, + "grad_norm": 1.274364948272705, + "learning_rate": 5.749294519530671e-06, + "loss": 0.0842, "step": 28620 }, { - "epoch": 2.1260953512550125, - "grad_norm": 2.1506128311157227, - "learning_rate": 1.7243427892469928e-05, - "loss": 0.0718, + "epoch": 4.252190702510025, + "grad_norm": 0.9360983371734619, + "learning_rate": 5.747809297489976e-06, + "loss": 0.0676, "step": 28630 }, { - "epoch": 2.1268379622753604, - "grad_norm": 1.4215508699417114, - "learning_rate": 1.723897222634784e-05, - "loss": 0.0607, + "epoch": 4.253675924550721, + "grad_norm": 1.290618896484375, + "learning_rate": 5.7463240754492795e-06, + "loss": 0.0531, "step": 28640 }, { - "epoch": 2.127580573295708, - "grad_norm": 0.4577394723892212, - "learning_rate": 1.7234516560225754e-05, - "loss": 0.0454, + "epoch": 4.255161146591416, + "grad_norm": 0.827001690864563, + "learning_rate": 5.744838853408586e-06, + "loss": 0.0509, "step": 28650 }, { - "epoch": 2.1283231843160553, - "grad_norm": 1.4944130182266235, - "learning_rate": 1.7230060894103666e-05, - "loss": 0.077, + "epoch": 4.2566463686321105, + "grad_norm": 0.7342594861984253, + "learning_rate": 5.743353631367889e-06, + "loss": 0.0664, "step": 28660 }, { - "epoch": 2.1290657953364027, - "grad_norm": 1.4956315755844116, - "learning_rate": 1.7225605227981584e-05, - "loss": 0.0619, + "epoch": 4.2581315906728054, + "grad_norm": 1.440019130706787, + "learning_rate": 5.7418684093271945e-06, + "loss": 0.0868, "step": 28670 }, { - "epoch": 2.12980840635675, - "grad_norm": 1.8317357301712036, - "learning_rate": 1.72211495618595e-05, - "loss": 0.0803, + "epoch": 4.2596168127135, + "grad_norm": 0.7774254679679871, + "learning_rate": 5.7403831872865e-06, + "loss": 0.0623, "step": 28680 }, { - "epoch": 2.130551017377098, - "grad_norm": 0.5078200101852417, - "learning_rate": 1.721669389573741e-05, - "loss": 0.0654, + "epoch": 4.261102034754196, + "grad_norm": 0.99187171459198, + "learning_rate": 5.738897965245804e-06, + "loss": 0.0704, "step": 28690 }, { - "epoch": 2.1312936283974455, - "grad_norm": 1.6214134693145752, - "learning_rate": 1.721223822961533e-05, - "loss": 0.0542, + "epoch": 4.262587256794891, + "grad_norm": 1.1952239274978638, + "learning_rate": 5.7374127432051095e-06, + "loss": 0.0571, "step": 28700 }, { - "epoch": 2.132036239417793, - "grad_norm": 2.1985042095184326, - "learning_rate": 1.7207782563493244e-05, - "loss": 0.0543, + "epoch": 4.264072478835586, + "grad_norm": 0.7059710621833801, + "learning_rate": 5.735927521164415e-06, + "loss": 0.0551, "step": 28710 }, { - "epoch": 2.1327788504381404, - "grad_norm": 1.8467528820037842, - "learning_rate": 1.7203326897371156e-05, - "loss": 0.0596, + "epoch": 4.265557700876281, + "grad_norm": 0.8650168180465698, + "learning_rate": 5.734442299123719e-06, + "loss": 0.061, "step": 28720 }, { - "epoch": 2.133521461458488, - "grad_norm": 1.9839740991592407, - "learning_rate": 1.719887123124907e-05, - "loss": 0.082, + "epoch": 4.267042922916976, + "grad_norm": 0.374861478805542, + "learning_rate": 5.7329570770830245e-06, + "loss": 0.0592, "step": 28730 }, { - "epoch": 2.1342640724788358, - "grad_norm": 1.7829911708831787, - "learning_rate": 1.719441556512699e-05, - "loss": 0.0709, + "epoch": 4.2685281449576715, + "grad_norm": 0.9369165897369385, + "learning_rate": 5.73147185504233e-06, + "loss": 0.0645, "step": 28740 }, { - "epoch": 2.135006683499183, - "grad_norm": 1.043108582496643, - "learning_rate": 1.71899598990049e-05, - "loss": 0.0571, + "epoch": 4.270013366998366, + "grad_norm": 0.5820393562316895, + "learning_rate": 5.729986633001634e-06, + "loss": 0.058, "step": 28750 }, { - "epoch": 2.1357492945195307, - "grad_norm": 1.6130720376968384, - "learning_rate": 1.7185504232882816e-05, - "loss": 0.0577, + "epoch": 4.271498589039061, + "grad_norm": 0.9628231525421143, + "learning_rate": 5.7285014109609395e-06, + "loss": 0.0548, "step": 28760 }, { - "epoch": 2.136491905539878, - "grad_norm": 0.3938934803009033, - "learning_rate": 1.718104856676073e-05, - "loss": 0.0713, + "epoch": 4.272983811079756, + "grad_norm": 0.6571143269538879, + "learning_rate": 5.727016188920244e-06, + "loss": 0.0568, "step": 28770 }, { - "epoch": 2.1372345165602256, - "grad_norm": 2.0002403259277344, - "learning_rate": 1.7176592900638646e-05, - "loss": 0.063, + "epoch": 4.274469033120451, + "grad_norm": 1.2203211784362793, + "learning_rate": 5.725530966879549e-06, + "loss": 0.0647, "step": 28780 }, { - "epoch": 2.1379771275805735, - "grad_norm": 1.1039925813674927, - "learning_rate": 1.717213723451656e-05, - "loss": 0.0555, + "epoch": 4.275954255161147, + "grad_norm": 1.1997301578521729, + "learning_rate": 5.7240457448388545e-06, + "loss": 0.0608, "step": 28790 }, { - "epoch": 2.138719738600921, - "grad_norm": 1.2151685953140259, - "learning_rate": 1.7167681568394476e-05, - "loss": 0.0613, + "epoch": 4.277439477201842, + "grad_norm": 1.1112606525421143, + "learning_rate": 5.722560522798158e-06, + "loss": 0.0674, "step": 28800 }, { - "epoch": 2.1394623496212684, - "grad_norm": 0.6045921444892883, - "learning_rate": 1.716322590227239e-05, - "loss": 0.0827, + "epoch": 4.278924699242537, + "grad_norm": 0.9697360992431641, + "learning_rate": 5.721075300757463e-06, + "loss": 0.0619, "step": 28810 }, { - "epoch": 2.140204960641616, - "grad_norm": 1.541783094406128, - "learning_rate": 1.7158770236150306e-05, - "loss": 0.0771, + "epoch": 4.280409921283232, + "grad_norm": 0.5399026274681091, + "learning_rate": 5.7195900787167695e-06, + "loss": 0.0707, "step": 28820 }, { - "epoch": 2.1409475716619633, - "grad_norm": 3.0538811683654785, - "learning_rate": 1.7154314570028218e-05, - "loss": 0.1097, + "epoch": 4.2818951433239265, + "grad_norm": 0.41528892517089844, + "learning_rate": 5.718104856676073e-06, + "loss": 0.0653, "step": 28830 }, { - "epoch": 2.141690182682311, - "grad_norm": 1.1775662899017334, - "learning_rate": 1.7149858903906136e-05, - "loss": 0.0666, + "epoch": 4.283380365364622, + "grad_norm": 0.672845184803009, + "learning_rate": 5.716619634635378e-06, + "loss": 0.0751, "step": 28840 }, { - "epoch": 2.1424327937026586, - "grad_norm": 1.498507022857666, - "learning_rate": 1.7145403237784048e-05, - "loss": 0.0671, + "epoch": 4.284865587405317, + "grad_norm": 0.8563491106033325, + "learning_rate": 5.715134412594683e-06, + "loss": 0.0424, "step": 28850 }, { - "epoch": 2.143175404723006, - "grad_norm": 0.7959874868392944, - "learning_rate": 1.7140947571661963e-05, - "loss": 0.057, + "epoch": 4.286350809446012, + "grad_norm": 0.6113738417625427, + "learning_rate": 5.713649190553988e-06, + "loss": 0.0584, "step": 28860 }, { - "epoch": 2.1439180157433535, - "grad_norm": 2.1774017810821533, - "learning_rate": 1.713649190553988e-05, - "loss": 0.1016, + "epoch": 4.287836031486707, + "grad_norm": 0.9037973880767822, + "learning_rate": 5.712163968513293e-06, + "loss": 0.0495, "step": 28870 }, { - "epoch": 2.1446606267637014, - "grad_norm": 0.7331980466842651, - "learning_rate": 1.7132036239417793e-05, - "loss": 0.0634, + "epoch": 4.289321253527403, + "grad_norm": 1.3024934530258179, + "learning_rate": 5.710678746472598e-06, + "loss": 0.066, "step": 28880 }, { - "epoch": 2.145403237784049, - "grad_norm": 0.8138454556465149, - "learning_rate": 1.7127580573295708e-05, - "loss": 0.0474, + "epoch": 4.290806475568098, + "grad_norm": 1.2819448709487915, + "learning_rate": 5.709193524431903e-06, + "loss": 0.0579, "step": 28890 }, { - "epoch": 2.1461458488043963, - "grad_norm": 1.016754388809204, - "learning_rate": 1.7123124907173623e-05, - "loss": 0.0718, + "epoch": 4.292291697608793, + "grad_norm": 0.638242781162262, + "learning_rate": 5.707708302391208e-06, + "loss": 0.063, "step": 28900 }, { - "epoch": 2.1468884598247437, - "grad_norm": 1.882819414138794, - "learning_rate": 1.7118669241051538e-05, - "loss": 0.0809, + "epoch": 4.2937769196494875, + "grad_norm": 0.6230556964874268, + "learning_rate": 5.706223080350513e-06, + "loss": 0.0522, "step": 28910 }, { - "epoch": 2.147631070845091, - "grad_norm": 0.7724311351776123, - "learning_rate": 1.7114213574929453e-05, - "loss": 0.0775, + "epoch": 4.295262141690182, + "grad_norm": 1.8110722303390503, + "learning_rate": 5.704737858309818e-06, + "loss": 0.0818, "step": 28920 }, { - "epoch": 2.148373681865439, - "grad_norm": 2.551377534866333, - "learning_rate": 1.7109757908807368e-05, - "loss": 0.0776, + "epoch": 4.296747363730878, + "grad_norm": 0.7264518141746521, + "learning_rate": 5.703252636269123e-06, + "loss": 0.0551, "step": 28930 }, { - "epoch": 2.1491162928857865, - "grad_norm": 1.454253911972046, - "learning_rate": 1.7105302242685283e-05, - "loss": 0.0635, + "epoch": 4.298232585771573, + "grad_norm": 0.7805657386779785, + "learning_rate": 5.701767414228428e-06, + "loss": 0.0676, "step": 28940 }, { - "epoch": 2.149858903906134, - "grad_norm": 0.8662858009338379, - "learning_rate": 1.7100846576563197e-05, - "loss": 0.0689, + "epoch": 4.299717807812268, + "grad_norm": 0.32692262530326843, + "learning_rate": 5.700282192187733e-06, + "loss": 0.0511, "step": 28950 }, { - "epoch": 2.1506015149264814, - "grad_norm": 1.5164152383804321, - "learning_rate": 1.709639091044111e-05, - "loss": 0.0956, + "epoch": 4.301203029852963, + "grad_norm": 0.7880625128746033, + "learning_rate": 5.698796970147037e-06, + "loss": 0.0785, "step": 28960 }, { - "epoch": 2.151344125946829, - "grad_norm": 1.9796892404556274, - "learning_rate": 1.7091935244319024e-05, - "loss": 0.08, + "epoch": 4.302688251893658, + "grad_norm": 0.8821731805801392, + "learning_rate": 5.697311748106342e-06, + "loss": 0.0541, "step": 28970 }, { - "epoch": 2.152086736967177, - "grad_norm": 2.7751400470733643, - "learning_rate": 1.7087479578196942e-05, - "loss": 0.0689, + "epoch": 4.304173473934354, + "grad_norm": 1.0957586765289307, + "learning_rate": 5.695826526065647e-06, + "loss": 0.0664, "step": 28980 }, { - "epoch": 2.1528293479875242, - "grad_norm": 0.9930230379104614, - "learning_rate": 1.7083023912074854e-05, - "loss": 0.0691, + "epoch": 4.3056586959750485, + "grad_norm": 0.8482171297073364, + "learning_rate": 5.694341304024952e-06, + "loss": 0.0603, "step": 28990 }, { - "epoch": 2.1535719590078717, - "grad_norm": 0.8586207032203674, - "learning_rate": 1.707856824595277e-05, - "loss": 0.0729, + "epoch": 4.307143918015743, + "grad_norm": 1.5131299495697021, + "learning_rate": 5.692856081984257e-06, + "loss": 0.0661, "step": 29000 }, { - "epoch": 2.154314570028219, - "grad_norm": 1.4148691892623901, - "learning_rate": 1.7074112579830687e-05, - "loss": 0.0488, + "epoch": 4.308629140056438, + "grad_norm": 0.6365652680397034, + "learning_rate": 5.691370859943562e-06, + "loss": 0.0769, "step": 29010 }, { - "epoch": 2.1550571810485666, - "grad_norm": 1.3059778213500977, - "learning_rate": 1.70696569137086e-05, - "loss": 0.0812, + "epoch": 4.310114362097133, + "grad_norm": 0.40474069118499756, + "learning_rate": 5.689885637902867e-06, + "loss": 0.0676, "step": 29020 }, { - "epoch": 2.1557997920689145, - "grad_norm": 0.3190613389015198, - "learning_rate": 1.7065201247586514e-05, - "loss": 0.0524, + "epoch": 4.311599584137829, + "grad_norm": 0.3460848927497864, + "learning_rate": 5.688400415862172e-06, + "loss": 0.0846, "step": 29030 }, { - "epoch": 2.156542403089262, - "grad_norm": 2.4894535541534424, - "learning_rate": 1.7060745581464432e-05, - "loss": 0.1131, + "epoch": 4.313084806178524, + "grad_norm": 0.7716390490531921, + "learning_rate": 5.686915193821477e-06, + "loss": 0.0727, "step": 29040 }, { - "epoch": 2.1572850141096094, - "grad_norm": 2.3826353549957275, - "learning_rate": 1.7056289915342344e-05, - "loss": 0.0722, + "epoch": 4.314570028219219, + "grad_norm": 0.7166088223457336, + "learning_rate": 5.685429971780782e-06, + "loss": 0.0497, "step": 29050 }, { - "epoch": 2.158027625129957, - "grad_norm": 0.887143611907959, - "learning_rate": 1.705183424922026e-05, - "loss": 0.0498, + "epoch": 4.316055250259914, + "grad_norm": 0.3296228349208832, + "learning_rate": 5.683944749740087e-06, + "loss": 0.073, "step": 29060 }, { - "epoch": 2.1587702361503043, - "grad_norm": 2.6809613704681396, - "learning_rate": 1.704737858309817e-05, - "loss": 0.1214, + "epoch": 4.317540472300609, + "grad_norm": 0.9404149651527405, + "learning_rate": 5.682459527699391e-06, + "loss": 0.0506, "step": 29070 }, { - "epoch": 2.159512847170652, - "grad_norm": 1.2329598665237427, - "learning_rate": 1.704292291697609e-05, - "loss": 0.0972, + "epoch": 4.319025694341304, + "grad_norm": 1.397255301475525, + "learning_rate": 5.680974305658697e-06, + "loss": 0.0499, "step": 29080 }, { - "epoch": 2.1602554581909996, - "grad_norm": 2.446826457977295, - "learning_rate": 1.7038467250854004e-05, - "loss": 0.0659, + "epoch": 4.320510916381999, + "grad_norm": 1.0144386291503906, + "learning_rate": 5.679489083618002e-06, + "loss": 0.0639, "step": 29090 }, { - "epoch": 2.160998069211347, - "grad_norm": 1.961200475692749, - "learning_rate": 1.7034011584731916e-05, - "loss": 0.0396, + "epoch": 4.321996138422694, + "grad_norm": 0.328948974609375, + "learning_rate": 5.6780038615773055e-06, + "loss": 0.0429, "step": 29100 }, { - "epoch": 2.1617406802316945, - "grad_norm": 0.2081407755613327, - "learning_rate": 1.7029555918609834e-05, - "loss": 0.0452, + "epoch": 4.323481360463389, + "grad_norm": 0.8655416965484619, + "learning_rate": 5.676518639536611e-06, + "loss": 0.0617, "step": 29110 }, { - "epoch": 2.162483291252042, - "grad_norm": 1.8672429323196411, - "learning_rate": 1.702510025248775e-05, - "loss": 0.0674, + "epoch": 4.324966582504084, + "grad_norm": 1.1616047620773315, + "learning_rate": 5.675033417495917e-06, + "loss": 0.0643, "step": 29120 }, { - "epoch": 2.16322590227239, - "grad_norm": 2.0744621753692627, - "learning_rate": 1.702064458636566e-05, - "loss": 0.0899, + "epoch": 4.32645180454478, + "grad_norm": 1.1419157981872559, + "learning_rate": 5.6735481954552205e-06, + "loss": 0.0594, "step": 29130 }, { - "epoch": 2.1639685132927373, - "grad_norm": 0.9854013323783875, - "learning_rate": 1.7016188920243576e-05, - "loss": 0.0731, + "epoch": 4.327937026585475, + "grad_norm": 0.5438464879989624, + "learning_rate": 5.672062973414526e-06, + "loss": 0.069, "step": 29140 }, { - "epoch": 2.1647111243130848, - "grad_norm": 1.5703667402267456, - "learning_rate": 1.7011733254121494e-05, - "loss": 0.0887, + "epoch": 4.3294222486261695, + "grad_norm": 0.4473876357078552, + "learning_rate": 5.670577751373831e-06, + "loss": 0.0613, "step": 29150 }, { - "epoch": 2.165453735333432, - "grad_norm": 1.483916997909546, - "learning_rate": 1.7007277587999406e-05, - "loss": 0.0863, + "epoch": 4.330907470666864, + "grad_norm": 0.7418670654296875, + "learning_rate": 5.6690925293331355e-06, + "loss": 0.0521, "step": 29160 }, { - "epoch": 2.1661963463537797, - "grad_norm": 1.5202964544296265, - "learning_rate": 1.700282192187732e-05, - "loss": 0.0785, + "epoch": 4.332392692707559, + "grad_norm": 1.3394016027450562, + "learning_rate": 5.667607307292441e-06, + "loss": 0.0818, "step": 29170 }, { - "epoch": 2.1669389573741276, - "grad_norm": 0.9036029577255249, - "learning_rate": 1.6998366255755236e-05, - "loss": 0.0603, + "epoch": 4.333877914748255, + "grad_norm": 1.4490028619766235, + "learning_rate": 5.666122085251745e-06, + "loss": 0.0641, "step": 29180 }, { - "epoch": 2.167681568394475, - "grad_norm": 0.647527277469635, - "learning_rate": 1.699391058963315e-05, - "loss": 0.0313, + "epoch": 4.33536313678895, + "grad_norm": 0.673684298992157, + "learning_rate": 5.6646368632110505e-06, + "loss": 0.0593, "step": 29190 }, { - "epoch": 2.1684241794148225, - "grad_norm": 1.4471430778503418, - "learning_rate": 1.6989454923511066e-05, - "loss": 0.1026, + "epoch": 4.336848358829645, + "grad_norm": 0.585928201675415, + "learning_rate": 5.663151641170356e-06, + "loss": 0.0466, "step": 29200 }, { - "epoch": 2.16916679043517, - "grad_norm": 1.5521758794784546, - "learning_rate": 1.698499925738898e-05, - "loss": 0.0581, + "epoch": 4.33833358087034, + "grad_norm": 0.650763213634491, + "learning_rate": 5.66166641912966e-06, + "loss": 0.0552, "step": 29210 }, { - "epoch": 2.169909401455518, - "grad_norm": 0.8579624891281128, - "learning_rate": 1.6980543591266896e-05, - "loss": 0.0591, + "epoch": 4.339818802911036, + "grad_norm": 0.9486691951751709, + "learning_rate": 5.6601811970889655e-06, + "loss": 0.0636, "step": 29220 }, { - "epoch": 2.1706520124758653, - "grad_norm": 0.8837199211120605, - "learning_rate": 1.697608792514481e-05, - "loss": 0.0776, + "epoch": 4.3413040249517305, + "grad_norm": 0.45562392473220825, + "learning_rate": 5.658695975048271e-06, + "loss": 0.0343, "step": 29230 }, { - "epoch": 2.1713946234962127, - "grad_norm": 0.8631309866905212, - "learning_rate": 1.6971632259022722e-05, - "loss": 0.0709, + "epoch": 4.342789246992425, + "grad_norm": 0.8459610342979431, + "learning_rate": 5.657210753007575e-06, + "loss": 0.0662, "step": 29240 }, { - "epoch": 2.17213723451656, - "grad_norm": 1.2741137742996216, - "learning_rate": 1.696717659290064e-05, - "loss": 0.0607, + "epoch": 4.34427446903312, + "grad_norm": 0.9982208609580994, + "learning_rate": 5.6557255309668805e-06, + "loss": 0.0562, "step": 29250 }, { - "epoch": 2.1728798455369076, - "grad_norm": 2.396149158477783, - "learning_rate": 1.6962720926778556e-05, - "loss": 0.0546, + "epoch": 4.345759691073815, + "grad_norm": 0.28092893958091736, + "learning_rate": 5.654240308926186e-06, + "loss": 0.0588, "step": 29260 }, { - "epoch": 2.1736224565572555, - "grad_norm": 1.4403908252716064, - "learning_rate": 1.6958265260656467e-05, - "loss": 0.0452, + "epoch": 4.347244913114511, + "grad_norm": 0.7825678586959839, + "learning_rate": 5.652755086885489e-06, + "loss": 0.0692, "step": 29270 }, { - "epoch": 2.174365067577603, - "grad_norm": 1.8442835807800293, - "learning_rate": 1.6953809594534386e-05, - "loss": 0.0869, + "epoch": 4.348730135155206, + "grad_norm": 0.8129569292068481, + "learning_rate": 5.651269864844795e-06, + "loss": 0.0737, "step": 29280 }, { - "epoch": 2.1751076785979504, - "grad_norm": 1.5323288440704346, - "learning_rate": 1.6949353928412297e-05, - "loss": 0.0881, + "epoch": 4.350215357195901, + "grad_norm": 0.7594356536865234, + "learning_rate": 5.649784642804099e-06, + "loss": 0.0693, "step": 29290 }, { - "epoch": 2.175850289618298, - "grad_norm": 0.6434569358825684, - "learning_rate": 1.6944898262290212e-05, - "loss": 0.0635, + "epoch": 4.351700579236596, + "grad_norm": 0.6552794575691223, + "learning_rate": 5.648299420763404e-06, + "loss": 0.0517, "step": 29300 }, { - "epoch": 2.1765929006386453, - "grad_norm": 0.8383660912513733, - "learning_rate": 1.6940442596168127e-05, - "loss": 0.0545, + "epoch": 4.353185801277291, + "grad_norm": 0.8315490484237671, + "learning_rate": 5.64681419872271e-06, + "loss": 0.05, "step": 29310 }, { - "epoch": 2.177335511658993, - "grad_norm": 1.861343264579773, - "learning_rate": 1.6935986930046042e-05, - "loss": 0.0557, + "epoch": 4.354671023317986, + "grad_norm": 0.9624179005622864, + "learning_rate": 5.645328976682014e-06, + "loss": 0.0882, "step": 29320 }, { - "epoch": 2.1780781226793406, - "grad_norm": 1.4994820356369019, - "learning_rate": 1.6931531263923957e-05, - "loss": 0.0584, + "epoch": 4.356156245358681, + "grad_norm": 0.36779454350471497, + "learning_rate": 5.643843754641319e-06, + "loss": 0.0555, "step": 29330 }, { - "epoch": 2.178820733699688, - "grad_norm": 0.872047483921051, - "learning_rate": 1.6927075597801872e-05, - "loss": 0.1263, + "epoch": 4.357641467399376, + "grad_norm": 0.9770904183387756, + "learning_rate": 5.642358532600625e-06, + "loss": 0.0491, "step": 29340 }, { - "epoch": 2.1795633447200355, - "grad_norm": 1.3193352222442627, - "learning_rate": 1.6922619931679787e-05, - "loss": 0.0715, + "epoch": 4.359126689440071, + "grad_norm": 0.6968162655830383, + "learning_rate": 5.640873310559929e-06, + "loss": 0.0699, "step": 29350 }, { - "epoch": 2.180305955740383, - "grad_norm": 1.5889509916305542, - "learning_rate": 1.6918164265557702e-05, - "loss": 0.0859, + "epoch": 4.360611911480766, + "grad_norm": 1.1757110357284546, + "learning_rate": 5.639388088519234e-06, + "loss": 0.0565, "step": 29360 }, { - "epoch": 2.181048566760731, - "grad_norm": 1.012092113494873, - "learning_rate": 1.6913708599435614e-05, - "loss": 0.0596, + "epoch": 4.362097133521462, + "grad_norm": 0.8306859135627747, + "learning_rate": 5.637902866478539e-06, + "loss": 0.0629, "step": 29370 }, { - "epoch": 2.1817911777810783, - "grad_norm": 1.8053189516067505, - "learning_rate": 1.6909252933313532e-05, - "loss": 0.0625, + "epoch": 4.363582355562157, + "grad_norm": 0.7834184169769287, + "learning_rate": 5.636417644437844e-06, + "loss": 0.0596, "step": 29380 }, { - "epoch": 2.182533788801426, - "grad_norm": 0.7296652793884277, - "learning_rate": 1.6904797267191447e-05, - "loss": 0.0597, + "epoch": 4.365067577602852, + "grad_norm": 0.5488366484642029, + "learning_rate": 5.634932422397149e-06, + "loss": 0.0556, "step": 29390 }, { - "epoch": 2.1832763998217732, - "grad_norm": 1.110438346862793, - "learning_rate": 1.690034160106936e-05, - "loss": 0.0538, + "epoch": 4.3665527996435465, + "grad_norm": 1.1125340461730957, + "learning_rate": 5.633447200356453e-06, + "loss": 0.062, "step": 29400 }, { - "epoch": 2.1840190108421207, - "grad_norm": 2.128885507583618, - "learning_rate": 1.6895885934947274e-05, - "loss": 0.0559, + "epoch": 4.368038021684241, + "grad_norm": 0.7609226107597351, + "learning_rate": 5.631961978315759e-06, + "loss": 0.0645, "step": 29410 }, { - "epoch": 2.1847616218624686, - "grad_norm": 1.590346097946167, - "learning_rate": 1.6891430268825192e-05, - "loss": 0.0496, + "epoch": 4.369523243724937, + "grad_norm": 1.2974096536636353, + "learning_rate": 5.630476756275064e-06, + "loss": 0.0544, "step": 29420 }, { - "epoch": 2.185504232882816, - "grad_norm": 1.5324519872665405, - "learning_rate": 1.6886974602703104e-05, - "loss": 0.0778, + "epoch": 4.371008465765632, + "grad_norm": 0.3472174108028412, + "learning_rate": 5.628991534234368e-06, + "loss": 0.0605, "step": 29430 }, { - "epoch": 2.1862468439031635, - "grad_norm": 0.8141632080078125, - "learning_rate": 1.688251893658102e-05, - "loss": 0.079, + "epoch": 4.372493687806327, + "grad_norm": 0.852185845375061, + "learning_rate": 5.627506312193673e-06, + "loss": 0.0698, "step": 29440 }, { - "epoch": 2.186989454923511, - "grad_norm": 2.3867433071136475, - "learning_rate": 1.6878063270458937e-05, - "loss": 0.0727, + "epoch": 4.373978909847022, + "grad_norm": 0.5808961391448975, + "learning_rate": 5.6260210901529785e-06, + "loss": 0.0477, "step": 29450 }, { - "epoch": 2.187732065943859, - "grad_norm": 1.383835792541504, - "learning_rate": 1.687360760433685e-05, - "loss": 0.0553, + "epoch": 4.375464131887718, + "grad_norm": 0.761407732963562, + "learning_rate": 5.624535868112283e-06, + "loss": 0.0502, "step": 29460 }, { - "epoch": 2.1884746769642063, - "grad_norm": 1.1126325130462646, - "learning_rate": 1.6869151938214764e-05, - "loss": 0.0698, + "epoch": 4.376949353928413, + "grad_norm": 0.5551052689552307, + "learning_rate": 5.623050646071588e-06, + "loss": 0.0631, "step": 29470 }, { - "epoch": 2.1892172879845537, - "grad_norm": 1.8178859949111938, - "learning_rate": 1.6864696272092675e-05, - "loss": 0.087, + "epoch": 4.3784345759691075, + "grad_norm": 0.8643785715103149, + "learning_rate": 5.621565424030893e-06, + "loss": 0.0634, "step": 29480 }, { - "epoch": 2.189959899004901, - "grad_norm": 1.6789990663528442, - "learning_rate": 1.6860240605970594e-05, - "loss": 0.0652, + "epoch": 4.379919798009802, + "grad_norm": 0.40301206707954407, + "learning_rate": 5.620080201990198e-06, + "loss": 0.0561, "step": 29490 }, { - "epoch": 2.1907025100252486, - "grad_norm": 1.2279289960861206, - "learning_rate": 1.685578493984851e-05, - "loss": 0.0915, + "epoch": 4.381405020050497, + "grad_norm": 0.8390589952468872, + "learning_rate": 5.618594979949503e-06, + "loss": 0.0616, "step": 29500 }, { - "epoch": 2.1914451210455965, - "grad_norm": 1.7912808656692505, - "learning_rate": 1.685132927372642e-05, - "loss": 0.0849, + "epoch": 4.382890242091193, + "grad_norm": 0.6667717099189758, + "learning_rate": 5.617109757908808e-06, + "loss": 0.0703, "step": 29510 }, { - "epoch": 2.192187732065944, - "grad_norm": 0.8681305050849915, - "learning_rate": 1.684687360760434e-05, - "loss": 0.0771, + "epoch": 4.384375464131888, + "grad_norm": 0.7195507884025574, + "learning_rate": 5.615624535868113e-06, + "loss": 0.0466, "step": 29520 }, { - "epoch": 2.1929303430862914, - "grad_norm": 3.010956287384033, - "learning_rate": 1.6842417941482254e-05, - "loss": 0.0818, + "epoch": 4.385860686172583, + "grad_norm": 0.4358431100845337, + "learning_rate": 5.614139313827418e-06, + "loss": 0.0545, "step": 29530 }, { - "epoch": 2.193672954106639, - "grad_norm": 1.075864553451538, - "learning_rate": 1.6837962275360165e-05, - "loss": 0.0665, + "epoch": 4.387345908213278, + "grad_norm": 1.1485531330108643, + "learning_rate": 5.612654091786723e-06, + "loss": 0.0654, "step": 29540 }, { - "epoch": 2.1944155651269863, - "grad_norm": 1.1837166547775269, - "learning_rate": 1.683350660923808e-05, - "loss": 0.0608, + "epoch": 4.388831130253973, + "grad_norm": 0.8820552825927734, + "learning_rate": 5.611168869746028e-06, + "loss": 0.0654, "step": 29550 }, { - "epoch": 2.195158176147334, - "grad_norm": 0.6628900766372681, - "learning_rate": 1.6829050943116e-05, - "loss": 0.0717, + "epoch": 4.390316352294668, + "grad_norm": 0.4499886631965637, + "learning_rate": 5.609683647705333e-06, + "loss": 0.0541, "step": 29560 }, { - "epoch": 2.1959007871676817, - "grad_norm": 0.9537403583526611, - "learning_rate": 1.682459527699391e-05, - "loss": 0.0573, + "epoch": 4.391801574335363, + "grad_norm": 0.4159229099750519, + "learning_rate": 5.608198425664637e-06, + "loss": 0.0653, "step": 29570 }, { - "epoch": 2.196643398188029, - "grad_norm": 2.0913939476013184, - "learning_rate": 1.6820139610871825e-05, - "loss": 0.1074, + "epoch": 4.393286796376058, + "grad_norm": 0.28467857837677, + "learning_rate": 5.606713203623943e-06, + "loss": 0.0597, "step": 29580 }, { - "epoch": 2.1973860092083766, - "grad_norm": 0.6338614821434021, - "learning_rate": 1.681568394474974e-05, - "loss": 0.0655, + "epoch": 4.394772018416753, + "grad_norm": 0.20366248488426208, + "learning_rate": 5.6052279815832464e-06, + "loss": 0.0582, "step": 29590 }, { - "epoch": 2.198128620228724, - "grad_norm": 2.1373088359832764, - "learning_rate": 1.6811228278627655e-05, - "loss": 0.05, + "epoch": 4.396257240457448, + "grad_norm": 0.976061224937439, + "learning_rate": 5.603742759542552e-06, + "loss": 0.0503, "step": 29600 }, { - "epoch": 2.198871231249072, - "grad_norm": 1.9427019357681274, - "learning_rate": 1.680677261250557e-05, - "loss": 0.0555, + "epoch": 4.397742462498144, + "grad_norm": 0.4987535774707794, + "learning_rate": 5.602257537501857e-06, + "loss": 0.0509, "step": 29610 }, { - "epoch": 2.1996138422694194, - "grad_norm": 1.4638181924819946, - "learning_rate": 1.6802316946383485e-05, - "loss": 0.0617, + "epoch": 4.399227684538839, + "grad_norm": 0.7110401391983032, + "learning_rate": 5.6007723154611614e-06, + "loss": 0.0622, "step": 29620 }, { - "epoch": 2.200356453289767, - "grad_norm": 1.2105026245117188, - "learning_rate": 1.67978612802614e-05, - "loss": 0.0647, + "epoch": 4.400712906579534, + "grad_norm": 0.6875151991844177, + "learning_rate": 5.599287093420467e-06, + "loss": 0.064, "step": 29630 }, { - "epoch": 2.2010990643101143, - "grad_norm": 1.4060852527618408, - "learning_rate": 1.6793405614139315e-05, - "loss": 0.0727, + "epoch": 4.4021981286202285, + "grad_norm": 1.0403201580047607, + "learning_rate": 5.597801871379772e-06, + "loss": 0.065, "step": 29640 }, { - "epoch": 2.2018416753304617, - "grad_norm": 2.5179665088653564, - "learning_rate": 1.6788949948017227e-05, - "loss": 0.0736, + "epoch": 4.403683350660923, + "grad_norm": 0.5002561807632446, + "learning_rate": 5.5963166493390764e-06, + "loss": 0.045, "step": 29650 }, { - "epoch": 2.2025842863508096, - "grad_norm": 2.2634148597717285, - "learning_rate": 1.6784494281895145e-05, - "loss": 0.0724, + "epoch": 4.405168572701619, + "grad_norm": 0.8764870762825012, + "learning_rate": 5.594831427298382e-06, + "loss": 0.0539, "step": 29660 }, { - "epoch": 2.203326897371157, - "grad_norm": 1.1711833477020264, - "learning_rate": 1.678003861577306e-05, - "loss": 0.0677, + "epoch": 4.406653794742314, + "grad_norm": 0.687566339969635, + "learning_rate": 5.593346205257687e-06, + "loss": 0.0605, "step": 29670 }, { - "epoch": 2.2040695083915045, - "grad_norm": 1.9533751010894775, - "learning_rate": 1.6775582949650972e-05, - "loss": 0.0682, + "epoch": 4.408139016783009, + "grad_norm": 0.5420661568641663, + "learning_rate": 5.5918609832169914e-06, + "loss": 0.0709, "step": 29680 }, { - "epoch": 2.204812119411852, - "grad_norm": 1.6772757768630981, - "learning_rate": 1.677112728352889e-05, - "loss": 0.0952, + "epoch": 4.409624238823704, + "grad_norm": 0.5420895218849182, + "learning_rate": 5.590375761176297e-06, + "loss": 0.0897, "step": 29690 }, { - "epoch": 2.2055547304321994, - "grad_norm": 1.3155872821807861, - "learning_rate": 1.6766671617406802e-05, - "loss": 0.0654, + "epoch": 4.411109460864399, + "grad_norm": 1.2310397624969482, + "learning_rate": 5.588890539135601e-06, + "loss": 0.0684, "step": 29700 }, { - "epoch": 2.2062973414525473, - "grad_norm": 0.7310417294502258, - "learning_rate": 1.6762215951284717e-05, - "loss": 0.0749, + "epoch": 4.412594682905095, + "grad_norm": 0.5618414282798767, + "learning_rate": 5.5874053170949064e-06, + "loss": 0.0486, "step": 29710 }, { - "epoch": 2.2070399524728948, - "grad_norm": 1.559289813041687, - "learning_rate": 1.6757760285162632e-05, - "loss": 0.0524, + "epoch": 4.4140799049457895, + "grad_norm": 0.9262076020240784, + "learning_rate": 5.585920095054212e-06, + "loss": 0.0551, "step": 29720 }, { - "epoch": 2.207782563493242, - "grad_norm": 1.340665578842163, - "learning_rate": 1.6753304619040547e-05, - "loss": 0.0692, + "epoch": 4.415565126986484, + "grad_norm": 1.0242255926132202, + "learning_rate": 5.584434873013515e-06, + "loss": 0.0567, "step": 29730 }, { - "epoch": 2.2085251745135897, - "grad_norm": 0.5830607414245605, - "learning_rate": 1.6748848952918462e-05, - "loss": 0.1112, + "epoch": 4.417050349027179, + "grad_norm": 1.1418559551239014, + "learning_rate": 5.5829496509728206e-06, + "loss": 0.0615, "step": 29740 }, { - "epoch": 2.209267785533937, - "grad_norm": 1.7768720388412476, - "learning_rate": 1.6744393286796377e-05, - "loss": 0.0557, + "epoch": 4.418535571067874, + "grad_norm": 0.736282229423523, + "learning_rate": 5.581464428932126e-06, + "loss": 0.0567, "step": 29750 }, { - "epoch": 2.210010396554285, - "grad_norm": 0.5409281849861145, - "learning_rate": 1.6739937620674292e-05, - "loss": 0.0565, + "epoch": 4.42002079310857, + "grad_norm": 0.9747437834739685, + "learning_rate": 5.57997920689143e-06, + "loss": 0.0755, "step": 29760 }, { - "epoch": 2.2107530075746324, - "grad_norm": 1.0394829511642456, - "learning_rate": 1.6735481954552207e-05, - "loss": 0.0581, + "epoch": 4.421506015149265, + "grad_norm": 1.3061301708221436, + "learning_rate": 5.5784939848507356e-06, + "loss": 0.0981, "step": 29770 }, { - "epoch": 2.21149561859498, - "grad_norm": 1.975055456161499, - "learning_rate": 1.673102628843012e-05, - "loss": 0.101, + "epoch": 4.42299123718996, + "grad_norm": 0.5164014101028442, + "learning_rate": 5.577008762810041e-06, + "loss": 0.0672, "step": 29780 }, { - "epoch": 2.2122382296153273, - "grad_norm": 1.235195279121399, - "learning_rate": 1.6726570622308037e-05, - "loss": 0.0529, + "epoch": 4.424476459230655, + "grad_norm": 0.5094213485717773, + "learning_rate": 5.575523540769345e-06, + "loss": 0.0685, "step": 29790 }, { - "epoch": 2.2129808406356752, - "grad_norm": 3.506690263748169, - "learning_rate": 1.672211495618595e-05, - "loss": 0.0922, + "epoch": 4.4259616812713505, + "grad_norm": 0.5946676135063171, + "learning_rate": 5.5740383187286506e-06, + "loss": 0.0514, "step": 29800 }, { - "epoch": 2.2137234516560227, - "grad_norm": 2.5403103828430176, - "learning_rate": 1.6717659290063863e-05, - "loss": 0.0768, + "epoch": 4.427446903312045, + "grad_norm": 0.37052375078201294, + "learning_rate": 5.572553096687955e-06, + "loss": 0.0823, "step": 29810 }, { - "epoch": 2.21446606267637, - "grad_norm": 1.3894189596176147, - "learning_rate": 1.6713203623941778e-05, - "loss": 0.0642, + "epoch": 4.42893212535274, + "grad_norm": 0.9672272205352783, + "learning_rate": 5.57106787464726e-06, + "loss": 0.0727, "step": 29820 }, { - "epoch": 2.2152086736967176, - "grad_norm": 1.7695733308792114, - "learning_rate": 1.6708747957819697e-05, - "loss": 0.0699, + "epoch": 4.430417347393435, + "grad_norm": 0.974509596824646, + "learning_rate": 5.5695826526065656e-06, + "loss": 0.0669, "step": 29830 }, { - "epoch": 2.215951284717065, - "grad_norm": 1.9002711772918701, - "learning_rate": 1.6704292291697608e-05, - "loss": 0.0587, + "epoch": 4.43190256943413, + "grad_norm": 0.9510679244995117, + "learning_rate": 5.56809743056587e-06, + "loss": 0.0589, "step": 29840 }, { - "epoch": 2.216693895737413, - "grad_norm": 2.1885085105895996, - "learning_rate": 1.6699836625575523e-05, - "loss": 0.0808, + "epoch": 4.433387791474826, + "grad_norm": 0.5893191695213318, + "learning_rate": 5.566612208525175e-06, + "loss": 0.0533, "step": 29850 }, { - "epoch": 2.2174365067577604, - "grad_norm": 0.7245926856994629, - "learning_rate": 1.669538095945344e-05, - "loss": 0.0767, + "epoch": 4.434873013515521, + "grad_norm": 0.9645144939422607, + "learning_rate": 5.5651269864844805e-06, + "loss": 0.0706, "step": 29860 }, { - "epoch": 2.218179117778108, - "grad_norm": 1.1416701078414917, - "learning_rate": 1.6690925293331353e-05, - "loss": 0.0552, + "epoch": 4.436358235556216, + "grad_norm": 1.2255542278289795, + "learning_rate": 5.563641764443785e-06, + "loss": 0.0733, "step": 29870 }, { - "epoch": 2.2189217287984553, - "grad_norm": 0.4477648138999939, - "learning_rate": 1.6686469627209268e-05, - "loss": 0.092, + "epoch": 4.437843457596911, + "grad_norm": 0.8338664770126343, + "learning_rate": 5.56215654240309e-06, + "loss": 0.0475, "step": 29880 }, { - "epoch": 2.2196643398188027, - "grad_norm": 1.4506340026855469, - "learning_rate": 1.668201396108718e-05, - "loss": 0.0803, + "epoch": 4.4393286796376055, + "grad_norm": 1.517331600189209, + "learning_rate": 5.560671320362394e-06, + "loss": 0.0762, "step": 29890 }, { - "epoch": 2.2204069508391506, - "grad_norm": 1.6978100538253784, - "learning_rate": 1.6677558294965098e-05, - "loss": 0.0516, + "epoch": 4.440813901678301, + "grad_norm": 0.5551873445510864, + "learning_rate": 5.559186098321699e-06, + "loss": 0.0626, "step": 29900 }, { - "epoch": 2.221149561859498, - "grad_norm": 0.5973829030990601, - "learning_rate": 1.6673102628843013e-05, - "loss": 0.0748, + "epoch": 4.442299123718996, + "grad_norm": 1.3025144338607788, + "learning_rate": 5.557700876281004e-06, + "loss": 0.0495, "step": 29910 }, { - "epoch": 2.2218921728798455, - "grad_norm": 1.091837763786316, - "learning_rate": 1.6668646962720925e-05, - "loss": 0.0831, + "epoch": 4.443784345759691, + "grad_norm": 0.9925779700279236, + "learning_rate": 5.556215654240309e-06, + "loss": 0.0545, "step": 29920 }, { - "epoch": 2.222634783900193, - "grad_norm": 0.9532679319381714, - "learning_rate": 1.6664191296598843e-05, - "loss": 0.0632, + "epoch": 4.445269567800386, + "grad_norm": 1.2901450395584106, + "learning_rate": 5.554730432199614e-06, + "loss": 0.0499, "step": 29930 }, { - "epoch": 2.2233773949205404, - "grad_norm": 1.0691170692443848, - "learning_rate": 1.6659735630476758e-05, - "loss": 0.0863, + "epoch": 4.446754789841081, + "grad_norm": 1.417781114578247, + "learning_rate": 5.553245210158919e-06, + "loss": 0.0598, "step": 29940 }, { - "epoch": 2.2241200059408883, - "grad_norm": 0.724433958530426, - "learning_rate": 1.665527996435467e-05, - "loss": 0.0485, + "epoch": 4.448240011881777, + "grad_norm": 0.6772493720054626, + "learning_rate": 5.551759988118224e-06, + "loss": 0.0656, "step": 29950 }, { - "epoch": 2.2248626169612358, - "grad_norm": 2.8994836807250977, - "learning_rate": 1.6650824298232585e-05, - "loss": 0.0611, + "epoch": 4.4497252339224715, + "grad_norm": 1.4603149890899658, + "learning_rate": 5.550274766077529e-06, + "loss": 0.0698, "step": 29960 }, { - "epoch": 2.2256052279815832, - "grad_norm": 0.5383403301239014, - "learning_rate": 1.6646368632110503e-05, - "loss": 0.0382, + "epoch": 4.4512104559631664, + "grad_norm": 0.8940395712852478, + "learning_rate": 5.548789544036834e-06, + "loss": 0.081, "step": 29970 }, { - "epoch": 2.2263478390019307, - "grad_norm": 4.375266075134277, - "learning_rate": 1.6641912965988415e-05, - "loss": 0.0434, + "epoch": 4.452695678003861, + "grad_norm": 0.7605615258216858, + "learning_rate": 5.547304321996139e-06, + "loss": 0.0584, "step": 29980 }, { - "epoch": 2.227090450022278, - "grad_norm": 4.827844619750977, - "learning_rate": 1.663745729986633e-05, - "loss": 0.0683, + "epoch": 4.454180900044556, + "grad_norm": 0.46938732266426086, + "learning_rate": 5.545819099955444e-06, + "loss": 0.0674, "step": 29990 }, { - "epoch": 2.227833061042626, - "grad_norm": 1.5270919799804688, - "learning_rate": 1.6633001633744245e-05, - "loss": 0.0859, + "epoch": 4.455666122085252, + "grad_norm": 0.3251242935657501, + "learning_rate": 5.5443338779147485e-06, + "loss": 0.0709, "step": 30000 }, { - "epoch": 2.2285756720629735, - "grad_norm": 2.311201572418213, - "learning_rate": 1.662854596762216e-05, - "loss": 0.1089, + "epoch": 4.457151344125947, + "grad_norm": 0.9395661354064941, + "learning_rate": 5.542848655874054e-06, + "loss": 0.06, "step": 30010 }, { - "epoch": 2.229318283083321, - "grad_norm": 1.0003308057785034, - "learning_rate": 1.6624090301500075e-05, - "loss": 0.0851, + "epoch": 4.458636566166642, + "grad_norm": 0.8586018681526184, + "learning_rate": 5.541363433833359e-06, + "loss": 0.054, "step": 30020 }, { - "epoch": 2.2300608941036684, - "grad_norm": 1.064595103263855, - "learning_rate": 1.661963463537799e-05, - "loss": 0.0711, + "epoch": 4.460121788207337, + "grad_norm": 0.9087779521942139, + "learning_rate": 5.539878211792663e-06, + "loss": 0.0668, "step": 30030 }, { - "epoch": 2.2308035051240163, - "grad_norm": 2.0935966968536377, - "learning_rate": 1.6615178969255905e-05, - "loss": 0.073, + "epoch": 4.4616070102480325, + "grad_norm": 0.4863450527191162, + "learning_rate": 5.538392989751968e-06, + "loss": 0.0802, "step": 30040 }, { - "epoch": 2.2315461161443637, - "grad_norm": 1.3619099855422974, - "learning_rate": 1.661072330313382e-05, - "loss": 0.0707, + "epoch": 4.463092232288727, + "grad_norm": 0.5601634383201599, + "learning_rate": 5.536907767711274e-06, + "loss": 0.0634, "step": 30050 }, { - "epoch": 2.232288727164711, - "grad_norm": 3.1474947929382324, - "learning_rate": 1.660626763701173e-05, - "loss": 0.0401, + "epoch": 4.464577454329422, + "grad_norm": 1.1524817943572998, + "learning_rate": 5.535422545670578e-06, + "loss": 0.071, "step": 30060 }, { - "epoch": 2.2330313381850586, - "grad_norm": 2.438246250152588, - "learning_rate": 1.660181197088965e-05, - "loss": 0.0783, + "epoch": 4.466062676370117, + "grad_norm": 1.1712273359298706, + "learning_rate": 5.533937323629883e-06, + "loss": 0.0839, "step": 30070 }, { - "epoch": 2.233773949205406, - "grad_norm": 2.8391036987304688, - "learning_rate": 1.6597356304767565e-05, - "loss": 0.0673, + "epoch": 4.467547898410812, + "grad_norm": 1.044423222541809, + "learning_rate": 5.532452101589188e-06, + "loss": 0.0563, "step": 30080 }, { - "epoch": 2.234516560225754, - "grad_norm": 1.0939983129501343, - "learning_rate": 1.6592900638645476e-05, - "loss": 0.0676, + "epoch": 4.469033120451508, + "grad_norm": 0.6070758104324341, + "learning_rate": 5.530966879548493e-06, + "loss": 0.0672, "step": 30090 }, { - "epoch": 2.2352591712461014, - "grad_norm": 1.0146929025650024, - "learning_rate": 1.6588444972523395e-05, - "loss": 0.0567, + "epoch": 4.470518342492203, + "grad_norm": 1.0291193723678589, + "learning_rate": 5.529481657507798e-06, + "loss": 0.0705, "step": 30100 }, { - "epoch": 2.236001782266449, - "grad_norm": 0.7500823140144348, - "learning_rate": 1.6583989306401306e-05, - "loss": 0.0618, + "epoch": 4.472003564532898, + "grad_norm": 1.1457722187042236, + "learning_rate": 5.527996435467102e-06, + "loss": 0.0608, "step": 30110 }, { - "epoch": 2.2367443932867963, - "grad_norm": 1.022570252418518, - "learning_rate": 1.657953364027922e-05, - "loss": 0.0878, + "epoch": 4.473488786573593, + "grad_norm": 0.4338544011116028, + "learning_rate": 5.526511213426408e-06, + "loss": 0.0477, "step": 30120 }, { - "epoch": 2.2374870043071438, - "grad_norm": 2.924889087677002, - "learning_rate": 1.6575077974157136e-05, - "loss": 0.0537, + "epoch": 4.4749740086142875, + "grad_norm": 1.3526643514633179, + "learning_rate": 5.525025991385713e-06, + "loss": 0.0642, "step": 30130 }, { - "epoch": 2.2382296153274917, - "grad_norm": 1.7216036319732666, - "learning_rate": 1.657062230803505e-05, - "loss": 0.1027, + "epoch": 4.476459230654983, + "grad_norm": 0.9014106392860413, + "learning_rate": 5.523540769345017e-06, + "loss": 0.0679, "step": 30140 }, { - "epoch": 2.238972226347839, - "grad_norm": 0.9251584410667419, - "learning_rate": 1.6566166641912966e-05, - "loss": 0.0823, + "epoch": 4.477944452695678, + "grad_norm": 0.8641489744186401, + "learning_rate": 5.522055547304323e-06, + "loss": 0.0602, "step": 30150 }, { - "epoch": 2.2397148373681866, - "grad_norm": 3.20623779296875, - "learning_rate": 1.656171097579088e-05, - "loss": 0.0619, + "epoch": 4.479429674736373, + "grad_norm": 0.8070448637008667, + "learning_rate": 5.520570325263628e-06, + "loss": 0.0547, "step": 30160 }, { - "epoch": 2.240457448388534, - "grad_norm": 1.0136836767196655, - "learning_rate": 1.6557255309668796e-05, - "loss": 0.0755, + "epoch": 4.480914896777068, + "grad_norm": 0.3844519257545471, + "learning_rate": 5.519085103222932e-06, + "loss": 0.05, "step": 30170 }, { - "epoch": 2.2412000594088815, - "grad_norm": 1.4571853876113892, - "learning_rate": 1.655279964354671e-05, - "loss": 0.1068, + "epoch": 4.482400118817763, + "grad_norm": 0.6818383932113647, + "learning_rate": 5.517599881182238e-06, + "loss": 0.0652, "step": 30180 }, { - "epoch": 2.2419426704292293, - "grad_norm": 1.8920878171920776, - "learning_rate": 1.6548343977424626e-05, - "loss": 0.0733, + "epoch": 4.483885340858459, + "grad_norm": 0.952302098274231, + "learning_rate": 5.516114659141543e-06, + "loss": 0.0702, "step": 30190 }, { - "epoch": 2.242685281449577, - "grad_norm": 1.619733452796936, - "learning_rate": 1.654388831130254e-05, - "loss": 0.092, + "epoch": 4.485370562899154, + "grad_norm": 0.5874274969100952, + "learning_rate": 5.5146294371008465e-06, + "loss": 0.0667, "step": 30200 }, { - "epoch": 2.2434278924699242, - "grad_norm": 2.0480968952178955, - "learning_rate": 1.6539432645180456e-05, - "loss": 0.061, + "epoch": 4.4868557849398485, + "grad_norm": 1.1029670238494873, + "learning_rate": 5.513144215060152e-06, + "loss": 0.0495, "step": 30210 }, { - "epoch": 2.2441705034902717, - "grad_norm": 0.9134529829025269, - "learning_rate": 1.6534976979058368e-05, - "loss": 0.0773, + "epoch": 4.488341006980543, + "grad_norm": 1.2427738904953003, + "learning_rate": 5.511658993019456e-06, + "loss": 0.0705, "step": 30220 }, { - "epoch": 2.244913114510619, - "grad_norm": 1.261897325515747, - "learning_rate": 1.6530521312936283e-05, - "loss": 0.0656, + "epoch": 4.489826229021238, + "grad_norm": 0.7121690511703491, + "learning_rate": 5.5101737709787615e-06, + "loss": 0.0513, "step": 30230 }, { - "epoch": 2.245655725530967, - "grad_norm": 1.7915825843811035, - "learning_rate": 1.65260656468142e-05, - "loss": 0.0685, + "epoch": 4.491311451061934, + "grad_norm": 0.7534963488578796, + "learning_rate": 5.508688548938067e-06, + "loss": 0.0754, "step": 30240 }, { - "epoch": 2.2463983365513145, - "grad_norm": 2.638289451599121, - "learning_rate": 1.6521609980692113e-05, - "loss": 0.0938, + "epoch": 4.492796673102629, + "grad_norm": 1.0792324542999268, + "learning_rate": 5.507203326897371e-06, + "loss": 0.0459, "step": 30250 }, { - "epoch": 2.247140947571662, - "grad_norm": 1.1665395498275757, - "learning_rate": 1.6517154314570028e-05, - "loss": 0.0999, + "epoch": 4.494281895143324, + "grad_norm": 0.8321207165718079, + "learning_rate": 5.5057181048566765e-06, + "loss": 0.0477, "step": 30260 }, { - "epoch": 2.2478835585920094, - "grad_norm": 0.8219088912010193, - "learning_rate": 1.6512698648447946e-05, - "loss": 0.0732, + "epoch": 4.495767117184019, + "grad_norm": 1.2834551334381104, + "learning_rate": 5.504232882815982e-06, + "loss": 0.0573, "step": 30270 }, { - "epoch": 2.248626169612357, - "grad_norm": 1.1381323337554932, - "learning_rate": 1.6508242982325858e-05, - "loss": 0.1115, + "epoch": 4.497252339224714, + "grad_norm": 0.8430453538894653, + "learning_rate": 5.502747660775286e-06, + "loss": 0.0593, "step": 30280 }, { - "epoch": 2.2493687806327047, - "grad_norm": 2.4219374656677246, - "learning_rate": 1.6503787316203773e-05, - "loss": 0.0829, + "epoch": 4.4987375612654095, + "grad_norm": 0.8827682733535767, + "learning_rate": 5.5012624387345915e-06, + "loss": 0.0463, "step": 30290 }, { - "epoch": 2.250111391653052, - "grad_norm": 1.201055645942688, - "learning_rate": 1.6499331650081684e-05, - "loss": 0.047, + "epoch": 4.500222783306104, + "grad_norm": 0.30821776390075684, + "learning_rate": 5.499777216693896e-06, + "loss": 0.0513, "step": 30300 }, { - "epoch": 2.2508540026733996, - "grad_norm": 0.7391691207885742, - "learning_rate": 1.6494875983959603e-05, - "loss": 0.0452, + "epoch": 4.501708005346799, + "grad_norm": 0.45330941677093506, + "learning_rate": 5.498291994653201e-06, + "loss": 0.0529, "step": 30310 }, { - "epoch": 2.251596613693747, - "grad_norm": 0.3732907176017761, - "learning_rate": 1.6490420317837518e-05, - "loss": 0.0624, + "epoch": 4.503193227387494, + "grad_norm": 0.5481662750244141, + "learning_rate": 5.4968067726125065e-06, + "loss": 0.0699, "step": 30320 }, { - "epoch": 2.2523392247140945, - "grad_norm": 0.5641918182373047, - "learning_rate": 1.648596465171543e-05, - "loss": 0.0906, + "epoch": 4.504678449428189, + "grad_norm": 2.128347873687744, + "learning_rate": 5.49532155057181e-06, + "loss": 0.0726, "step": 30330 }, { - "epoch": 2.2530818357344424, - "grad_norm": 2.029506206512451, - "learning_rate": 1.6481508985593348e-05, - "loss": 0.0745, + "epoch": 4.506163671468885, + "grad_norm": 1.1982522010803223, + "learning_rate": 5.493836328531116e-06, + "loss": 0.051, "step": 30340 }, { - "epoch": 2.25382444675479, - "grad_norm": 2.202345132827759, - "learning_rate": 1.6477053319471263e-05, - "loss": 0.0843, + "epoch": 4.50764889350958, + "grad_norm": 0.745867908000946, + "learning_rate": 5.4923511064904215e-06, + "loss": 0.0637, "step": 30350 }, { - "epoch": 2.2545670577751373, - "grad_norm": 2.1847546100616455, - "learning_rate": 1.6472597653349174e-05, - "loss": 0.0897, + "epoch": 4.509134115550275, + "grad_norm": 0.6008644104003906, + "learning_rate": 5.490865884449725e-06, + "loss": 0.0771, "step": 30360 }, { - "epoch": 2.255309668795485, - "grad_norm": 1.7908791303634644, - "learning_rate": 1.646814198722709e-05, - "loss": 0.0694, + "epoch": 4.51061933759097, + "grad_norm": 0.9833894968032837, + "learning_rate": 5.48938066240903e-06, + "loss": 0.0682, "step": 30370 }, { - "epoch": 2.2560522798158322, - "grad_norm": 3.3176610469818115, - "learning_rate": 1.6463686321105008e-05, - "loss": 0.0674, + "epoch": 4.5121045596316645, + "grad_norm": 0.38291507959365845, + "learning_rate": 5.487895440368336e-06, + "loss": 0.0547, "step": 30380 }, { - "epoch": 2.25679489083618, - "grad_norm": 1.1117013692855835, - "learning_rate": 1.645923065498292e-05, - "loss": 0.0419, + "epoch": 4.51358978167236, + "grad_norm": 0.5758116841316223, + "learning_rate": 5.48641021832764e-06, + "loss": 0.065, "step": 30390 }, { - "epoch": 2.2575375018565276, - "grad_norm": 2.5527663230895996, - "learning_rate": 1.6454774988860834e-05, - "loss": 0.0818, + "epoch": 4.515075003713055, + "grad_norm": 1.0570487976074219, + "learning_rate": 5.484924996286945e-06, + "loss": 0.0725, "step": 30400 }, { - "epoch": 2.258280112876875, - "grad_norm": 0.6371174454689026, - "learning_rate": 1.645031932273875e-05, - "loss": 0.0626, + "epoch": 4.51656022575375, + "grad_norm": 1.7718385457992554, + "learning_rate": 5.48343977424625e-06, + "loss": 0.0619, "step": 30410 }, { - "epoch": 2.2590227238972225, - "grad_norm": 1.5492578744888306, - "learning_rate": 1.6445863656616664e-05, - "loss": 0.0681, + "epoch": 4.518045447794445, + "grad_norm": 1.0061246156692505, + "learning_rate": 5.481954552205555e-06, + "loss": 0.0701, "step": 30420 }, { - "epoch": 2.2597653349175704, - "grad_norm": 0.8762646317481995, - "learning_rate": 1.644140799049458e-05, - "loss": 0.0724, + "epoch": 4.519530669835141, + "grad_norm": 0.5751404762268066, + "learning_rate": 5.48046933016486e-06, + "loss": 0.0736, "step": 30430 }, { - "epoch": 2.260507945937918, - "grad_norm": 2.67170786857605, - "learning_rate": 1.6436952324372494e-05, - "loss": 0.0614, + "epoch": 4.521015891875836, + "grad_norm": 0.612650990486145, + "learning_rate": 5.478984108124165e-06, + "loss": 0.0581, "step": 30440 }, { - "epoch": 2.2612505569582653, - "grad_norm": 2.633847713470459, - "learning_rate": 1.643249665825041e-05, - "loss": 0.0799, + "epoch": 4.5225011139165305, + "grad_norm": 0.7062779664993286, + "learning_rate": 5.47749888608347e-06, + "loss": 0.0566, "step": 30450 }, { - "epoch": 2.2619931679786127, - "grad_norm": 2.2049152851104736, - "learning_rate": 1.6428040992128324e-05, - "loss": 0.0641, + "epoch": 4.523986335957225, + "grad_norm": 1.410231351852417, + "learning_rate": 5.476013664042775e-06, + "loss": 0.0513, "step": 30460 }, { - "epoch": 2.26273577899896, - "grad_norm": 6.4670186042785645, - "learning_rate": 1.6423585326006236e-05, - "loss": 0.0696, + "epoch": 4.52547155799792, + "grad_norm": 0.4978226125240326, + "learning_rate": 5.47452844200208e-06, + "loss": 0.0683, "step": 30470 }, { - "epoch": 2.263478390019308, - "grad_norm": 1.440319538116455, - "learning_rate": 1.6419129659884154e-05, - "loss": 0.0519, + "epoch": 4.526956780038616, + "grad_norm": 0.6738853454589844, + "learning_rate": 5.473043219961385e-06, + "loss": 0.0414, "step": 30480 }, { - "epoch": 2.2642210010396555, - "grad_norm": 1.5117939710617065, - "learning_rate": 1.641467399376207e-05, - "loss": 0.0679, + "epoch": 4.528442002079311, + "grad_norm": 1.0385953187942505, + "learning_rate": 5.47155799792069e-06, + "loss": 0.0648, "step": 30490 }, { - "epoch": 2.264963612060003, - "grad_norm": 0.705443799495697, - "learning_rate": 1.641021832763998e-05, - "loss": 0.0956, + "epoch": 4.529927224120006, + "grad_norm": 0.38971370458602905, + "learning_rate": 5.470072775879994e-06, + "loss": 0.0453, "step": 30500 }, { - "epoch": 2.2657062230803504, - "grad_norm": 2.2516098022460938, - "learning_rate": 1.64057626615179e-05, - "loss": 0.0704, + "epoch": 4.531412446160701, + "grad_norm": 1.1835120916366577, + "learning_rate": 5.468587553839299e-06, + "loss": 0.0792, "step": 30510 }, { - "epoch": 2.266448834100698, - "grad_norm": 1.6648731231689453, - "learning_rate": 1.640130699539581e-05, - "loss": 0.0531, + "epoch": 4.532897668201396, + "grad_norm": 0.6529055833816528, + "learning_rate": 5.467102331798604e-06, + "loss": 0.0783, "step": 30520 }, { - "epoch": 2.2671914451210458, - "grad_norm": 1.7640278339385986, - "learning_rate": 1.6396851329273726e-05, - "loss": 0.0787, + "epoch": 4.5343828902420915, + "grad_norm": 1.151580810546875, + "learning_rate": 5.465617109757909e-06, + "loss": 0.0629, "step": 30530 }, { - "epoch": 2.267934056141393, - "grad_norm": 0.9552247524261475, - "learning_rate": 1.639239566315164e-05, - "loss": 0.0744, + "epoch": 4.535868112282786, + "grad_norm": 0.6512138843536377, + "learning_rate": 5.464131887717214e-06, + "loss": 0.0854, "step": 30540 }, { - "epoch": 2.2686766671617407, - "grad_norm": 2.791886329650879, - "learning_rate": 1.6387939997029556e-05, - "loss": 0.0758, + "epoch": 4.537353334323481, + "grad_norm": 0.6657187342643738, + "learning_rate": 5.462646665676519e-06, + "loss": 0.0546, "step": 30550 }, { - "epoch": 2.269419278182088, - "grad_norm": 3.42543363571167, - "learning_rate": 1.638348433090747e-05, - "loss": 0.0743, + "epoch": 4.538838556364176, + "grad_norm": 0.7522188425064087, + "learning_rate": 5.461161443635824e-06, + "loss": 0.0613, "step": 30560 }, { - "epoch": 2.270161889202436, - "grad_norm": 2.195741653442383, - "learning_rate": 1.6379028664785386e-05, - "loss": 0.0793, + "epoch": 4.540323778404872, + "grad_norm": 1.369394063949585, + "learning_rate": 5.459676221595129e-06, + "loss": 0.0562, "step": 30570 }, { - "epoch": 2.2709045002227835, - "grad_norm": 1.3603512048721313, - "learning_rate": 1.63745729986633e-05, - "loss": 0.094, + "epoch": 4.541809000445567, + "grad_norm": 0.35974112153053284, + "learning_rate": 5.458190999554434e-06, + "loss": 0.0573, "step": 30580 }, { - "epoch": 2.271647111243131, - "grad_norm": 0.9138447642326355, - "learning_rate": 1.6370117332541216e-05, - "loss": 0.0973, + "epoch": 4.543294222486262, + "grad_norm": 1.749398946762085, + "learning_rate": 5.456705777513739e-06, + "loss": 0.0653, "step": 30590 }, { - "epoch": 2.2723897222634784, - "grad_norm": 1.8551127910614014, - "learning_rate": 1.636566166641913e-05, - "loss": 0.037, + "epoch": 4.544779444526957, + "grad_norm": 1.257237195968628, + "learning_rate": 5.455220555473044e-06, + "loss": 0.0798, "step": 30600 }, { - "epoch": 2.273132333283826, - "grad_norm": 0.9217031002044678, - "learning_rate": 1.6361206000297046e-05, - "loss": 0.0537, + "epoch": 4.546264666567652, + "grad_norm": 1.2745774984359741, + "learning_rate": 5.453735333432349e-06, + "loss": 0.0729, "step": 30610 }, { - "epoch": 2.2738749443041737, - "grad_norm": 0.9425798654556274, - "learning_rate": 1.635675033417496e-05, - "loss": 0.067, + "epoch": 4.547749888608347, + "grad_norm": 1.1518669128417969, + "learning_rate": 5.452250111391654e-06, + "loss": 0.0579, "step": 30620 }, { - "epoch": 2.274617555324521, - "grad_norm": 2.4280683994293213, - "learning_rate": 1.6352294668052872e-05, - "loss": 0.0611, + "epoch": 4.549235110649042, + "grad_norm": 0.6923162341117859, + "learning_rate": 5.450764889350958e-06, + "loss": 0.0494, "step": 30630 }, { - "epoch": 2.2753601663448686, - "grad_norm": 1.6596300601959229, - "learning_rate": 1.6347839001930787e-05, - "loss": 0.1251, + "epoch": 4.550720332689737, + "grad_norm": 0.6949621438980103, + "learning_rate": 5.449279667310264e-06, + "loss": 0.0612, "step": 30640 }, { - "epoch": 2.276102777365216, - "grad_norm": 1.1469758749008179, - "learning_rate": 1.6343383335808706e-05, - "loss": 0.0594, + "epoch": 4.552205554730432, + "grad_norm": 0.6583867073059082, + "learning_rate": 5.447794445269569e-06, + "loss": 0.0589, "step": 30650 }, { - "epoch": 2.2768453883855635, - "grad_norm": 0.3723772466182709, - "learning_rate": 1.6338927669686617e-05, - "loss": 0.046, + "epoch": 4.553690776771127, + "grad_norm": 0.3452644348144531, + "learning_rate": 5.4463092232288725e-06, + "loss": 0.0571, "step": 30660 }, { - "epoch": 2.2775879994059114, - "grad_norm": 1.2316060066223145, - "learning_rate": 1.6334472003564532e-05, - "loss": 0.0913, + "epoch": 4.555175998811823, + "grad_norm": 0.7595973610877991, + "learning_rate": 5.444824001188178e-06, + "loss": 0.0708, "step": 30670 }, { - "epoch": 2.278330610426259, - "grad_norm": 0.8177586793899536, - "learning_rate": 1.633001633744245e-05, - "loss": 0.0796, + "epoch": 4.556661220852518, + "grad_norm": 0.611835241317749, + "learning_rate": 5.443338779147483e-06, + "loss": 0.0548, "step": 30680 }, { - "epoch": 2.2790732214466063, - "grad_norm": 2.4429774284362793, - "learning_rate": 1.6325560671320362e-05, - "loss": 0.0675, + "epoch": 4.558146442893213, + "grad_norm": 0.9598454833030701, + "learning_rate": 5.4418535571067875e-06, + "loss": 0.0659, "step": 30690 }, { - "epoch": 2.2798158324669537, - "grad_norm": 2.337932586669922, - "learning_rate": 1.6321105005198277e-05, - "loss": 0.0905, + "epoch": 4.5596316649339075, + "grad_norm": 0.8964837193489075, + "learning_rate": 5.440368335066093e-06, + "loss": 0.0727, "step": 30700 }, { - "epoch": 2.280558443487301, - "grad_norm": 1.1022733449935913, - "learning_rate": 1.6316649339076192e-05, - "loss": 0.0586, + "epoch": 4.561116886974602, + "grad_norm": 0.995827853679657, + "learning_rate": 5.438883113025398e-06, + "loss": 0.0731, "step": 30710 }, { - "epoch": 2.281301054507649, - "grad_norm": 1.6780964136123657, - "learning_rate": 1.6312193672954107e-05, - "loss": 0.0661, + "epoch": 4.562602109015298, + "grad_norm": 0.6015493273735046, + "learning_rate": 5.4373978909847025e-06, + "loss": 0.0634, "step": 30720 }, { - "epoch": 2.2820436655279965, - "grad_norm": 1.0337419509887695, - "learning_rate": 1.6307738006832022e-05, - "loss": 0.1078, + "epoch": 4.564087331055993, + "grad_norm": 0.38653287291526794, + "learning_rate": 5.435912668944008e-06, + "loss": 0.1031, "step": 30730 }, { - "epoch": 2.282786276548344, - "grad_norm": 0.539939284324646, - "learning_rate": 1.6303282340709934e-05, - "loss": 0.0679, + "epoch": 4.565572553096688, + "grad_norm": 1.0255281925201416, + "learning_rate": 5.434427446903312e-06, + "loss": 0.0685, "step": 30740 }, { - "epoch": 2.2835288875686914, - "grad_norm": 3.132404088973999, - "learning_rate": 1.6298826674587852e-05, - "loss": 0.0875, + "epoch": 4.567057775137383, + "grad_norm": 0.8919768929481506, + "learning_rate": 5.4329422248626175e-06, + "loss": 0.0638, "step": 30750 }, { - "epoch": 2.284271498589039, - "grad_norm": 0.9195793271064758, - "learning_rate": 1.6294371008465767e-05, - "loss": 0.0704, + "epoch": 4.568542997178078, + "grad_norm": 0.8848775625228882, + "learning_rate": 5.431457002821923e-06, + "loss": 0.0867, "step": 30760 }, { - "epoch": 2.285014109609387, - "grad_norm": 1.0817722082138062, - "learning_rate": 1.628991534234368e-05, - "loss": 0.0503, + "epoch": 4.570028219218774, + "grad_norm": 0.7540958523750305, + "learning_rate": 5.429971780781227e-06, + "loss": 0.058, "step": 30770 }, { - "epoch": 2.2857567206297342, - "grad_norm": 1.1843632459640503, - "learning_rate": 1.6285459676221597e-05, - "loss": 0.0481, + "epoch": 4.5715134412594685, + "grad_norm": 1.1147123575210571, + "learning_rate": 5.4284865587405325e-06, + "loss": 0.0732, "step": 30780 }, { - "epoch": 2.2864993316500817, - "grad_norm": 2.589695930480957, - "learning_rate": 1.6281004010099512e-05, - "loss": 0.0587, + "epoch": 4.572998663300163, + "grad_norm": 0.8272676467895508, + "learning_rate": 5.427001336699838e-06, + "loss": 0.0537, "step": 30790 }, { - "epoch": 2.287241942670429, - "grad_norm": 1.0527863502502441, - "learning_rate": 1.6276548343977424e-05, - "loss": 0.0863, + "epoch": 4.574483885340858, + "grad_norm": 0.6863977313041687, + "learning_rate": 5.425516114659141e-06, + "loss": 0.0675, "step": 30800 }, { - "epoch": 2.2879845536907766, - "grad_norm": 1.128021478652954, - "learning_rate": 1.627209267785534e-05, - "loss": 0.0738, + "epoch": 4.575969107381553, + "grad_norm": 0.7427458167076111, + "learning_rate": 5.4240308926184475e-06, + "loss": 0.058, "step": 30810 }, { - "epoch": 2.2887271647111245, - "grad_norm": 1.0027931928634644, - "learning_rate": 1.6267637011733254e-05, - "loss": 0.0423, + "epoch": 4.577454329422249, + "grad_norm": 0.6880619525909424, + "learning_rate": 5.422545670577751e-06, + "loss": 0.0402, "step": 30820 }, { - "epoch": 2.289469775731472, - "grad_norm": 1.4265313148498535, - "learning_rate": 1.626318134561117e-05, - "loss": 0.0381, + "epoch": 4.578939551462944, + "grad_norm": 0.84503173828125, + "learning_rate": 5.421060448537056e-06, + "loss": 0.0731, "step": 30830 }, { - "epoch": 2.2902123867518194, - "grad_norm": 1.3314334154129028, - "learning_rate": 1.6258725679489084e-05, - "loss": 0.0521, + "epoch": 4.580424773503639, + "grad_norm": 0.5866569876670837, + "learning_rate": 5.419575226496362e-06, + "loss": 0.0614, "step": 30840 }, { - "epoch": 2.290954997772167, - "grad_norm": 1.9009617567062378, - "learning_rate": 1.6254270013367e-05, - "loss": 0.0532, + "epoch": 4.581909995544334, + "grad_norm": 0.6142920851707458, + "learning_rate": 5.418090004455666e-06, + "loss": 0.0683, "step": 30850 }, { - "epoch": 2.2916976087925143, - "grad_norm": 1.4951937198638916, - "learning_rate": 1.6249814347244914e-05, - "loss": 0.0764, + "epoch": 4.5833952175850285, + "grad_norm": 0.8540779948234558, + "learning_rate": 5.416604782414971e-06, + "loss": 0.0405, "step": 30860 }, { - "epoch": 2.292440219812862, - "grad_norm": 1.393104910850525, - "learning_rate": 1.624535868112283e-05, - "loss": 0.0694, + "epoch": 4.584880439625724, + "grad_norm": 0.5846472978591919, + "learning_rate": 5.415119560374277e-06, + "loss": 0.0505, "step": 30870 }, { - "epoch": 2.2931828308332096, - "grad_norm": 1.1076534986495972, - "learning_rate": 1.624090301500074e-05, - "loss": 0.0542, + "epoch": 4.586365661666419, + "grad_norm": 0.5436474680900574, + "learning_rate": 5.413634338333581e-06, + "loss": 0.0493, "step": 30880 }, { - "epoch": 2.293925441853557, - "grad_norm": 2.4654695987701416, - "learning_rate": 1.623644734887866e-05, - "loss": 0.0647, + "epoch": 4.587850883707114, + "grad_norm": 0.6070688366889954, + "learning_rate": 5.412149116292886e-06, + "loss": 0.0539, "step": 30890 }, { - "epoch": 2.2946680528739045, - "grad_norm": 0.662090003490448, - "learning_rate": 1.6231991682756574e-05, - "loss": 0.0686, + "epoch": 4.589336105747809, + "grad_norm": 0.39871883392333984, + "learning_rate": 5.410663894252192e-06, + "loss": 0.0595, "step": 30900 }, { - "epoch": 2.295410663894252, - "grad_norm": 2.0039710998535156, - "learning_rate": 1.6227536016634486e-05, - "loss": 0.0792, + "epoch": 4.590821327788504, + "grad_norm": 0.9332908391952515, + "learning_rate": 5.409178672211496e-06, + "loss": 0.0591, "step": 30910 }, { - "epoch": 2.2961532749146, - "grad_norm": 0.9649547338485718, - "learning_rate": 1.6223080350512404e-05, - "loss": 0.0603, + "epoch": 4.5923065498292, + "grad_norm": 1.1472511291503906, + "learning_rate": 5.407693450170801e-06, + "loss": 0.0755, "step": 30920 }, { - "epoch": 2.2968958859349473, - "grad_norm": 1.8113723993301392, - "learning_rate": 1.6218624684390316e-05, - "loss": 0.0739, + "epoch": 4.593791771869895, + "grad_norm": 0.8061735033988953, + "learning_rate": 5.406208228130106e-06, + "loss": 0.064, "step": 30930 }, { - "epoch": 2.2976384969552948, - "grad_norm": 0.8658888936042786, - "learning_rate": 1.621416901826823e-05, - "loss": 0.0417, + "epoch": 4.5952769939105895, + "grad_norm": 0.8099207878112793, + "learning_rate": 5.404723006089411e-06, + "loss": 0.0661, "step": 30940 }, { - "epoch": 2.298381107975642, - "grad_norm": 1.820826530456543, - "learning_rate": 1.6209713352146146e-05, - "loss": 0.0584, + "epoch": 4.596762215951284, + "grad_norm": 1.1036105155944824, + "learning_rate": 5.403237784048716e-06, + "loss": 0.0533, "step": 30950 }, { - "epoch": 2.2991237189959897, - "grad_norm": 3.5532517433166504, - "learning_rate": 1.620525768602406e-05, - "loss": 0.0779, + "epoch": 4.598247437991979, + "grad_norm": 0.9364911317825317, + "learning_rate": 5.40175256200802e-06, + "loss": 0.0875, "step": 30960 }, { - "epoch": 2.2998663300163376, - "grad_norm": 1.2699205875396729, - "learning_rate": 1.6200802019901976e-05, - "loss": 0.0722, + "epoch": 4.599732660032675, + "grad_norm": 0.85829097032547, + "learning_rate": 5.400267339967325e-06, + "loss": 0.068, "step": 30970 }, { - "epoch": 2.300608941036685, - "grad_norm": 1.4592735767364502, - "learning_rate": 1.619634635377989e-05, - "loss": 0.0539, + "epoch": 4.60121788207337, + "grad_norm": 0.7444193959236145, + "learning_rate": 5.3987821179266304e-06, + "loss": 0.0678, "step": 30980 }, { - "epoch": 2.3013515520570325, - "grad_norm": 1.4031466245651245, - "learning_rate": 1.6191890687657805e-05, - "loss": 0.0849, + "epoch": 4.602703104114065, + "grad_norm": 0.9241144061088562, + "learning_rate": 5.397296895885935e-06, + "loss": 0.0663, "step": 30990 }, { - "epoch": 2.30209416307738, - "grad_norm": 2.4945876598358154, - "learning_rate": 1.618743502153572e-05, - "loss": 0.0859, + "epoch": 4.60418832615476, + "grad_norm": 0.8394775390625, + "learning_rate": 5.39581167384524e-06, + "loss": 0.0513, "step": 31000 }, { - "epoch": 2.302836774097728, - "grad_norm": 1.129449725151062, - "learning_rate": 1.6182979355413635e-05, - "loss": 0.0807, + "epoch": 4.605673548195456, + "grad_norm": 0.821804404258728, + "learning_rate": 5.3943264518045454e-06, + "loss": 0.0526, "step": 31010 }, { - "epoch": 2.3035793851180753, - "grad_norm": 3.043400526046753, - "learning_rate": 1.617852368929155e-05, - "loss": 0.056, + "epoch": 4.6071587702361505, + "grad_norm": 0.6810859441757202, + "learning_rate": 5.39284122976385e-06, + "loss": 0.0569, "step": 31020 }, { - "epoch": 2.3043219961384227, - "grad_norm": 2.541620969772339, - "learning_rate": 1.6174068023169465e-05, - "loss": 0.0738, + "epoch": 4.608643992276845, + "grad_norm": 0.8569344282150269, + "learning_rate": 5.391356007723155e-06, + "loss": 0.0646, "step": 31030 }, { - "epoch": 2.30506460715877, - "grad_norm": 0.742087721824646, - "learning_rate": 1.6169612357047377e-05, - "loss": 0.0583, + "epoch": 4.61012921431754, + "grad_norm": 0.4078651964664459, + "learning_rate": 5.38987078568246e-06, + "loss": 0.0527, "step": 31040 }, { - "epoch": 2.3058072181791176, - "grad_norm": 2.608368158340454, - "learning_rate": 1.6165156690925292e-05, - "loss": 0.0645, + "epoch": 4.611614436358235, + "grad_norm": 0.6530470848083496, + "learning_rate": 5.388385563641765e-06, + "loss": 0.0549, "step": 31050 }, { - "epoch": 2.3065498291994655, - "grad_norm": 1.629696249961853, - "learning_rate": 1.616070102480321e-05, - "loss": 0.0897, + "epoch": 4.613099658398931, + "grad_norm": 1.1770886182785034, + "learning_rate": 5.38690034160107e-06, + "loss": 0.0705, "step": 31060 }, { - "epoch": 2.307292440219813, - "grad_norm": 1.6755260229110718, - "learning_rate": 1.6156245358681122e-05, - "loss": 0.065, + "epoch": 4.614584880439626, + "grad_norm": 0.842705488204956, + "learning_rate": 5.385415119560375e-06, + "loss": 0.0563, "step": 31070 }, { - "epoch": 2.3080350512401604, - "grad_norm": 1.018589735031128, - "learning_rate": 1.6151789692559037e-05, - "loss": 0.0527, + "epoch": 4.616070102480321, + "grad_norm": 1.254603624343872, + "learning_rate": 5.38392989751968e-06, + "loss": 0.0849, "step": 31080 }, { - "epoch": 2.308777662260508, - "grad_norm": 0.7608964443206787, - "learning_rate": 1.6147334026436955e-05, - "loss": 0.0697, + "epoch": 4.617555324521016, + "grad_norm": 0.6766051650047302, + "learning_rate": 5.382444675478985e-06, + "loss": 0.0649, "step": 31090 }, { - "epoch": 2.3095202732808553, - "grad_norm": 0.854860246181488, - "learning_rate": 1.6142878360314867e-05, - "loss": 0.0657, + "epoch": 4.619040546561711, + "grad_norm": 1.1732462644577026, + "learning_rate": 5.38095945343829e-06, + "loss": 0.063, "step": 31100 }, { - "epoch": 2.310262884301203, - "grad_norm": 1.3979119062423706, - "learning_rate": 1.6138422694192782e-05, - "loss": 0.0776, + "epoch": 4.620525768602406, + "grad_norm": 1.3196903467178345, + "learning_rate": 5.379474231397595e-06, + "loss": 0.0698, "step": 31110 }, { - "epoch": 2.3110054953215506, - "grad_norm": 1.7942464351654053, - "learning_rate": 1.6133967028070697e-05, - "loss": 0.0688, + "epoch": 4.622010990643101, + "grad_norm": 0.4553806185722351, + "learning_rate": 5.3779890093569e-06, + "loss": 0.0576, "step": 31120 }, { - "epoch": 2.311748106341898, - "grad_norm": 1.9012426137924194, - "learning_rate": 1.6129511361948612e-05, - "loss": 0.0585, + "epoch": 4.623496212683796, + "grad_norm": 1.2071045637130737, + "learning_rate": 5.376503787316204e-06, + "loss": 0.0571, "step": 31130 }, { - "epoch": 2.3124907173622455, - "grad_norm": 1.9309673309326172, - "learning_rate": 1.6125055695826527e-05, - "loss": 0.0747, + "epoch": 4.624981434724491, + "grad_norm": 0.4207744002342224, + "learning_rate": 5.375018565275509e-06, + "loss": 0.0475, "step": 31140 }, { - "epoch": 2.3132333283825934, - "grad_norm": 1.511763572692871, - "learning_rate": 1.612060002970444e-05, - "loss": 0.0732, + "epoch": 4.626466656765187, + "grad_norm": 0.45888543128967285, + "learning_rate": 5.3735333432348134e-06, + "loss": 0.0706, "step": 31150 }, { - "epoch": 2.313975939402941, - "grad_norm": 2.341627836227417, - "learning_rate": 1.6116144363582357e-05, - "loss": 0.0883, + "epoch": 4.627951878805882, + "grad_norm": 0.9755649566650391, + "learning_rate": 5.372048121194119e-06, + "loss": 0.0709, "step": 31160 }, { - "epoch": 2.3147185504232883, - "grad_norm": 2.5105557441711426, - "learning_rate": 1.6111688697460272e-05, - "loss": 0.0588, + "epoch": 4.629437100846577, + "grad_norm": 0.9244090914726257, + "learning_rate": 5.370562899153424e-06, + "loss": 0.0689, "step": 31170 }, { - "epoch": 2.315461161443636, - "grad_norm": 1.0133614540100098, - "learning_rate": 1.6107233031338184e-05, - "loss": 0.0887, + "epoch": 4.630922322887272, + "grad_norm": 0.8915614485740662, + "learning_rate": 5.3690776771127284e-06, + "loss": 0.0475, "step": 31180 }, { - "epoch": 2.3162037724639832, - "grad_norm": 5.659106254577637, - "learning_rate": 1.6102777365216102e-05, - "loss": 0.0934, + "epoch": 4.6324075449279665, + "grad_norm": 0.4594666063785553, + "learning_rate": 5.367592455072034e-06, + "loss": 0.0586, "step": 31190 }, { - "epoch": 2.316946383484331, - "grad_norm": 0.734591543674469, - "learning_rate": 1.6098321699094017e-05, - "loss": 0.0798, + "epoch": 4.633892766968662, + "grad_norm": 0.8010812401771545, + "learning_rate": 5.366107233031339e-06, + "loss": 0.0768, "step": 31200 }, { - "epoch": 2.3176889945046786, - "grad_norm": 2.1692800521850586, - "learning_rate": 1.609386603297193e-05, - "loss": 0.0662, + "epoch": 4.635377989009357, + "grad_norm": 1.156713604927063, + "learning_rate": 5.3646220109906434e-06, + "loss": 0.0781, "step": 31210 }, { - "epoch": 2.318431605525026, - "grad_norm": 0.6351516842842102, - "learning_rate": 1.6089410366849844e-05, - "loss": 0.0478, + "epoch": 4.636863211050052, + "grad_norm": 0.8136284351348877, + "learning_rate": 5.363136788949949e-06, + "loss": 0.0639, "step": 31220 }, { - "epoch": 2.3191742165453735, - "grad_norm": 1.4886484146118164, - "learning_rate": 1.6084954700727762e-05, - "loss": 0.0698, + "epoch": 4.638348433090747, + "grad_norm": 0.629618763923645, + "learning_rate": 5.361651566909254e-06, + "loss": 0.0516, "step": 31230 }, { - "epoch": 2.319916827565721, - "grad_norm": 0.3388558626174927, - "learning_rate": 1.6080499034605674e-05, - "loss": 0.0557, + "epoch": 4.639833655131442, + "grad_norm": 0.6487692594528198, + "learning_rate": 5.360166344868558e-06, + "loss": 0.0741, "step": 31240 }, { - "epoch": 2.320659438586069, - "grad_norm": 0.2938145697116852, - "learning_rate": 1.607604336848359e-05, - "loss": 0.0889, + "epoch": 4.641318877172138, + "grad_norm": 1.1369839906692505, + "learning_rate": 5.358681122827864e-06, + "loss": 0.0638, "step": 31250 }, { - "epoch": 2.3214020496064163, - "grad_norm": 2.3439390659332275, - "learning_rate": 1.6071587702361504e-05, - "loss": 0.064, + "epoch": 4.6428040992128325, + "grad_norm": 0.9577314853668213, + "learning_rate": 5.357195900787167e-06, + "loss": 0.0665, "step": 31260 }, { - "epoch": 2.3221446606267637, - "grad_norm": 1.6882779598236084, - "learning_rate": 1.606713203623942e-05, - "loss": 0.0456, + "epoch": 4.6442893212535274, + "grad_norm": 0.8269117474555969, + "learning_rate": 5.3557106787464726e-06, + "loss": 0.0651, "step": 31270 }, { - "epoch": 2.322887271647111, - "grad_norm": 1.6335844993591309, - "learning_rate": 1.6062676370117334e-05, - "loss": 0.0528, + "epoch": 4.645774543294222, + "grad_norm": 1.153686285018921, + "learning_rate": 5.354225456705779e-06, + "loss": 0.0672, "step": 31280 }, { - "epoch": 2.3236298826674586, - "grad_norm": 1.1933094263076782, - "learning_rate": 1.6058220703995245e-05, - "loss": 0.0913, + "epoch": 4.647259765334917, + "grad_norm": 1.1111594438552856, + "learning_rate": 5.352740234665082e-06, + "loss": 0.0706, "step": 31290 }, { - "epoch": 2.3243724936878065, - "grad_norm": 2.4354095458984375, - "learning_rate": 1.6053765037873164e-05, - "loss": 0.0795, + "epoch": 4.648744987375613, + "grad_norm": 0.9219614863395691, + "learning_rate": 5.3512550126243876e-06, + "loss": 0.0569, "step": 31300 }, { - "epoch": 2.325115104708154, - "grad_norm": 1.5631943941116333, - "learning_rate": 1.604930937175108e-05, - "loss": 0.0792, + "epoch": 4.650230209416308, + "grad_norm": 1.7073825597763062, + "learning_rate": 5.349769790583693e-06, + "loss": 0.0652, "step": 31310 }, { - "epoch": 2.3258577157285014, - "grad_norm": 2.0377357006073, - "learning_rate": 1.604485370562899e-05, - "loss": 0.0836, + "epoch": 4.651715431457003, + "grad_norm": 0.46422526240348816, + "learning_rate": 5.348284568542997e-06, + "loss": 0.063, "step": 31320 }, { - "epoch": 2.326600326748849, - "grad_norm": 2.2633230686187744, - "learning_rate": 1.604039803950691e-05, - "loss": 0.0778, + "epoch": 4.653200653497698, + "grad_norm": 0.552732527256012, + "learning_rate": 5.3467993465023026e-06, + "loss": 0.0611, "step": 31330 }, { - "epoch": 2.3273429377691963, - "grad_norm": 1.7147982120513916, - "learning_rate": 1.603594237338482e-05, - "loss": 0.0757, + "epoch": 4.654685875538393, + "grad_norm": 1.0013930797576904, + "learning_rate": 5.345314124461607e-06, + "loss": 0.0662, "step": 31340 }, { - "epoch": 2.328085548789544, - "grad_norm": 2.1653573513031006, - "learning_rate": 1.6031486707262735e-05, - "loss": 0.0769, + "epoch": 4.656171097579088, + "grad_norm": 0.7439885139465332, + "learning_rate": 5.343828902420912e-06, + "loss": 0.0656, "step": 31350 }, { - "epoch": 2.3288281598098917, - "grad_norm": 1.5791271924972534, - "learning_rate": 1.602703104114065e-05, - "loss": 0.0756, + "epoch": 4.657656319619783, + "grad_norm": 0.7561959624290466, + "learning_rate": 5.3423436803802176e-06, + "loss": 0.0618, "step": 31360 }, { - "epoch": 2.329570770830239, - "grad_norm": 2.118759870529175, - "learning_rate": 1.6022575375018565e-05, - "loss": 0.0939, + "epoch": 4.659141541660478, + "grad_norm": 0.8221492767333984, + "learning_rate": 5.340858458339522e-06, + "loss": 0.0635, "step": 31370 }, { - "epoch": 2.3303133818505866, - "grad_norm": 1.0190156698226929, - "learning_rate": 1.601811970889648e-05, - "loss": 0.0663, + "epoch": 4.660626763701173, + "grad_norm": 0.8837724328041077, + "learning_rate": 5.339373236298827e-06, + "loss": 0.0687, "step": 31380 }, { - "epoch": 2.331055992870934, - "grad_norm": 1.578240156173706, - "learning_rate": 1.6013664042774395e-05, - "loss": 0.0899, + "epoch": 4.662111985741868, + "grad_norm": 1.0688098669052124, + "learning_rate": 5.3378880142581325e-06, + "loss": 0.0675, "step": 31390 }, { - "epoch": 2.331798603891282, - "grad_norm": 1.2670795917510986, - "learning_rate": 1.600920837665231e-05, - "loss": 0.056, + "epoch": 4.663597207782564, + "grad_norm": 1.1708858013153076, + "learning_rate": 5.336402792217437e-06, + "loss": 0.0461, "step": 31400 }, { - "epoch": 2.3325412149116294, - "grad_norm": 0.6794329285621643, - "learning_rate": 1.6004752710530225e-05, - "loss": 0.0792, + "epoch": 4.665082429823259, + "grad_norm": 0.5535678863525391, + "learning_rate": 5.334917570176742e-06, + "loss": 0.0766, "step": 31410 }, { - "epoch": 2.333283825931977, - "grad_norm": 1.2964015007019043, - "learning_rate": 1.600029704440814e-05, - "loss": 0.071, + "epoch": 4.666567651863954, + "grad_norm": 0.7136598229408264, + "learning_rate": 5.3334323481360475e-06, + "loss": 0.0584, "step": 31420 }, { - "epoch": 2.3340264369523243, - "grad_norm": 1.2982021570205688, - "learning_rate": 1.5995841378286055e-05, - "loss": 0.0841, + "epoch": 4.6680528739046485, + "grad_norm": 0.8499895334243774, + "learning_rate": 5.331947126095351e-06, + "loss": 0.0668, "step": 31430 }, { - "epoch": 2.3347690479726717, - "grad_norm": 2.294980049133301, - "learning_rate": 1.599138571216397e-05, - "loss": 0.0691, + "epoch": 4.669538095945343, + "grad_norm": 0.7829546332359314, + "learning_rate": 5.330461904054656e-06, + "loss": 0.0529, "step": 31440 }, { - "epoch": 2.3355116589930196, - "grad_norm": 1.0897853374481201, - "learning_rate": 1.598693004604188e-05, - "loss": 0.0898, + "epoch": 4.671023317986039, + "grad_norm": 0.8320195078849792, + "learning_rate": 5.328976682013961e-06, + "loss": 0.0704, "step": 31450 }, { - "epoch": 2.336254270013367, - "grad_norm": 2.2197341918945312, - "learning_rate": 1.5982474379919797e-05, - "loss": 0.0478, + "epoch": 4.672508540026734, + "grad_norm": 1.6752804517745972, + "learning_rate": 5.327491459973266e-06, + "loss": 0.0642, "step": 31460 }, { - "epoch": 2.3369968810337145, - "grad_norm": 2.2933666706085205, - "learning_rate": 1.5978018713797715e-05, - "loss": 0.039, + "epoch": 4.673993762067429, + "grad_norm": 2.0226798057556152, + "learning_rate": 5.326006237932571e-06, + "loss": 0.0701, "step": 31470 }, { - "epoch": 2.337739492054062, - "grad_norm": 2.1594624519348145, - "learning_rate": 1.5973563047675627e-05, - "loss": 0.0615, + "epoch": 4.675478984108124, + "grad_norm": 0.8305063247680664, + "learning_rate": 5.324521015891876e-06, + "loss": 0.0608, "step": 31480 }, { - "epoch": 2.3384821030744094, - "grad_norm": 0.8612133264541626, - "learning_rate": 1.596910738155354e-05, - "loss": 0.0652, + "epoch": 4.676964206148819, + "grad_norm": 1.0752133131027222, + "learning_rate": 5.323035793851181e-06, + "loss": 0.0582, "step": 31490 }, { - "epoch": 2.3392247140947573, - "grad_norm": 1.2774549722671509, - "learning_rate": 1.596465171543146e-05, - "loss": 0.0666, + "epoch": 4.678449428189515, + "grad_norm": 0.8303189277648926, + "learning_rate": 5.321550571810486e-06, + "loss": 0.064, "step": 31500 }, { - "epoch": 2.3399673251151047, - "grad_norm": 0.8495298027992249, - "learning_rate": 1.596019604930937e-05, - "loss": 0.0859, + "epoch": 4.6799346502302095, + "grad_norm": 0.772408127784729, + "learning_rate": 5.320065349769791e-06, + "loss": 0.0655, "step": 31510 }, { - "epoch": 2.340709936135452, - "grad_norm": 0.653118908405304, - "learning_rate": 1.5955740383187287e-05, - "loss": 0.0565, + "epoch": 4.681419872270904, + "grad_norm": 0.7097247242927551, + "learning_rate": 5.318580127729096e-06, + "loss": 0.063, "step": 31520 }, { - "epoch": 2.3414525471557996, - "grad_norm": 0.9799015522003174, - "learning_rate": 1.59512847170652e-05, - "loss": 0.0624, + "epoch": 4.682905094311599, + "grad_norm": 0.8347052931785583, + "learning_rate": 5.317094905688401e-06, + "loss": 0.066, "step": 31530 }, { - "epoch": 2.342195158176147, - "grad_norm": 0.8276538848876953, - "learning_rate": 1.5946829050943117e-05, - "loss": 0.0819, + "epoch": 4.684390316352294, + "grad_norm": 0.6834461688995361, + "learning_rate": 5.315609683647706e-06, + "loss": 0.0633, "step": 31540 }, { - "epoch": 2.342937769196495, - "grad_norm": 2.8302557468414307, - "learning_rate": 1.594237338482103e-05, - "loss": 0.0728, + "epoch": 4.68587553839299, + "grad_norm": 0.6303119659423828, + "learning_rate": 5.314124461607011e-06, + "loss": 0.0639, "step": 31550 }, { - "epoch": 2.3436803802168424, - "grad_norm": 2.348175287246704, - "learning_rate": 1.5937917718698943e-05, - "loss": 0.0857, + "epoch": 4.687360760433685, + "grad_norm": 1.4411765336990356, + "learning_rate": 5.312639239566315e-06, + "loss": 0.0647, "step": 31560 }, { - "epoch": 2.34442299123719, - "grad_norm": 1.6633504629135132, - "learning_rate": 1.593346205257686e-05, - "loss": 0.0724, + "epoch": 4.68884598247438, + "grad_norm": 1.3410730361938477, + "learning_rate": 5.311154017525621e-06, + "loss": 0.0612, "step": 31570 }, { - "epoch": 2.3451656022575373, - "grad_norm": 0.9694968461990356, - "learning_rate": 1.5929006386454777e-05, - "loss": 0.0795, + "epoch": 4.690331204515075, + "grad_norm": 0.6906945109367371, + "learning_rate": 5.309668795484926e-06, + "loss": 0.0445, "step": 31580 }, { - "epoch": 2.3459082132778852, - "grad_norm": 2.0099871158599854, - "learning_rate": 1.5924550720332688e-05, - "loss": 0.0568, + "epoch": 4.6918164265557705, + "grad_norm": 0.22727198898792267, + "learning_rate": 5.30818357344423e-06, + "loss": 0.0492, "step": 31590 }, { - "epoch": 2.3466508242982327, - "grad_norm": 1.2258661985397339, - "learning_rate": 1.5920095054210607e-05, - "loss": 0.0553, + "epoch": 4.693301648596465, + "grad_norm": 1.2610745429992676, + "learning_rate": 5.306698351403535e-06, + "loss": 0.0703, "step": 31600 }, { - "epoch": 2.34739343531858, - "grad_norm": 1.5101720094680786, - "learning_rate": 1.591563938808852e-05, - "loss": 0.0631, + "epoch": 4.69478687063716, + "grad_norm": 1.4050370454788208, + "learning_rate": 5.30521312936284e-06, + "loss": 0.0723, "step": 31610 }, { - "epoch": 2.3481360463389276, - "grad_norm": 1.2098814249038696, - "learning_rate": 1.5911183721966433e-05, - "loss": 0.0588, + "epoch": 4.696272092677855, + "grad_norm": 0.4102284014225006, + "learning_rate": 5.303727907322145e-06, + "loss": 0.0648, "step": 31620 }, { - "epoch": 2.348878657359275, - "grad_norm": 0.48147693276405334, - "learning_rate": 1.5906728055844348e-05, - "loss": 0.0648, + "epoch": 4.69775731471855, + "grad_norm": 0.755649745464325, + "learning_rate": 5.30224268528145e-06, + "loss": 0.054, "step": 31630 }, { - "epoch": 2.349621268379623, - "grad_norm": 1.1367077827453613, - "learning_rate": 1.5902272389722267e-05, - "loss": 0.0493, + "epoch": 4.699242536759246, + "grad_norm": 0.6751394867897034, + "learning_rate": 5.300757463240755e-06, + "loss": 0.0532, "step": 31640 }, { - "epoch": 2.3503638793999704, - "grad_norm": 1.2265082597732544, - "learning_rate": 1.5897816723600178e-05, - "loss": 0.067, + "epoch": 4.700727758799941, + "grad_norm": 0.7214498519897461, + "learning_rate": 5.29927224120006e-06, + "loss": 0.0989, "step": 31650 }, { - "epoch": 2.351106490420318, - "grad_norm": 3.4290337562561035, - "learning_rate": 1.5893361057478093e-05, - "loss": 0.0638, + "epoch": 4.702212980840636, + "grad_norm": 1.3037954568862915, + "learning_rate": 5.297787019159365e-06, + "loss": 0.0623, "step": 31660 }, { - "epoch": 2.3518491014406653, - "grad_norm": 1.1333622932434082, - "learning_rate": 1.5888905391356008e-05, - "loss": 0.0814, + "epoch": 4.703698202881331, + "grad_norm": 0.7185441851615906, + "learning_rate": 5.296301797118669e-06, + "loss": 0.0603, "step": 31670 }, { - "epoch": 2.3525917124610127, - "grad_norm": 1.8252911567687988, - "learning_rate": 1.5884449725233923e-05, - "loss": 0.0561, + "epoch": 4.7051834249220255, + "grad_norm": 0.603461503982544, + "learning_rate": 5.294816575077975e-06, + "loss": 0.047, "step": 31680 }, { - "epoch": 2.3533343234813606, - "grad_norm": 0.9480688571929932, - "learning_rate": 1.5879994059111838e-05, - "loss": 0.0603, + "epoch": 4.706668646962721, + "grad_norm": 1.2898454666137695, + "learning_rate": 5.29333135303728e-06, + "loss": 0.0605, "step": 31690 }, { - "epoch": 2.354076934501708, - "grad_norm": 1.576391339302063, - "learning_rate": 1.587553839298975e-05, - "loss": 0.0568, + "epoch": 4.708153869003416, + "grad_norm": 0.8753405213356018, + "learning_rate": 5.291846130996584e-06, + "loss": 0.0577, "step": 31700 }, { - "epoch": 2.3548195455220555, - "grad_norm": 0.9777421951293945, - "learning_rate": 1.5871082726867668e-05, - "loss": 0.0481, + "epoch": 4.709639091044111, + "grad_norm": 0.7883022427558899, + "learning_rate": 5.29036090895589e-06, + "loss": 0.0709, "step": 31710 }, { - "epoch": 2.355562156542403, - "grad_norm": 1.60342538356781, - "learning_rate": 1.5866627060745583e-05, - "loss": 0.0618, + "epoch": 4.711124313084806, + "grad_norm": 1.3050919771194458, + "learning_rate": 5.288875686915195e-06, + "loss": 0.0621, "step": 31720 }, { - "epoch": 2.356304767562751, - "grad_norm": 1.1398688554763794, - "learning_rate": 1.5862171394623495e-05, - "loss": 0.0554, + "epoch": 4.712609535125502, + "grad_norm": 0.40759310126304626, + "learning_rate": 5.2873904648744985e-06, + "loss": 0.0464, "step": 31730 }, { - "epoch": 2.3570473785830983, - "grad_norm": 0.9877955913543701, - "learning_rate": 1.5857715728501413e-05, - "loss": 0.0851, + "epoch": 4.714094757166197, + "grad_norm": 1.0722618103027344, + "learning_rate": 5.285905242833805e-06, + "loss": 0.0794, "step": 31740 }, { - "epoch": 2.3577899896034458, - "grad_norm": 1.1725736856460571, - "learning_rate": 1.5853260062379325e-05, - "loss": 0.0643, + "epoch": 4.7155799792068915, + "grad_norm": 0.8702318072319031, + "learning_rate": 5.284420020793108e-06, + "loss": 0.0605, "step": 31750 }, { - "epoch": 2.358532600623793, - "grad_norm": 1.4532411098480225, - "learning_rate": 1.584880439625724e-05, - "loss": 0.057, + "epoch": 4.717065201247586, + "grad_norm": 0.5412219166755676, + "learning_rate": 5.2829347987524135e-06, + "loss": 0.051, "step": 31760 }, { - "epoch": 2.3592752116441407, - "grad_norm": 1.950925350189209, - "learning_rate": 1.5844348730135155e-05, - "loss": 0.039, + "epoch": 4.718550423288281, + "grad_norm": 1.113112211227417, + "learning_rate": 5.281449576711719e-06, + "loss": 0.056, "step": 31770 }, { - "epoch": 2.3600178226644886, - "grad_norm": 4.71244478225708, - "learning_rate": 1.583989306401307e-05, - "loss": 0.0499, + "epoch": 4.720035645328977, + "grad_norm": 0.8241788148880005, + "learning_rate": 5.279964354671023e-06, + "loss": 0.0544, "step": 31780 }, { - "epoch": 2.360760433684836, - "grad_norm": 3.2281157970428467, - "learning_rate": 1.5835437397890985e-05, - "loss": 0.0861, + "epoch": 4.721520867369672, + "grad_norm": 0.5303086638450623, + "learning_rate": 5.2784791326303285e-06, + "loss": 0.0695, "step": 31790 }, { - "epoch": 2.3615030447051835, - "grad_norm": 1.568460464477539, - "learning_rate": 1.58309817317689e-05, - "loss": 0.0689, + "epoch": 4.723006089410367, + "grad_norm": 0.9856883883476257, + "learning_rate": 5.276993910589634e-06, + "loss": 0.0656, "step": 31800 }, { - "epoch": 2.362245655725531, - "grad_norm": 0.7501446008682251, - "learning_rate": 1.5826526065646815e-05, - "loss": 0.0754, + "epoch": 4.724491311451062, + "grad_norm": 0.6378732323646545, + "learning_rate": 5.275508688548938e-06, + "loss": 0.0544, "step": 31810 }, { - "epoch": 2.3629882667458784, - "grad_norm": 1.1231635808944702, - "learning_rate": 1.582207039952473e-05, - "loss": 0.088, + "epoch": 4.725976533491757, + "grad_norm": 1.0125995874404907, + "learning_rate": 5.2740234665082435e-06, + "loss": 0.0615, "step": 31820 }, { - "epoch": 2.3637308777662263, - "grad_norm": 1.5443603992462158, - "learning_rate": 1.5817614733402645e-05, - "loss": 0.0677, + "epoch": 4.7274617555324525, + "grad_norm": 0.9217480421066284, + "learning_rate": 5.272538244467549e-06, + "loss": 0.0501, "step": 31830 }, { - "epoch": 2.3644734887865737, - "grad_norm": 0.9648088216781616, - "learning_rate": 1.581315906728056e-05, - "loss": 0.0883, + "epoch": 4.728946977573147, + "grad_norm": 0.46245720982551575, + "learning_rate": 5.271053022426853e-06, + "loss": 0.0613, "step": 31840 }, { - "epoch": 2.365216099806921, - "grad_norm": 1.1336426734924316, - "learning_rate": 1.5808703401158475e-05, - "loss": 0.0545, + "epoch": 4.730432199613842, + "grad_norm": 0.4453304708003998, + "learning_rate": 5.2695678003861585e-06, + "loss": 0.0519, "step": 31850 }, { - "epoch": 2.3659587108272686, - "grad_norm": 1.6498336791992188, - "learning_rate": 1.5804247735036386e-05, - "loss": 0.067, + "epoch": 4.731917421654537, + "grad_norm": 0.49151769280433655, + "learning_rate": 5.268082578345463e-06, + "loss": 0.0868, "step": 31860 }, { - "epoch": 2.366701321847616, - "grad_norm": 2.2792677879333496, - "learning_rate": 1.57997920689143e-05, - "loss": 0.0773, + "epoch": 4.733402643695232, + "grad_norm": 0.8002780675888062, + "learning_rate": 5.266597356304768e-06, + "loss": 0.0646, "step": 31870 }, { - "epoch": 2.367443932867964, - "grad_norm": 0.7917251586914062, - "learning_rate": 1.579533640279222e-05, - "loss": 0.0453, + "epoch": 4.734887865735928, + "grad_norm": 0.7369169592857361, + "learning_rate": 5.2651121342640735e-06, + "loss": 0.0504, "step": 31880 }, { - "epoch": 2.3681865438883114, - "grad_norm": 1.3908026218414307, - "learning_rate": 1.579088073667013e-05, - "loss": 0.0596, + "epoch": 4.736373087776623, + "grad_norm": 0.8730064630508423, + "learning_rate": 5.263626912223377e-06, + "loss": 0.0489, "step": 31890 }, { - "epoch": 2.368929154908659, - "grad_norm": 0.8732894659042358, - "learning_rate": 1.5786425070548046e-05, - "loss": 0.0487, + "epoch": 4.737858309817318, + "grad_norm": 0.6994331479072571, + "learning_rate": 5.262141690182682e-06, + "loss": 0.0675, "step": 31900 }, { - "epoch": 2.3696717659290063, - "grad_norm": 0.30048489570617676, - "learning_rate": 1.5781969404425965e-05, - "loss": 0.0723, + "epoch": 4.739343531858013, + "grad_norm": 0.7177156209945679, + "learning_rate": 5.260656468141988e-06, + "loss": 0.0464, "step": 31910 }, { - "epoch": 2.3704143769493538, - "grad_norm": 1.2894927263259888, - "learning_rate": 1.5777513738303876e-05, - "loss": 0.0716, + "epoch": 4.7408287538987075, + "grad_norm": 0.7728835940361023, + "learning_rate": 5.259171246101292e-06, + "loss": 0.0595, "step": 31920 }, { - "epoch": 2.3711569879697016, - "grad_norm": 0.718590259552002, - "learning_rate": 1.577305807218179e-05, - "loss": 0.0684, + "epoch": 4.742313975939403, + "grad_norm": 0.6116012334823608, + "learning_rate": 5.257686024060597e-06, + "loss": 0.0587, "step": 31930 }, { - "epoch": 2.371899598990049, - "grad_norm": 1.6592215299606323, - "learning_rate": 1.5768602406059706e-05, - "loss": 0.0799, + "epoch": 4.743799197980098, + "grad_norm": 0.7422853112220764, + "learning_rate": 5.256200802019903e-06, + "loss": 0.0667, "step": 31940 }, { - "epoch": 2.3726422100103965, - "grad_norm": 1.2301288843154907, - "learning_rate": 1.576414673993762e-05, - "loss": 0.0526, + "epoch": 4.745284420020793, + "grad_norm": 0.7492051720619202, + "learning_rate": 5.254715579979207e-06, + "loss": 0.0621, "step": 31950 }, { - "epoch": 2.373384821030744, - "grad_norm": 1.742710828781128, - "learning_rate": 1.5759691073815536e-05, - "loss": 0.051, + "epoch": 4.746769642061488, + "grad_norm": 0.4576432406902313, + "learning_rate": 5.253230357938512e-06, + "loss": 0.0736, "step": 31960 }, { - "epoch": 2.3741274320510914, - "grad_norm": 0.8959303498268127, - "learning_rate": 1.5755235407693448e-05, - "loss": 0.0439, + "epoch": 4.748254864102183, + "grad_norm": 1.2411152124404907, + "learning_rate": 5.251745135897817e-06, + "loss": 0.0642, "step": 31970 }, { - "epoch": 2.3748700430714393, - "grad_norm": 2.8201303482055664, - "learning_rate": 1.5750779741571366e-05, - "loss": 0.0608, + "epoch": 4.749740086142879, + "grad_norm": 1.3862706422805786, + "learning_rate": 5.250259913857122e-06, + "loss": 0.0428, "step": 31980 }, { - "epoch": 2.375612654091787, - "grad_norm": 1.701446294784546, - "learning_rate": 1.574632407544928e-05, - "loss": 0.0872, + "epoch": 4.751225308183574, + "grad_norm": 0.7565192580223083, + "learning_rate": 5.248774691816427e-06, + "loss": 0.0641, "step": 31990 }, { - "epoch": 2.3763552651121342, - "grad_norm": 0.7672728300094604, - "learning_rate": 1.5741868409327193e-05, - "loss": 0.0975, + "epoch": 4.7527105302242685, + "grad_norm": 0.395856112241745, + "learning_rate": 5.247289469775732e-06, + "loss": 0.0677, "step": 32000 }, { - "epoch": 2.3770978761324817, - "grad_norm": 2.049514055252075, - "learning_rate": 1.573741274320511e-05, - "loss": 0.0846, + "epoch": 4.754195752264963, + "grad_norm": 0.7864809036254883, + "learning_rate": 5.245804247735037e-06, + "loss": 0.0391, "step": 32010 }, { - "epoch": 2.377840487152829, - "grad_norm": 1.193021535873413, - "learning_rate": 1.5732957077083026e-05, - "loss": 0.0705, + "epoch": 4.755680974305658, + "grad_norm": 0.42430078983306885, + "learning_rate": 5.244319025694342e-06, + "loss": 0.0513, "step": 32020 }, { - "epoch": 2.378583098173177, - "grad_norm": 2.212050676345825, - "learning_rate": 1.5728501410960938e-05, - "loss": 0.0715, + "epoch": 4.757166196346354, + "grad_norm": 0.6918451189994812, + "learning_rate": 5.242833803653647e-06, + "loss": 0.05, "step": 32030 }, { - "epoch": 2.3793257091935245, - "grad_norm": 1.0855233669281006, - "learning_rate": 1.5724045744838853e-05, - "loss": 0.0644, + "epoch": 4.758651418387049, + "grad_norm": 0.4560699164867401, + "learning_rate": 5.241348581612952e-06, + "loss": 0.0538, "step": 32040 }, { - "epoch": 2.380068320213872, - "grad_norm": 2.6349453926086426, - "learning_rate": 1.571959007871677e-05, - "loss": 0.0604, + "epoch": 4.760136640427744, + "grad_norm": 0.4515649378299713, + "learning_rate": 5.239863359572257e-06, + "loss": 0.0642, "step": 32050 }, { - "epoch": 2.3808109312342194, - "grad_norm": 0.948853611946106, - "learning_rate": 1.5715134412594683e-05, - "loss": 0.0721, + "epoch": 4.761621862468439, + "grad_norm": 1.9437707662582397, + "learning_rate": 5.238378137531561e-06, + "loss": 0.0594, "step": 32060 }, { - "epoch": 2.381553542254567, - "grad_norm": 1.4371938705444336, - "learning_rate": 1.5710678746472598e-05, - "loss": 0.0499, + "epoch": 4.763107084509134, + "grad_norm": 0.34391626715660095, + "learning_rate": 5.236892915490866e-06, + "loss": 0.0708, "step": 32070 }, { - "epoch": 2.3822961532749147, - "grad_norm": 0.6612533926963806, - "learning_rate": 1.5706223080350513e-05, - "loss": 0.0827, + "epoch": 4.7645923065498295, + "grad_norm": 0.8328548073768616, + "learning_rate": 5.235407693450171e-06, + "loss": 0.0597, "step": 32080 }, { - "epoch": 2.383038764295262, - "grad_norm": 3.743394136428833, - "learning_rate": 1.5701767414228428e-05, - "loss": 0.0729, + "epoch": 4.766077528590524, + "grad_norm": 1.0488471984863281, + "learning_rate": 5.233922471409476e-06, + "loss": 0.0652, "step": 32090 }, { - "epoch": 2.3837813753156096, - "grad_norm": 1.6435579061508179, - "learning_rate": 1.5697311748106343e-05, - "loss": 0.0618, + "epoch": 4.767562750631219, + "grad_norm": 0.7127065658569336, + "learning_rate": 5.232437249368781e-06, + "loss": 0.038, "step": 32100 }, { - "epoch": 2.384523986335957, - "grad_norm": 2.4289140701293945, - "learning_rate": 1.5692856081984254e-05, - "loss": 0.0822, + "epoch": 4.769047972671914, + "grad_norm": 1.633682370185852, + "learning_rate": 5.230952027328086e-06, + "loss": 0.0747, "step": 32110 }, { - "epoch": 2.3852665973563045, - "grad_norm": 0.4796588122844696, - "learning_rate": 1.5688400415862173e-05, - "loss": 0.0544, + "epoch": 4.770533194712609, + "grad_norm": 0.8790189623832703, + "learning_rate": 5.229466805287391e-06, + "loss": 0.0874, "step": 32120 }, { - "epoch": 2.3860092083766524, - "grad_norm": 2.2078115940093994, - "learning_rate": 1.5683944749740088e-05, - "loss": 0.0714, + "epoch": 4.772018416753305, + "grad_norm": 0.8211573362350464, + "learning_rate": 5.227981583246696e-06, + "loss": 0.0602, "step": 32130 }, { - "epoch": 2.386751819397, - "grad_norm": 1.0921403169631958, - "learning_rate": 1.5679489083618e-05, - "loss": 0.0562, + "epoch": 4.773503638794, + "grad_norm": 0.5534783601760864, + "learning_rate": 5.226496361206001e-06, + "loss": 0.0494, "step": 32140 }, { - "epoch": 2.3874944304173473, - "grad_norm": 2.1888418197631836, - "learning_rate": 1.5675033417495918e-05, - "loss": 0.0794, + "epoch": 4.774988860834695, + "grad_norm": 2.4223477840423584, + "learning_rate": 5.225011139165306e-06, + "loss": 0.0718, "step": 32150 }, { - "epoch": 2.3882370414376948, - "grad_norm": 2.4097537994384766, - "learning_rate": 1.5670577751373833e-05, - "loss": 0.1025, + "epoch": 4.7764740828753895, + "grad_norm": 0.7991631031036377, + "learning_rate": 5.223525917124611e-06, + "loss": 0.0774, "step": 32160 }, { - "epoch": 2.3889796524580427, - "grad_norm": 2.782663583755493, - "learning_rate": 1.5666122085251744e-05, - "loss": 0.0667, + "epoch": 4.777959304916085, + "grad_norm": 0.8528579473495483, + "learning_rate": 5.222040695083916e-06, + "loss": 0.0747, "step": 32170 }, { - "epoch": 2.38972226347839, - "grad_norm": 2.659151554107666, - "learning_rate": 1.5661666419129663e-05, - "loss": 0.0515, + "epoch": 4.77944452695678, + "grad_norm": 1.0776245594024658, + "learning_rate": 5.220555473043221e-06, + "loss": 0.0685, "step": 32180 }, { - "epoch": 2.3904648744987376, - "grad_norm": 1.7082628011703491, - "learning_rate": 1.5657210753007574e-05, - "loss": 0.1092, + "epoch": 4.780929748997475, + "grad_norm": 0.5044242143630981, + "learning_rate": 5.2190702510025245e-06, + "loss": 0.0745, "step": 32190 }, { - "epoch": 2.391207485519085, - "grad_norm": 1.8074676990509033, - "learning_rate": 1.565275508688549e-05, - "loss": 0.0675, + "epoch": 4.78241497103817, + "grad_norm": 0.6456106901168823, + "learning_rate": 5.21758502896183e-06, + "loss": 0.0816, "step": 32200 }, { - "epoch": 2.3919500965394325, - "grad_norm": 1.0085229873657227, - "learning_rate": 1.5648299420763404e-05, - "loss": 0.1148, + "epoch": 4.783900193078865, + "grad_norm": 0.7129038572311401, + "learning_rate": 5.216099806921136e-06, + "loss": 0.0666, "step": 32210 }, { - "epoch": 2.3926927075597804, - "grad_norm": 4.384804725646973, - "learning_rate": 1.564384375464132e-05, - "loss": 0.0551, + "epoch": 4.785385415119561, + "grad_norm": 0.7487207651138306, + "learning_rate": 5.2146145848804395e-06, + "loss": 0.0559, "step": 32220 }, { - "epoch": 2.393435318580128, - "grad_norm": 1.1402451992034912, - "learning_rate": 1.5639388088519234e-05, - "loss": 0.0603, + "epoch": 4.786870637160256, + "grad_norm": 0.36898553371429443, + "learning_rate": 5.213129362839745e-06, + "loss": 0.0508, "step": 32230 }, { - "epoch": 2.3941779296004753, - "grad_norm": 0.9392279386520386, - "learning_rate": 1.563493242239715e-05, - "loss": 0.0836, + "epoch": 4.7883558592009505, + "grad_norm": 0.6122729182243347, + "learning_rate": 5.21164414079905e-06, + "loss": 0.0675, "step": 32240 }, { - "epoch": 2.3949205406208227, - "grad_norm": 1.5614676475524902, - "learning_rate": 1.5630476756275064e-05, - "loss": 0.0956, + "epoch": 4.789841081241645, + "grad_norm": 0.7119560837745667, + "learning_rate": 5.2101589187583545e-06, + "loss": 0.0548, "step": 32250 }, { - "epoch": 2.39566315164117, - "grad_norm": 1.321561574935913, - "learning_rate": 1.562602109015298e-05, - "loss": 0.0715, + "epoch": 4.79132630328234, + "grad_norm": 1.06340754032135, + "learning_rate": 5.20867369671766e-06, + "loss": 0.0627, "step": 32260 }, { - "epoch": 2.396405762661518, - "grad_norm": 0.910446047782898, - "learning_rate": 1.562156542403089e-05, - "loss": 0.0434, + "epoch": 4.792811525323036, + "grad_norm": 0.45462164282798767, + "learning_rate": 5.207188474676964e-06, + "loss": 0.0612, "step": 32270 }, { - "epoch": 2.3971483736818655, - "grad_norm": 0.8245983123779297, - "learning_rate": 1.5617109757908806e-05, - "loss": 0.076, + "epoch": 4.794296747363731, + "grad_norm": 0.6373799443244934, + "learning_rate": 5.2057032526362695e-06, + "loss": 0.0815, "step": 32280 }, { - "epoch": 2.397890984702213, - "grad_norm": 1.7808678150177002, - "learning_rate": 1.5612654091786724e-05, - "loss": 0.0731, + "epoch": 4.795781969404426, + "grad_norm": 0.4400479793548584, + "learning_rate": 5.204218030595575e-06, + "loss": 0.0726, "step": 32290 }, { - "epoch": 2.3986335957225604, - "grad_norm": 2.991241455078125, - "learning_rate": 1.5608198425664636e-05, - "loss": 0.087, + "epoch": 4.797267191445121, + "grad_norm": 0.8259301781654358, + "learning_rate": 5.202732808554879e-06, + "loss": 0.064, "step": 32300 }, { - "epoch": 2.3993762067429083, - "grad_norm": 2.8807268142700195, - "learning_rate": 1.560374275954255e-05, - "loss": 0.0876, + "epoch": 4.798752413485817, + "grad_norm": 1.5500998497009277, + "learning_rate": 5.2012475865141845e-06, + "loss": 0.0605, "step": 32310 }, { - "epoch": 2.4001188177632558, - "grad_norm": 0.5517368316650391, - "learning_rate": 1.559928709342047e-05, - "loss": 0.0773, + "epoch": 4.8002376355265115, + "grad_norm": 1.0718823671340942, + "learning_rate": 5.19976236447349e-06, + "loss": 0.0576, "step": 32320 }, { - "epoch": 2.400861428783603, - "grad_norm": 1.6342768669128418, - "learning_rate": 1.559483142729838e-05, - "loss": 0.0609, + "epoch": 4.801722857567206, + "grad_norm": 0.33371061086654663, + "learning_rate": 5.198277142432794e-06, + "loss": 0.0446, "step": 32330 }, { - "epoch": 2.4016040398039507, - "grad_norm": 5.523561954498291, - "learning_rate": 1.5590375761176296e-05, - "loss": 0.0431, + "epoch": 4.803208079607901, + "grad_norm": 0.66288161277771, + "learning_rate": 5.1967919203920995e-06, + "loss": 0.0664, "step": 32340 }, { - "epoch": 2.402346650824298, - "grad_norm": 0.7796204090118408, - "learning_rate": 1.558592009505421e-05, - "loss": 0.0905, + "epoch": 4.804693301648596, + "grad_norm": 1.0212267637252808, + "learning_rate": 5.195306698351405e-06, + "loss": 0.0466, "step": 32350 }, { - "epoch": 2.403089261844646, - "grad_norm": 2.9090576171875, - "learning_rate": 1.5581464428932126e-05, - "loss": 0.0998, + "epoch": 4.806178523689292, + "grad_norm": 0.8142063617706299, + "learning_rate": 5.193821476310708e-06, + "loss": 0.0621, "step": 32360 }, { - "epoch": 2.4038318728649934, - "grad_norm": 0.7734149098396301, - "learning_rate": 1.557700876281004e-05, - "loss": 0.0585, + "epoch": 4.807663745729987, + "grad_norm": 0.7264366745948792, + "learning_rate": 5.192336254270014e-06, + "loss": 0.0608, "step": 32370 }, { - "epoch": 2.404574483885341, - "grad_norm": 1.852062702178955, - "learning_rate": 1.5572553096687952e-05, - "loss": 0.1048, + "epoch": 4.809148967770682, + "grad_norm": 0.8260073065757751, + "learning_rate": 5.190851032229318e-06, + "loss": 0.0604, "step": 32380 }, { - "epoch": 2.4053170949056883, - "grad_norm": 0.8608161807060242, - "learning_rate": 1.556809743056587e-05, - "loss": 0.0548, + "epoch": 4.810634189811377, + "grad_norm": 0.9729313254356384, + "learning_rate": 5.189365810188623e-06, + "loss": 0.0602, "step": 32390 }, { - "epoch": 2.406059705926036, - "grad_norm": 3.039947032928467, - "learning_rate": 1.5563641764443786e-05, - "loss": 0.0854, + "epoch": 4.812119411852072, + "grad_norm": 1.0569586753845215, + "learning_rate": 5.187880588147929e-06, + "loss": 0.0552, "step": 32400 }, { - "epoch": 2.4068023169463837, - "grad_norm": 2.2535059452056885, - "learning_rate": 1.5559186098321697e-05, - "loss": 0.0921, + "epoch": 4.813604633892767, + "grad_norm": 0.6376864314079285, + "learning_rate": 5.186395366107233e-06, + "loss": 0.0519, "step": 32410 }, { - "epoch": 2.407544927966731, - "grad_norm": 3.556171417236328, - "learning_rate": 1.5554730432199616e-05, - "loss": 0.0632, + "epoch": 4.815089855933462, + "grad_norm": 0.7130823135375977, + "learning_rate": 5.184910144066538e-06, + "loss": 0.0675, "step": 32420 }, { - "epoch": 2.4082875389870786, - "grad_norm": 2.6815476417541504, - "learning_rate": 1.555027476607753e-05, - "loss": 0.0689, + "epoch": 4.816575077974157, + "grad_norm": 0.6740859746932983, + "learning_rate": 5.183424922025844e-06, + "loss": 0.0618, "step": 32430 }, { - "epoch": 2.409030150007426, - "grad_norm": 0.38437405228614807, - "learning_rate": 1.5545819099955442e-05, - "loss": 0.0491, + "epoch": 4.818060300014852, + "grad_norm": 0.7788185477256775, + "learning_rate": 5.181939699985148e-06, + "loss": 0.0739, "step": 32440 }, { - "epoch": 2.4097727610277735, - "grad_norm": 1.429911494255066, - "learning_rate": 1.5541363433833357e-05, - "loss": 0.0423, + "epoch": 4.819545522055547, + "grad_norm": 0.9308912754058838, + "learning_rate": 5.180454477944453e-06, + "loss": 0.0571, "step": 32450 }, { - "epoch": 2.4105153720481214, - "grad_norm": 1.3753142356872559, - "learning_rate": 1.5536907767711276e-05, - "loss": 0.0816, + "epoch": 4.821030744096243, + "grad_norm": 0.6136270761489868, + "learning_rate": 5.178969255903759e-06, + "loss": 0.0771, "step": 32460 }, { - "epoch": 2.411257983068469, - "grad_norm": 1.1020511388778687, - "learning_rate": 1.5532452101589187e-05, - "loss": 0.0403, + "epoch": 4.822515966136938, + "grad_norm": 0.9954419732093811, + "learning_rate": 5.177484033863063e-06, + "loss": 0.0707, "step": 32470 }, { - "epoch": 2.4120005940888163, - "grad_norm": 3.2163803577423096, - "learning_rate": 1.5527996435467102e-05, - "loss": 0.09, + "epoch": 4.824001188177633, + "grad_norm": 0.6545411348342896, + "learning_rate": 5.175998811822368e-06, + "loss": 0.0826, "step": 32480 }, { - "epoch": 2.4127432051091637, - "grad_norm": 0.8695268630981445, - "learning_rate": 1.5523540769345017e-05, - "loss": 0.0565, + "epoch": 4.8254864102183275, + "grad_norm": 0.5363044142723083, + "learning_rate": 5.174513589781672e-06, + "loss": 0.0612, "step": 32490 }, { - "epoch": 2.413485816129511, - "grad_norm": 0.9537298083305359, - "learning_rate": 1.5519085103222932e-05, - "loss": 0.0913, + "epoch": 4.826971632259022, + "grad_norm": 0.7954429984092712, + "learning_rate": 5.173028367740978e-06, + "loss": 0.0538, "step": 32500 }, { - "epoch": 2.414228427149859, - "grad_norm": 1.1136554479599, - "learning_rate": 1.5514629437100847e-05, - "loss": 0.0829, + "epoch": 4.828456854299718, + "grad_norm": 0.9464428424835205, + "learning_rate": 5.171543145700283e-06, + "loss": 0.0724, "step": 32510 }, { - "epoch": 2.4149710381702065, - "grad_norm": 1.2654924392700195, - "learning_rate": 1.551017377097876e-05, - "loss": 0.0915, + "epoch": 4.829942076340413, + "grad_norm": 0.3676034212112427, + "learning_rate": 5.170057923659587e-06, + "loss": 0.0498, "step": 32520 }, { - "epoch": 2.415713649190554, - "grad_norm": 3.5749497413635254, - "learning_rate": 1.5505718104856677e-05, - "loss": 0.0921, + "epoch": 4.831427298381108, + "grad_norm": 0.337434858083725, + "learning_rate": 5.168572701618892e-06, + "loss": 0.078, "step": 32530 }, { - "epoch": 2.4164562602109014, - "grad_norm": 1.6017907857894897, - "learning_rate": 1.5501262438734592e-05, - "loss": 0.0645, + "epoch": 4.832912520421803, + "grad_norm": 0.7221982479095459, + "learning_rate": 5.1670874795781974e-06, + "loss": 0.0685, "step": 32540 }, { - "epoch": 2.417198871231249, - "grad_norm": 2.2795519828796387, - "learning_rate": 1.5496806772612504e-05, - "loss": 0.0624, + "epoch": 4.834397742462498, + "grad_norm": 1.243183970451355, + "learning_rate": 5.165602257537502e-06, + "loss": 0.0501, "step": 32550 }, { - "epoch": 2.4179414822515968, - "grad_norm": 1.48874831199646, - "learning_rate": 1.5492351106490422e-05, - "loss": 0.0667, + "epoch": 4.8358829645031935, + "grad_norm": 0.7665337324142456, + "learning_rate": 5.164117035496807e-06, + "loss": 0.076, "step": 32560 }, { - "epoch": 2.4186840932719442, - "grad_norm": 3.069807529449463, - "learning_rate": 1.5487895440368337e-05, - "loss": 0.0767, + "epoch": 4.8373681865438884, + "grad_norm": 1.4218223094940186, + "learning_rate": 5.1626318134561124e-06, + "loss": 0.0524, "step": 32570 }, { - "epoch": 2.4194267042922917, - "grad_norm": 0.8146727085113525, - "learning_rate": 1.548343977424625e-05, - "loss": 0.09, + "epoch": 4.838853408584583, + "grad_norm": 0.9128448963165283, + "learning_rate": 5.161146591415417e-06, + "loss": 0.0589, "step": 32580 }, { - "epoch": 2.420169315312639, - "grad_norm": 2.0153746604919434, - "learning_rate": 1.5478984108124167e-05, - "loss": 0.0834, + "epoch": 4.840338630625278, + "grad_norm": 0.6334198713302612, + "learning_rate": 5.159661369374722e-06, + "loss": 0.0573, "step": 32590 }, { - "epoch": 2.4209119263329866, - "grad_norm": 1.9810839891433716, - "learning_rate": 1.547452844200208e-05, - "loss": 0.0843, + "epoch": 4.841823852665973, + "grad_norm": 0.7682795524597168, + "learning_rate": 5.158176147334027e-06, + "loss": 0.059, "step": 32600 }, { - "epoch": 2.4216545373533345, - "grad_norm": 1.213164210319519, - "learning_rate": 1.5470072775879994e-05, - "loss": 0.0798, + "epoch": 4.843309074706669, + "grad_norm": 0.635378897190094, + "learning_rate": 5.156690925293332e-06, + "loss": 0.0628, "step": 32610 }, { - "epoch": 2.422397148373682, - "grad_norm": 0.5485877394676208, - "learning_rate": 1.546561710975791e-05, - "loss": 0.0396, + "epoch": 4.844794296747364, + "grad_norm": 0.6622408628463745, + "learning_rate": 5.155205703252637e-06, + "loss": 0.0479, "step": 32620 }, { - "epoch": 2.4231397593940294, - "grad_norm": 1.8385777473449707, - "learning_rate": 1.5461161443635824e-05, - "loss": 0.0538, + "epoch": 4.846279518788059, + "grad_norm": 0.47650235891342163, + "learning_rate": 5.153720481211942e-06, + "loss": 0.042, "step": 32630 }, { - "epoch": 2.423882370414377, - "grad_norm": 2.222101926803589, - "learning_rate": 1.545670577751374e-05, - "loss": 0.0494, + "epoch": 4.847764740828754, + "grad_norm": 1.37141752243042, + "learning_rate": 5.152235259171247e-06, + "loss": 0.0691, "step": 32640 }, { - "epoch": 2.4246249814347243, - "grad_norm": 0.7490872740745544, - "learning_rate": 1.5452250111391654e-05, - "loss": 0.0645, + "epoch": 4.8492499628694485, + "grad_norm": 0.7843444347381592, + "learning_rate": 5.150750037130552e-06, + "loss": 0.0672, "step": 32650 }, { - "epoch": 2.425367592455072, - "grad_norm": 1.1471827030181885, - "learning_rate": 1.544779444526957e-05, - "loss": 0.0842, + "epoch": 4.850735184910144, + "grad_norm": 0.6222081780433655, + "learning_rate": 5.149264815089856e-06, + "loss": 0.0684, "step": 32660 }, { - "epoch": 2.4261102034754196, - "grad_norm": 1.6201061010360718, - "learning_rate": 1.5443338779147484e-05, - "loss": 0.0834, + "epoch": 4.852220406950839, + "grad_norm": 0.8082340955734253, + "learning_rate": 5.147779593049161e-06, + "loss": 0.0612, "step": 32670 }, { - "epoch": 2.426852814495767, - "grad_norm": 1.3933659791946411, - "learning_rate": 1.54388831130254e-05, - "loss": 0.0674, + "epoch": 4.853705628991534, + "grad_norm": 1.1446117162704468, + "learning_rate": 5.146294371008467e-06, + "loss": 0.0497, "step": 32680 }, { - "epoch": 2.4275954255161145, - "grad_norm": 1.0082453489303589, - "learning_rate": 1.543442744690331e-05, - "loss": 0.043, + "epoch": 4.855190851032229, + "grad_norm": 0.883907675743103, + "learning_rate": 5.144809148967771e-06, + "loss": 0.07, "step": 32690 }, { - "epoch": 2.428338036536462, - "grad_norm": 0.7856671214103699, - "learning_rate": 1.542997178078123e-05, - "loss": 0.048, + "epoch": 4.856676073072924, + "grad_norm": 1.053536295890808, + "learning_rate": 5.143323926927076e-06, + "loss": 0.0419, "step": 32700 }, { - "epoch": 2.42908064755681, - "grad_norm": 0.4259887635707855, - "learning_rate": 1.542551611465914e-05, - "loss": 0.0641, + "epoch": 4.85816129511362, + "grad_norm": 1.5421696901321411, + "learning_rate": 5.1418387048863804e-06, + "loss": 0.0677, "step": 32710 }, { - "epoch": 2.4298232585771573, - "grad_norm": 0.42960453033447266, - "learning_rate": 1.5421060448537055e-05, - "loss": 0.0755, + "epoch": 4.859646517154315, + "grad_norm": 0.4656846523284912, + "learning_rate": 5.140353482845686e-06, + "loss": 0.0475, "step": 32720 }, { - "epoch": 2.4305658695975048, - "grad_norm": 3.8865599632263184, - "learning_rate": 1.5416604782414974e-05, - "loss": 0.0687, + "epoch": 4.8611317391950095, + "grad_norm": 0.9899516105651855, + "learning_rate": 5.138868260804991e-06, + "loss": 0.0601, "step": 32730 }, { - "epoch": 2.431308480617852, - "grad_norm": 3.587674140930176, - "learning_rate": 1.5412149116292885e-05, + "epoch": 4.862616961235704, + "grad_norm": 0.761489987373352, + "learning_rate": 5.1373830387642954e-06, "loss": 0.0657, "step": 32740 }, { - "epoch": 2.4320510916382, - "grad_norm": 1.8166769742965698, - "learning_rate": 1.54076934501708e-05, - "loss": 0.0776, + "epoch": 4.8641021832764, + "grad_norm": 1.031511664390564, + "learning_rate": 5.135897816723601e-06, + "loss": 0.0576, "step": 32750 }, { - "epoch": 2.4327937026585476, - "grad_norm": 2.718137264251709, - "learning_rate": 1.5403237784048715e-05, - "loss": 0.0761, + "epoch": 4.865587405317095, + "grad_norm": 0.9384909272193909, + "learning_rate": 5.134412594682906e-06, + "loss": 0.0769, "step": 32760 }, { - "epoch": 2.433536313678895, - "grad_norm": 1.776787281036377, - "learning_rate": 1.539878211792663e-05, - "loss": 0.0838, + "epoch": 4.86707262735779, + "grad_norm": 0.9842746257781982, + "learning_rate": 5.13292737264221e-06, + "loss": 0.0606, "step": 32770 }, { - "epoch": 2.4342789246992425, - "grad_norm": 0.9153753519058228, - "learning_rate": 1.5394326451804545e-05, - "loss": 0.065, + "epoch": 4.868557849398485, + "grad_norm": 0.6807608008384705, + "learning_rate": 5.131442150601516e-06, + "loss": 0.0425, "step": 32780 }, { - "epoch": 2.43502153571959, - "grad_norm": 1.0639044046401978, - "learning_rate": 1.5389870785682457e-05, - "loss": 0.0608, + "epoch": 4.87004307143918, + "grad_norm": 0.8210038542747498, + "learning_rate": 5.12995692856082e-06, + "loss": 0.0708, "step": 32790 }, { - "epoch": 2.435764146739938, - "grad_norm": 1.5037258863449097, - "learning_rate": 1.5385415119560375e-05, - "loss": 0.067, + "epoch": 4.871528293479876, + "grad_norm": 0.6338664293289185, + "learning_rate": 5.128471706520125e-06, + "loss": 0.0817, "step": 32800 }, { - "epoch": 2.4365067577602852, - "grad_norm": 1.892593502998352, - "learning_rate": 1.538095945343829e-05, - "loss": 0.061, + "epoch": 4.8730135155205705, + "grad_norm": 0.8477684259414673, + "learning_rate": 5.126986484479431e-06, + "loss": 0.0597, "step": 32810 }, { - "epoch": 2.4372493687806327, - "grad_norm": 3.2514467239379883, - "learning_rate": 1.5376503787316202e-05, - "loss": 0.0801, + "epoch": 4.874498737561265, + "grad_norm": 1.4142152070999146, + "learning_rate": 5.125501262438734e-06, + "loss": 0.0901, "step": 32820 }, { - "epoch": 2.43799197980098, - "grad_norm": 1.7820117473602295, - "learning_rate": 1.537204812119412e-05, - "loss": 0.0887, + "epoch": 4.87598395960196, + "grad_norm": 0.40460431575775146, + "learning_rate": 5.1240160403980396e-06, + "loss": 0.04, "step": 32830 }, { - "epoch": 2.4387345908213276, - "grad_norm": 1.159784197807312, - "learning_rate": 1.5367592455072035e-05, - "loss": 0.0719, + "epoch": 4.877469181642655, + "grad_norm": 1.283377766609192, + "learning_rate": 5.122530818357345e-06, + "loss": 0.0509, "step": 32840 }, { - "epoch": 2.4394772018416755, - "grad_norm": 2.0374605655670166, - "learning_rate": 1.5363136788949947e-05, - "loss": 0.0854, + "epoch": 4.878954403683351, + "grad_norm": 1.3284411430358887, + "learning_rate": 5.121045596316649e-06, + "loss": 0.0806, "step": 32850 }, { - "epoch": 2.440219812862023, - "grad_norm": 2.429708957672119, - "learning_rate": 1.5358681122827862e-05, - "loss": 0.0552, + "epoch": 4.880439625724046, + "grad_norm": 1.282408356666565, + "learning_rate": 5.1195603742759546e-06, + "loss": 0.0674, "step": 32860 }, { - "epoch": 2.4409624238823704, - "grad_norm": 2.368227243423462, - "learning_rate": 1.535422545670578e-05, - "loss": 0.07, + "epoch": 4.881924847764741, + "grad_norm": 0.5994721055030823, + "learning_rate": 5.11807515223526e-06, + "loss": 0.0485, "step": 32870 }, { - "epoch": 2.441705034902718, - "grad_norm": 0.7669575214385986, - "learning_rate": 1.5349769790583692e-05, - "loss": 0.0622, + "epoch": 4.883410069805436, + "grad_norm": 1.1487478017807007, + "learning_rate": 5.116589930194564e-06, + "loss": 0.0633, "step": 32880 }, { - "epoch": 2.4424476459230657, - "grad_norm": 0.9094696044921875, - "learning_rate": 1.5345314124461607e-05, - "loss": 0.0522, + "epoch": 4.8848952918461315, + "grad_norm": 0.6418612599372864, + "learning_rate": 5.1151047081538696e-06, + "loss": 0.069, "step": 32890 }, { - "epoch": 2.443190256943413, - "grad_norm": 1.134655237197876, - "learning_rate": 1.5340858458339522e-05, - "loss": 0.0789, + "epoch": 4.886380513886826, + "grad_norm": 1.3160861730575562, + "learning_rate": 5.113619486113174e-06, + "loss": 0.0682, "step": 32900 }, { - "epoch": 2.4439328679637606, - "grad_norm": 1.5787122249603271, - "learning_rate": 1.5336402792217437e-05, - "loss": 0.0823, + "epoch": 4.887865735927521, + "grad_norm": 0.5356642007827759, + "learning_rate": 5.112134264072479e-06, + "loss": 0.0533, "step": 32910 }, { - "epoch": 2.444675478984108, - "grad_norm": 1.5267248153686523, - "learning_rate": 1.5331947126095352e-05, - "loss": 0.0557, + "epoch": 4.889350957968216, + "grad_norm": 1.1569596529006958, + "learning_rate": 5.1106490420317845e-06, + "loss": 0.0737, "step": 32920 }, { - "epoch": 2.4454180900044555, - "grad_norm": 1.0408943891525269, - "learning_rate": 1.5327491459973264e-05, - "loss": 0.0481, + "epoch": 4.890836180008911, + "grad_norm": 0.5608000755310059, + "learning_rate": 5.109163819991089e-06, + "loss": 0.0584, "step": 32930 }, { - "epoch": 2.4461607010248034, - "grad_norm": 0.36609914898872375, - "learning_rate": 1.5323035793851182e-05, - "loss": 0.0744, + "epoch": 4.892321402049607, + "grad_norm": 1.6011561155319214, + "learning_rate": 5.107678597950394e-06, + "loss": 0.0649, "step": 32940 }, { - "epoch": 2.446903312045151, - "grad_norm": 0.8200104832649231, - "learning_rate": 1.5318580127729097e-05, - "loss": 0.087, + "epoch": 4.893806624090302, + "grad_norm": 1.0872085094451904, + "learning_rate": 5.1061933759096995e-06, + "loss": 0.0581, "step": 32950 }, { - "epoch": 2.4476459230654983, - "grad_norm": 1.5445940494537354, - "learning_rate": 1.531412446160701e-05, - "loss": 0.0407, + "epoch": 4.895291846130997, + "grad_norm": 0.6621091961860657, + "learning_rate": 5.104708153869003e-06, + "loss": 0.0683, "step": 32960 }, { - "epoch": 2.448388534085846, - "grad_norm": 1.621883749961853, - "learning_rate": 1.5309668795484927e-05, - "loss": 0.0679, + "epoch": 4.896777068171692, + "grad_norm": 0.9341262578964233, + "learning_rate": 5.103222931828309e-06, + "loss": 0.0765, "step": 32970 }, { - "epoch": 2.4491311451061932, - "grad_norm": 0.8579855561256409, - "learning_rate": 1.5305213129362842e-05, - "loss": 0.0799, + "epoch": 4.8982622902123865, + "grad_norm": 0.588538408279419, + "learning_rate": 5.1017377097876145e-06, + "loss": 0.0541, "step": 32980 }, { - "epoch": 2.449873756126541, - "grad_norm": 1.6563255786895752, - "learning_rate": 1.5300757463240754e-05, - "loss": 0.0634, + "epoch": 4.899747512253082, + "grad_norm": 0.7938698530197144, + "learning_rate": 5.100252487746918e-06, + "loss": 0.0553, "step": 32990 }, { - "epoch": 2.4506163671468886, - "grad_norm": 0.7070105671882629, - "learning_rate": 1.5296301797118672e-05, - "loss": 0.0956, + "epoch": 4.901232734293777, + "grad_norm": 0.8380831480026245, + "learning_rate": 5.098767265706223e-06, + "loss": 0.0764, "step": 33000 }, { - "epoch": 2.451358978167236, - "grad_norm": 2.0392887592315674, - "learning_rate": 1.5291846130996584e-05, - "loss": 0.0828, + "epoch": 4.902717956334472, + "grad_norm": 1.6798778772354126, + "learning_rate": 5.097282043665528e-06, + "loss": 0.0699, "step": 33010 }, { - "epoch": 2.4521015891875835, - "grad_norm": 1.0714646577835083, - "learning_rate": 1.52873904648745e-05, - "loss": 0.0737, + "epoch": 4.904203178375167, + "grad_norm": 0.6386799812316895, + "learning_rate": 5.095796821624833e-06, + "loss": 0.0549, "step": 33020 }, { - "epoch": 2.452844200207931, - "grad_norm": 1.0105502605438232, - "learning_rate": 1.5282934798752413e-05, - "loss": 0.0637, + "epoch": 4.905688400415862, + "grad_norm": 1.2105910778045654, + "learning_rate": 5.094311599584138e-06, + "loss": 0.0629, "step": 33030 }, { - "epoch": 2.453586811228279, - "grad_norm": 2.0838091373443604, - "learning_rate": 1.527847913263033e-05, - "loss": 0.0807, + "epoch": 4.907173622456558, + "grad_norm": 1.107356309890747, + "learning_rate": 5.092826377543443e-06, + "loss": 0.0683, "step": 33040 }, { - "epoch": 2.4543294222486263, - "grad_norm": 3.914405107498169, - "learning_rate": 1.5274023466508243e-05, - "loss": 0.0905, + "epoch": 4.9086588444972525, + "grad_norm": 0.8204216361045837, + "learning_rate": 5.091341155502748e-06, + "loss": 0.0598, "step": 33050 }, { - "epoch": 2.4550720332689737, - "grad_norm": 0.8861109018325806, - "learning_rate": 1.526956780038616e-05, - "loss": 0.0531, + "epoch": 4.910144066537947, + "grad_norm": 0.5383062958717346, + "learning_rate": 5.089855933462053e-06, + "loss": 0.0588, "step": 33060 }, { - "epoch": 2.455814644289321, - "grad_norm": 1.2929595708847046, - "learning_rate": 1.5265112134264073e-05, - "loss": 0.068, + "epoch": 4.911629288578642, + "grad_norm": 0.49663034081459045, + "learning_rate": 5.088370711421358e-06, + "loss": 0.0498, "step": 33070 }, { - "epoch": 2.4565572553096686, - "grad_norm": 1.6953091621398926, - "learning_rate": 1.526065646814199e-05, - "loss": 0.0674, + "epoch": 4.913114510619337, + "grad_norm": 1.1822824478149414, + "learning_rate": 5.086885489380663e-06, + "loss": 0.0645, "step": 33080 }, { - "epoch": 2.4572998663300165, - "grad_norm": 1.0414247512817383, - "learning_rate": 1.5256200802019903e-05, - "loss": 0.0852, + "epoch": 4.914599732660033, + "grad_norm": 0.9482120275497437, + "learning_rate": 5.085400267339968e-06, + "loss": 0.0811, "step": 33090 }, { - "epoch": 2.458042477350364, - "grad_norm": 1.8523513078689575, - "learning_rate": 1.5251745135897817e-05, - "loss": 0.0707, + "epoch": 4.916084954700728, + "grad_norm": 0.5398374795913696, + "learning_rate": 5.083915045299273e-06, + "loss": 0.0769, "step": 33100 }, { - "epoch": 2.4587850883707114, - "grad_norm": 1.1955831050872803, - "learning_rate": 1.5247289469775732e-05, - "loss": 0.0472, + "epoch": 4.917570176741423, + "grad_norm": 0.3150595724582672, + "learning_rate": 5.082429823258578e-06, + "loss": 0.0511, "step": 33110 }, { - "epoch": 2.459527699391059, - "grad_norm": 2.124166250228882, - "learning_rate": 1.5242833803653645e-05, - "loss": 0.0604, + "epoch": 4.919055398782118, + "grad_norm": 0.9777528047561646, + "learning_rate": 5.080944601217882e-06, + "loss": 0.0644, "step": 33120 }, { - "epoch": 2.4602703104114063, - "grad_norm": 2.213921070098877, - "learning_rate": 1.5238378137531562e-05, - "loss": 0.0677, + "epoch": 4.920540620822813, + "grad_norm": 0.9668843746185303, + "learning_rate": 5.079459379177187e-06, + "loss": 0.0626, "step": 33130 }, { - "epoch": 2.461012921431754, - "grad_norm": 0.6650950312614441, - "learning_rate": 1.5233922471409477e-05, - "loss": 0.0743, + "epoch": 4.922025842863508, + "grad_norm": 1.1156543493270874, + "learning_rate": 5.077974157136492e-06, + "loss": 0.0419, "step": 33140 }, { - "epoch": 2.4617555324521017, - "grad_norm": 0.972344160079956, - "learning_rate": 1.522946680528739e-05, - "loss": 0.0534, + "epoch": 4.923511064904203, + "grad_norm": 1.1860060691833496, + "learning_rate": 5.076488935095797e-06, + "loss": 0.0638, "step": 33150 }, { - "epoch": 2.462498143472449, - "grad_norm": 0.7517353296279907, - "learning_rate": 1.5225011139165305e-05, - "loss": 0.0736, + "epoch": 4.924996286944898, + "grad_norm": 1.0625947713851929, + "learning_rate": 5.075003713055102e-06, + "loss": 0.044, "step": 33160 }, { - "epoch": 2.4632407544927966, - "grad_norm": 2.8627500534057617, - "learning_rate": 1.5220555473043222e-05, - "loss": 0.0417, + "epoch": 4.926481508985593, + "grad_norm": 0.5870559811592102, + "learning_rate": 5.073518491014407e-06, + "loss": 0.0604, "step": 33170 }, { - "epoch": 2.463983365513144, - "grad_norm": 0.2295779585838318, - "learning_rate": 1.5216099806921135e-05, - "loss": 0.0424, + "epoch": 4.927966731026288, + "grad_norm": 0.8210609555244446, + "learning_rate": 5.072033268973712e-06, + "loss": 0.0859, "step": 33180 }, { - "epoch": 2.464725976533492, - "grad_norm": 1.0014530420303345, - "learning_rate": 1.521164414079905e-05, - "loss": 0.0509, + "epoch": 4.929451953066984, + "grad_norm": 0.7899804711341858, + "learning_rate": 5.070548046933017e-06, + "loss": 0.0592, "step": 33190 }, { - "epoch": 2.4654685875538394, - "grad_norm": 0.8116422295570374, - "learning_rate": 1.5207188474676963e-05, - "loss": 0.0773, + "epoch": 4.930937175107679, + "grad_norm": 1.406674861907959, + "learning_rate": 5.069062824892321e-06, + "loss": 0.0589, "step": 33200 }, { - "epoch": 2.466211198574187, - "grad_norm": 2.368131399154663, - "learning_rate": 1.5202732808554878e-05, - "loss": 0.0643, + "epoch": 4.932422397148374, + "grad_norm": 0.7716672420501709, + "learning_rate": 5.067577602851627e-06, + "loss": 0.0601, "step": 33210 }, { - "epoch": 2.4669538095945343, - "grad_norm": 0.573657751083374, - "learning_rate": 1.5198277142432795e-05, - "loss": 0.0482, + "epoch": 4.9339076191890685, + "grad_norm": 1.136020302772522, + "learning_rate": 5.066092380810932e-06, + "loss": 0.0507, "step": 33220 }, { - "epoch": 2.4676964206148817, - "grad_norm": 2.1423192024230957, - "learning_rate": 1.5193821476310708e-05, - "loss": 0.0896, + "epoch": 4.935392841229763, + "grad_norm": 0.45281824469566345, + "learning_rate": 5.064607158770236e-06, + "loss": 0.0524, "step": 33230 }, { - "epoch": 2.4684390316352296, - "grad_norm": 0.7535884976387024, - "learning_rate": 1.5189365810188623e-05, - "loss": 0.0625, + "epoch": 4.936878063270459, + "grad_norm": 1.0124505758285522, + "learning_rate": 5.063121936729542e-06, + "loss": 0.0647, "step": 33240 }, { - "epoch": 2.469181642655577, - "grad_norm": 1.2931323051452637, - "learning_rate": 1.518491014406654e-05, - "loss": 0.053, + "epoch": 4.938363285311154, + "grad_norm": 0.6182017922401428, + "learning_rate": 5.061636714688847e-06, + "loss": 0.0627, "step": 33250 }, { - "epoch": 2.4699242536759245, - "grad_norm": 2.7069478034973145, - "learning_rate": 1.5180454477944452e-05, - "loss": 0.0806, + "epoch": 4.939848507351849, + "grad_norm": 1.3870701789855957, + "learning_rate": 5.060151492648151e-06, + "loss": 0.0633, "step": 33260 }, { - "epoch": 2.470666864696272, - "grad_norm": 0.8650771379470825, - "learning_rate": 1.5175998811822368e-05, - "loss": 0.0689, + "epoch": 4.941333729392544, + "grad_norm": 0.8915348649024963, + "learning_rate": 5.058666270607457e-06, + "loss": 0.0554, "step": 33270 }, { - "epoch": 2.4714094757166194, - "grad_norm": 3.503204345703125, - "learning_rate": 1.5171543145700283e-05, - "loss": 0.0681, + "epoch": 4.942818951433239, + "grad_norm": 1.2844725847244263, + "learning_rate": 5.057181048566762e-06, + "loss": 0.0586, "step": 33280 }, { - "epoch": 2.4721520867369673, - "grad_norm": 0.5587957501411438, - "learning_rate": 1.5167087479578197e-05, - "loss": 0.0648, + "epoch": 4.944304173473935, + "grad_norm": 1.5099384784698486, + "learning_rate": 5.0556958265260655e-06, + "loss": 0.0726, "step": 33290 }, { - "epoch": 2.4728946977573147, - "grad_norm": 2.2347841262817383, - "learning_rate": 1.5162631813456113e-05, - "loss": 0.0624, + "epoch": 4.9457893955146295, + "grad_norm": 1.3242284059524536, + "learning_rate": 5.054210604485371e-06, + "loss": 0.0591, "step": 33300 }, { - "epoch": 2.473637308777662, - "grad_norm": 3.54778790473938, - "learning_rate": 1.5158176147334025e-05, - "loss": 0.0632, + "epoch": 4.947274617555324, + "grad_norm": 0.7010951042175293, + "learning_rate": 5.052725382444675e-06, + "loss": 0.0463, "step": 33310 }, { - "epoch": 2.4743799197980096, - "grad_norm": 1.227449893951416, - "learning_rate": 1.5153720481211942e-05, - "loss": 0.0717, + "epoch": 4.948759839596019, + "grad_norm": 0.6471588611602783, + "learning_rate": 5.0512401604039805e-06, + "loss": 0.0459, "step": 33320 }, { - "epoch": 2.4751225308183575, - "grad_norm": 1.61305570602417, - "learning_rate": 1.5149264815089857e-05, - "loss": 0.0751, + "epoch": 4.950245061636715, + "grad_norm": 0.5342668294906616, + "learning_rate": 5.049754938363286e-06, + "loss": 0.0565, "step": 33330 }, { - "epoch": 2.475865141838705, - "grad_norm": 0.8405054807662964, - "learning_rate": 1.514480914896777e-05, - "loss": 0.0638, + "epoch": 4.95173028367741, + "grad_norm": 1.4089100360870361, + "learning_rate": 5.04826971632259e-06, + "loss": 0.0671, "step": 33340 }, { - "epoch": 2.4766077528590524, - "grad_norm": 0.8352612257003784, - "learning_rate": 1.5140353482845687e-05, - "loss": 0.0644, + "epoch": 4.953215505718105, + "grad_norm": 0.394208163022995, + "learning_rate": 5.0467844942818955e-06, + "loss": 0.0545, "step": 33350 }, { - "epoch": 2.4773503638794, - "grad_norm": 3.4174530506134033, - "learning_rate": 1.5135897816723602e-05, - "loss": 0.0562, + "epoch": 4.9547007277588, + "grad_norm": 0.9516841769218445, + "learning_rate": 5.045299272241201e-06, + "loss": 0.0496, "step": 33360 }, { - "epoch": 2.4780929748997473, - "grad_norm": 1.8341305255889893, - "learning_rate": 1.5131442150601515e-05, - "loss": 0.0558, + "epoch": 4.956185949799495, + "grad_norm": 0.5728833079338074, + "learning_rate": 5.043814050200505e-06, + "loss": 0.0779, "step": 33370 }, { - "epoch": 2.4788355859200952, - "grad_norm": 2.0367071628570557, - "learning_rate": 1.512698648447943e-05, - "loss": 0.0765, + "epoch": 4.9576711718401905, + "grad_norm": 0.43828102946281433, + "learning_rate": 5.0423288281598105e-06, + "loss": 0.0485, "step": 33380 }, { - "epoch": 2.4795781969404427, - "grad_norm": 2.6080222129821777, - "learning_rate": 1.5122530818357347e-05, - "loss": 0.0549, + "epoch": 4.959156393880885, + "grad_norm": 0.5014750957489014, + "learning_rate": 5.040843606119116e-06, + "loss": 0.0463, "step": 33390 }, { - "epoch": 2.48032080796079, - "grad_norm": 2.060661792755127, - "learning_rate": 1.5118075152235258e-05, - "loss": 0.0727, + "epoch": 4.96064161592158, + "grad_norm": 0.7262021899223328, + "learning_rate": 5.03935838407842e-06, + "loss": 0.0836, "step": 33400 }, { - "epoch": 2.4810634189811376, - "grad_norm": 0.36137092113494873, - "learning_rate": 1.5113619486113175e-05, - "loss": 0.0718, + "epoch": 4.962126837962275, + "grad_norm": 1.1639851331710815, + "learning_rate": 5.0378731620377255e-06, + "loss": 0.0868, "step": 33410 }, { - "epoch": 2.481806030001485, - "grad_norm": 0.67535001039505, - "learning_rate": 1.5109163819991088e-05, - "loss": 0.0842, + "epoch": 4.96361206000297, + "grad_norm": 0.6585859656333923, + "learning_rate": 5.036387939997029e-06, + "loss": 0.0589, "step": 33420 }, { - "epoch": 2.482548641021833, - "grad_norm": 1.2539730072021484, - "learning_rate": 1.5104708153869003e-05, - "loss": 0.107, + "epoch": 4.965097282043666, + "grad_norm": 0.9515668153762817, + "learning_rate": 5.034902717956334e-06, + "loss": 0.0399, "step": 33430 }, { - "epoch": 2.4832912520421804, - "grad_norm": 1.4725462198257446, - "learning_rate": 1.510025248774692e-05, - "loss": 0.0611, + "epoch": 4.966582504084361, + "grad_norm": 0.8517674803733826, + "learning_rate": 5.0334174959156405e-06, + "loss": 0.0762, "step": 33440 }, { - "epoch": 2.484033863062528, - "grad_norm": 2.846672534942627, - "learning_rate": 1.5095796821624831e-05, - "loss": 0.1161, + "epoch": 4.968067726125056, + "grad_norm": 0.5472394227981567, + "learning_rate": 5.031932273874944e-06, + "loss": 0.0492, "step": 33450 }, { - "epoch": 2.4847764740828753, - "grad_norm": 3.4515435695648193, - "learning_rate": 1.5091341155502748e-05, - "loss": 0.0771, + "epoch": 4.9695529481657505, + "grad_norm": 0.8790189027786255, + "learning_rate": 5.030447051834249e-06, + "loss": 0.0505, "step": 33460 }, { - "epoch": 2.485519085103223, - "grad_norm": 2.5990917682647705, - "learning_rate": 1.5086885489380665e-05, - "loss": 0.0759, + "epoch": 4.971038170206446, + "grad_norm": 0.8520390391349792, + "learning_rate": 5.028961829793555e-06, + "loss": 0.0493, "step": 33470 }, { - "epoch": 2.4862616961235706, - "grad_norm": 0.4841431975364685, - "learning_rate": 1.5082429823258576e-05, - "loss": 0.0585, + "epoch": 4.972523392247141, + "grad_norm": 1.544259786605835, + "learning_rate": 5.027476607752859e-06, + "loss": 0.0642, "step": 33480 }, { - "epoch": 2.487004307143918, - "grad_norm": 1.1416738033294678, - "learning_rate": 1.5077974157136493e-05, - "loss": 0.0593, + "epoch": 4.974008614287836, + "grad_norm": 1.0220324993133545, + "learning_rate": 5.025991385712164e-06, + "loss": 0.0544, "step": 33490 }, { - "epoch": 2.4877469181642655, - "grad_norm": 1.2249414920806885, - "learning_rate": 1.5073518491014408e-05, - "loss": 0.079, + "epoch": 4.975493836328531, + "grad_norm": 1.8916149139404297, + "learning_rate": 5.02450616367147e-06, + "loss": 0.0572, "step": 33500 }, { - "epoch": 2.488489529184613, - "grad_norm": 2.8479864597320557, - "learning_rate": 1.5069062824892321e-05, - "loss": 0.0688, + "epoch": 4.976979058369226, + "grad_norm": 0.38026267290115356, + "learning_rate": 5.023020941630774e-06, + "loss": 0.0508, "step": 33510 }, { - "epoch": 2.489232140204961, - "grad_norm": 0.494975745677948, - "learning_rate": 1.5064607158770236e-05, - "loss": 0.0848, + "epoch": 4.978464280409922, + "grad_norm": 0.7838777899742126, + "learning_rate": 5.021535719590079e-06, + "loss": 0.0639, "step": 33520 }, { - "epoch": 2.4899747512253083, - "grad_norm": 0.8648329973220825, - "learning_rate": 1.506015149264815e-05, - "loss": 0.079, + "epoch": 4.979949502450617, + "grad_norm": 0.5688890218734741, + "learning_rate": 5.020050497549384e-06, + "loss": 0.0562, "step": 33530 }, { - "epoch": 2.4907173622456558, - "grad_norm": 2.451526641845703, - "learning_rate": 1.5055695826526066e-05, - "loss": 0.0679, + "epoch": 4.9814347244913115, + "grad_norm": 1.2140443325042725, + "learning_rate": 5.018565275508689e-06, + "loss": 0.0674, "step": 33540 }, { - "epoch": 2.491459973266003, - "grad_norm": 0.8754955530166626, - "learning_rate": 1.5051240160403981e-05, - "loss": 0.0766, + "epoch": 4.982919946532006, + "grad_norm": 0.6314716935157776, + "learning_rate": 5.017080053467994e-06, + "loss": 0.0465, "step": 33550 }, - { - "epoch": 2.4922025842863507, - "grad_norm": 2.0408942699432373, - "learning_rate": 1.5046784494281895e-05, - "loss": 0.0696, + { + "epoch": 4.984405168572701, + "grad_norm": 0.8482723832130432, + "learning_rate": 5.015594831427299e-06, + "loss": 0.0597, "step": 33560 }, { - "epoch": 2.4929451953066986, - "grad_norm": 3.6392688751220703, - "learning_rate": 1.504232882815981e-05, - "loss": 0.0703, + "epoch": 4.985890390613397, + "grad_norm": 0.5596425533294678, + "learning_rate": 5.014109609386604e-06, + "loss": 0.089, "step": 33570 }, { - "epoch": 2.493687806327046, - "grad_norm": 1.6540186405181885, - "learning_rate": 1.5037873162037726e-05, - "loss": 0.0852, + "epoch": 4.987375612654092, + "grad_norm": 2.0052719116210938, + "learning_rate": 5.012624387345909e-06, + "loss": 0.0507, "step": 33580 }, { - "epoch": 2.4944304173473935, - "grad_norm": 1.6503866910934448, - "learning_rate": 1.503341749591564e-05, - "loss": 0.0865, + "epoch": 4.988860834694787, + "grad_norm": 0.601597249507904, + "learning_rate": 5.011139165305213e-06, + "loss": 0.0589, "step": 33590 }, { - "epoch": 2.495173028367741, - "grad_norm": 0.8147965669631958, - "learning_rate": 1.5028961829793555e-05, - "loss": 0.0757, + "epoch": 4.990346056735482, + "grad_norm": 1.0099836587905884, + "learning_rate": 5.009653943264518e-06, + "loss": 0.062, "step": 33600 }, { - "epoch": 2.4959156393880884, - "grad_norm": 2.5200459957122803, - "learning_rate": 1.5024506163671471e-05, - "loss": 0.0582, + "epoch": 4.991831278776177, + "grad_norm": 0.6398152709007263, + "learning_rate": 5.008168721223824e-06, + "loss": 0.0454, "step": 33610 }, { - "epoch": 2.4966582504084363, - "grad_norm": 2.8182950019836426, - "learning_rate": 1.5020050497549383e-05, - "loss": 0.0855, + "epoch": 4.9933165008168725, + "grad_norm": 0.9657235145568848, + "learning_rate": 5.006683499183128e-06, + "loss": 0.0655, "step": 33620 }, { - "epoch": 2.4974008614287837, - "grad_norm": 0.40025582909584045, - "learning_rate": 1.50155948314273e-05, - "loss": 0.0717, + "epoch": 4.994801722857567, + "grad_norm": 0.9990649223327637, + "learning_rate": 5.005198277142433e-06, + "loss": 0.0651, "step": 33630 }, { - "epoch": 2.498143472449131, - "grad_norm": 1.4286984205245972, - "learning_rate": 1.5011139165305213e-05, - "loss": 0.0483, + "epoch": 4.996286944898262, + "grad_norm": 1.8582215309143066, + "learning_rate": 5.003713055101738e-06, + "loss": 0.0835, "step": 33640 }, { - "epoch": 2.4988860834694786, - "grad_norm": 0.4235764145851135, - "learning_rate": 1.5006683499183128e-05, - "loss": 0.0372, + "epoch": 4.997772166938957, + "grad_norm": 0.8416517972946167, + "learning_rate": 5.002227833061043e-06, + "loss": 0.0548, "step": 33650 }, { - "epoch": 2.499628694489826, - "grad_norm": 1.5366827249526978, - "learning_rate": 1.5002227833061045e-05, - "loss": 0.0574, + "epoch": 4.999257388979652, + "grad_norm": 0.7155054211616516, + "learning_rate": 5.000742611020348e-06, + "loss": 0.0511, "step": 33660 }, { - "epoch": 2.500371305510174, - "grad_norm": 1.9339724779129028, - "learning_rate": 1.4997772166938958e-05, - "loss": 0.0612, + "epoch": 5.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.05618023872375488, + "eval_runtime": 214.6654, + "eval_samples_per_second": 177.108, + "eval_steps_per_second": 5.539, + "step": 33665 + }, + { + "epoch": 5.000742611020348, + "grad_norm": 0.4117800295352936, + "learning_rate": 4.9992573889796535e-06, + "loss": 0.0447, "step": 33670 }, { - "epoch": 2.5011139165305214, - "grad_norm": 2.4985029697418213, - "learning_rate": 1.4993316500816873e-05, - "loss": 0.0724, + "epoch": 5.002227833061043, + "grad_norm": 0.7881307601928711, + "learning_rate": 4.997772166938958e-06, + "loss": 0.0575, "step": 33680 }, { - "epoch": 2.501856527550869, - "grad_norm": 1.3384310007095337, - "learning_rate": 1.4988860834694786e-05, - "loss": 0.0636, + "epoch": 5.003713055101738, + "grad_norm": 1.1118077039718628, + "learning_rate": 4.996286944898262e-06, + "loss": 0.0808, "step": 33690 }, { - "epoch": 2.5025991385712163, - "grad_norm": 0.9728517532348633, - "learning_rate": 1.4984405168572701e-05, - "loss": 0.0611, + "epoch": 5.005198277142433, + "grad_norm": 0.7774804830551147, + "learning_rate": 4.994801722857568e-06, + "loss": 0.0579, "step": 33700 }, { - "epoch": 2.5033417495915637, - "grad_norm": 1.3432775735855103, - "learning_rate": 1.4979949502450618e-05, - "loss": 0.0541, + "epoch": 5.0066834991831275, + "grad_norm": 1.4578781127929688, + "learning_rate": 4.993316500816873e-06, + "loss": 0.0578, "step": 33710 }, { - "epoch": 2.5040843606119116, - "grad_norm": 0.8654043674468994, - "learning_rate": 1.4975493836328531e-05, - "loss": 0.065, + "epoch": 5.008168721223823, + "grad_norm": 0.4488222002983093, + "learning_rate": 4.991831278776177e-06, + "loss": 0.0558, "step": 33720 }, { - "epoch": 2.504826971632259, - "grad_norm": 0.9931495785713196, - "learning_rate": 1.4971038170206446e-05, - "loss": 0.0447, + "epoch": 5.009653943264518, + "grad_norm": 1.2088385820388794, + "learning_rate": 4.990346056735483e-06, + "loss": 0.0727, "step": 33730 }, { - "epoch": 2.5055695826526065, - "grad_norm": 0.5981758832931519, - "learning_rate": 1.4966582504084361e-05, - "loss": 0.0511, + "epoch": 5.011139165305213, + "grad_norm": 0.7863116264343262, + "learning_rate": 4.988860834694788e-06, + "loss": 0.0745, "step": 33740 }, { - "epoch": 2.506312193672954, - "grad_norm": 1.3194613456726074, - "learning_rate": 1.4962126837962276e-05, - "loss": 0.0902, + "epoch": 5.012624387345908, + "grad_norm": 0.9223276972770691, + "learning_rate": 4.987375612654092e-06, + "loss": 0.05, "step": 33750 }, { - "epoch": 2.5070548046933014, - "grad_norm": 1.7330251932144165, - "learning_rate": 1.4957671171840191e-05, - "loss": 0.0522, + "epoch": 5.014109609386603, + "grad_norm": 0.6770899295806885, + "learning_rate": 4.985890390613397e-06, + "loss": 0.0599, "step": 33760 }, { - "epoch": 2.5077974157136493, - "grad_norm": 0.47713515162467957, - "learning_rate": 1.4953215505718104e-05, - "loss": 0.0676, + "epoch": 5.015594831427299, + "grad_norm": 1.0792419910430908, + "learning_rate": 4.984405168572702e-06, + "loss": 0.0634, "step": 33770 }, { - "epoch": 2.508540026733997, - "grad_norm": 2.3502461910247803, - "learning_rate": 1.4948759839596021e-05, - "loss": 0.0537, + "epoch": 5.017080053467994, + "grad_norm": 0.860308051109314, + "learning_rate": 4.982919946532007e-06, + "loss": 0.0586, "step": 33780 }, { - "epoch": 2.5092826377543442, - "grad_norm": 0.9015212655067444, - "learning_rate": 1.4944304173473934e-05, - "loss": 0.0861, + "epoch": 5.0185652755086885, + "grad_norm": 0.962019145488739, + "learning_rate": 4.981434724491312e-06, + "loss": 0.0683, "step": 33790 }, { - "epoch": 2.5100252487746917, - "grad_norm": 0.7821179628372192, - "learning_rate": 1.493984850735185e-05, - "loss": 0.05, + "epoch": 5.020050497549383, + "grad_norm": 0.9268213510513306, + "learning_rate": 4.979949502450617e-06, + "loss": 0.0537, "step": 33800 }, { - "epoch": 2.510767859795039, - "grad_norm": 0.5906331539154053, - "learning_rate": 1.4935392841229764e-05, - "loss": 0.0414, + "epoch": 5.021535719590078, + "grad_norm": 0.7560243010520935, + "learning_rate": 4.9784642804099215e-06, + "loss": 0.048, "step": 33810 }, { - "epoch": 2.511510470815387, - "grad_norm": 3.766472101211548, - "learning_rate": 1.493093717510768e-05, - "loss": 0.087, + "epoch": 5.023020941630774, + "grad_norm": 1.0545380115509033, + "learning_rate": 4.976979058369227e-06, + "loss": 0.0501, "step": 33820 }, { - "epoch": 2.5122530818357345, - "grad_norm": 0.8629242777824402, - "learning_rate": 1.4926481508985594e-05, - "loss": 0.0452, + "epoch": 5.024506163671469, + "grad_norm": 1.0352262258529663, + "learning_rate": 4.975493836328531e-06, + "loss": 0.0679, "step": 33830 }, { - "epoch": 2.512995692856082, - "grad_norm": 0.4955524504184723, - "learning_rate": 1.4922025842863508e-05, - "loss": 0.0487, + "epoch": 5.025991385712164, + "grad_norm": 0.8052208423614502, + "learning_rate": 4.9740086142878365e-06, + "loss": 0.0492, "step": 33840 }, { - "epoch": 2.5137383038764294, - "grad_norm": 1.7956591844558716, - "learning_rate": 1.4917570176741423e-05, - "loss": 0.0838, + "epoch": 5.027476607752859, + "grad_norm": 0.7468528151512146, + "learning_rate": 4.972523392247141e-06, + "loss": 0.0567, "step": 33850 }, { - "epoch": 2.514480914896777, - "grad_norm": 1.9371930360794067, - "learning_rate": 1.4913114510619338e-05, - "loss": 0.0987, + "epoch": 5.0289618297935545, + "grad_norm": 0.5573667287826538, + "learning_rate": 4.971038170206446e-06, + "loss": 0.0649, "step": 33860 }, { - "epoch": 2.5152235259171247, - "grad_norm": 0.846228837966919, - "learning_rate": 1.4908658844497253e-05, - "loss": 0.0762, + "epoch": 5.0304470518342495, + "grad_norm": 0.9307456016540527, + "learning_rate": 4.9695529481657515e-06, + "loss": 0.0623, "step": 33870 }, { - "epoch": 2.515966136937472, - "grad_norm": 2.3206639289855957, - "learning_rate": 1.4904203178375168e-05, - "loss": 0.0978, + "epoch": 5.031932273874944, + "grad_norm": 0.5896973013877869, + "learning_rate": 4.968067726125056e-06, + "loss": 0.0626, "step": 33880 }, { - "epoch": 2.5167087479578196, - "grad_norm": 1.545559048652649, - "learning_rate": 1.4899747512253083e-05, - "loss": 0.0446, + "epoch": 5.033417495915639, + "grad_norm": 0.6693680286407471, + "learning_rate": 4.966582504084361e-06, + "loss": 0.0719, "step": 33890 }, { - "epoch": 2.517451358978167, - "grad_norm": 1.571700930595398, - "learning_rate": 1.4895291846130998e-05, - "loss": 0.0786, + "epoch": 5.034902717956334, + "grad_norm": 0.6010963320732117, + "learning_rate": 4.9650972820436665e-06, + "loss": 0.0776, "step": 33900 }, { - "epoch": 2.5181939699985145, - "grad_norm": 1.5507023334503174, - "learning_rate": 1.4890836180008911e-05, - "loss": 0.0737, + "epoch": 5.03638793999703, + "grad_norm": 0.5392886996269226, + "learning_rate": 4.963612060002971e-06, + "loss": 0.07, "step": 33910 }, { - "epoch": 2.5189365810188624, - "grad_norm": 1.5131269693374634, - "learning_rate": 1.4886380513886826e-05, - "loss": 0.0549, + "epoch": 5.037873162037725, + "grad_norm": 0.7472323179244995, + "learning_rate": 4.962126837962275e-06, + "loss": 0.0626, "step": 33920 }, { - "epoch": 2.51967919203921, - "grad_norm": 0.8395630121231079, - "learning_rate": 1.4881924847764743e-05, - "loss": 0.0665, + "epoch": 5.03935838407842, + "grad_norm": 0.7688446640968323, + "learning_rate": 4.960641615921581e-06, + "loss": 0.0565, "step": 33930 }, { - "epoch": 2.5204218030595573, - "grad_norm": 2.2483623027801514, - "learning_rate": 1.4877469181642656e-05, - "loss": 0.0329, + "epoch": 5.040843606119115, + "grad_norm": 0.4238692820072174, + "learning_rate": 4.959156393880886e-06, + "loss": 0.0855, "step": 33940 }, { - "epoch": 2.521164414079905, - "grad_norm": 1.996645212173462, - "learning_rate": 1.4873013515520571e-05, - "loss": 0.0658, + "epoch": 5.0423288281598095, + "grad_norm": 0.6260663270950317, + "learning_rate": 4.95767117184019e-06, + "loss": 0.0485, "step": 33950 }, { - "epoch": 2.521907025100252, - "grad_norm": 1.3301950693130493, - "learning_rate": 1.4868557849398484e-05, - "loss": 0.0595, + "epoch": 5.043814050200505, + "grad_norm": 0.6706405282020569, + "learning_rate": 4.956185949799496e-06, + "loss": 0.0611, "step": 33960 }, { - "epoch": 2.5226496361206, - "grad_norm": 2.5241854190826416, - "learning_rate": 1.4864102183276401e-05, - "loss": 0.0837, + "epoch": 5.0452992722412, + "grad_norm": 0.6401503086090088, + "learning_rate": 4.954700727758801e-06, + "loss": 0.0511, "step": 33970 }, { - "epoch": 2.5233922471409476, - "grad_norm": 0.841391384601593, - "learning_rate": 1.4859646517154314e-05, - "loss": 0.0789, + "epoch": 5.046784494281895, + "grad_norm": 0.6273691058158875, + "learning_rate": 4.953215505718105e-06, + "loss": 0.0626, "step": 33980 }, { - "epoch": 2.524134858161295, - "grad_norm": 1.6445497274398804, - "learning_rate": 1.485519085103223e-05, - "loss": 0.0841, + "epoch": 5.04826971632259, + "grad_norm": 0.9785528779029846, + "learning_rate": 4.95173028367741e-06, + "loss": 0.0588, "step": 33990 }, { - "epoch": 2.524877469181643, - "grad_norm": 1.169519305229187, - "learning_rate": 1.4850735184910146e-05, - "loss": 0.0483, + "epoch": 5.049754938363285, + "grad_norm": 0.3964395225048065, + "learning_rate": 4.950245061636715e-06, + "loss": 0.0576, "step": 34000 }, { - "epoch": 2.5256200802019904, - "grad_norm": 1.069122076034546, - "learning_rate": 1.484627951878806e-05, - "loss": 0.0683, + "epoch": 5.051240160403981, + "grad_norm": 0.4322415292263031, + "learning_rate": 4.94875983959602e-06, + "loss": 0.0535, "step": 34010 }, { - "epoch": 2.526362691222338, - "grad_norm": 2.5198476314544678, - "learning_rate": 1.4841823852665974e-05, - "loss": 0.0837, + "epoch": 5.052725382444676, + "grad_norm": 0.8794994354248047, + "learning_rate": 4.947274617555325e-06, + "loss": 0.0588, "step": 34020 }, { - "epoch": 2.5271053022426853, - "grad_norm": 1.8398845195770264, - "learning_rate": 1.4837368186543888e-05, - "loss": 0.0589, + "epoch": 5.0542106044853705, + "grad_norm": 0.6617201566696167, + "learning_rate": 4.94578939551463e-06, + "loss": 0.0521, "step": 34030 }, { - "epoch": 2.5278479132630327, - "grad_norm": 1.1466633081436157, - "learning_rate": 1.4832912520421804e-05, - "loss": 0.0439, + "epoch": 5.055695826526065, + "grad_norm": 1.4307634830474854, + "learning_rate": 4.944304173473935e-06, + "loss": 0.0665, "step": 34040 }, { - "epoch": 2.5285905242833806, - "grad_norm": 1.7048221826553345, - "learning_rate": 1.482845685429972e-05, - "loss": 0.0828, + "epoch": 5.05718104856676, + "grad_norm": 0.6375547051429749, + "learning_rate": 4.94281895143324e-06, + "loss": 0.0585, "step": 34050 }, { - "epoch": 2.529333135303728, - "grad_norm": 1.1695541143417358, - "learning_rate": 1.4824001188177632e-05, - "loss": 0.0756, + "epoch": 5.058666270607456, + "grad_norm": 0.8604044914245605, + "learning_rate": 4.941333729392544e-06, + "loss": 0.0535, "step": 34060 }, { - "epoch": 2.5300757463240755, - "grad_norm": 0.7336133718490601, - "learning_rate": 1.4819545522055547e-05, - "loss": 0.0519, + "epoch": 5.060151492648151, + "grad_norm": 0.6541344523429871, + "learning_rate": 4.9398485073518494e-06, + "loss": 0.0677, "step": 34070 }, { - "epoch": 2.530818357344423, - "grad_norm": 3.3735294342041016, - "learning_rate": 1.4815089855933462e-05, - "loss": 0.0542, + "epoch": 5.061636714688846, + "grad_norm": 0.762144148349762, + "learning_rate": 4.938363285311155e-06, + "loss": 0.0715, "step": 34080 }, { - "epoch": 2.5315609683647704, - "grad_norm": 4.34521484375, - "learning_rate": 1.4810634189811377e-05, - "loss": 0.0878, + "epoch": 5.063121936729541, + "grad_norm": 0.5391594767570496, + "learning_rate": 4.936878063270459e-06, + "loss": 0.0642, "step": 34090 }, { - "epoch": 2.5323035793851183, - "grad_norm": 2.667654037475586, - "learning_rate": 1.480617852368929e-05, - "loss": 0.0794, + "epoch": 5.064607158770237, + "grad_norm": 1.1164581775665283, + "learning_rate": 4.9353928412297644e-06, + "loss": 0.0419, "step": 34100 }, { - "epoch": 2.5330461904054657, - "grad_norm": 3.387084722518921, - "learning_rate": 1.4801722857567206e-05, - "loss": 0.0941, + "epoch": 5.0660923808109315, + "grad_norm": 0.7693032026290894, + "learning_rate": 4.933907619189069e-06, + "loss": 0.0487, "step": 34110 }, { - "epoch": 2.533788801425813, - "grad_norm": 2.6281583309173584, - "learning_rate": 1.4797267191445122e-05, - "loss": 0.0624, + "epoch": 5.067577602851626, + "grad_norm": 0.8046525716781616, + "learning_rate": 4.932422397148374e-06, + "loss": 0.0592, "step": 34120 }, { - "epoch": 2.5345314124461606, - "grad_norm": 1.0451610088348389, - "learning_rate": 1.4792811525323036e-05, - "loss": 0.0515, + "epoch": 5.069062824892321, + "grad_norm": 0.6247583627700806, + "learning_rate": 4.930937175107679e-06, + "loss": 0.0488, "step": 34130 }, { - "epoch": 2.535274023466508, - "grad_norm": 1.5573267936706543, - "learning_rate": 1.478835585920095e-05, - "loss": 0.0878, + "epoch": 5.070548046933016, + "grad_norm": 0.9493995308876038, + "learning_rate": 4.929451953066984e-06, + "loss": 0.067, "step": 34140 }, { - "epoch": 2.536016634486856, - "grad_norm": 1.6244726181030273, - "learning_rate": 1.4783900193078866e-05, - "loss": 0.0647, + "epoch": 5.072033268973712, + "grad_norm": 1.048176884651184, + "learning_rate": 4.927966731026289e-06, + "loss": 0.0658, "step": 34150 }, { - "epoch": 2.5367592455072034, - "grad_norm": 2.02620005607605, - "learning_rate": 1.477944452695678e-05, - "loss": 0.0734, + "epoch": 5.073518491014407, + "grad_norm": 0.8611918687820435, + "learning_rate": 4.926481508985594e-06, + "loss": 0.0542, "step": 34160 }, { - "epoch": 2.537501856527551, - "grad_norm": 1.4137933254241943, - "learning_rate": 1.4774988860834696e-05, - "loss": 0.0881, + "epoch": 5.075003713055102, + "grad_norm": 1.0431722402572632, + "learning_rate": 4.924996286944899e-06, + "loss": 0.054, "step": 34170 }, { - "epoch": 2.5382444675478983, - "grad_norm": 1.8512823581695557, - "learning_rate": 1.4770533194712609e-05, - "loss": 0.0635, + "epoch": 5.076488935095797, + "grad_norm": 1.1769628524780273, + "learning_rate": 4.923511064904203e-06, + "loss": 0.0505, "step": 34180 }, { - "epoch": 2.538987078568246, - "grad_norm": 1.565675973892212, - "learning_rate": 1.4766077528590526e-05, - "loss": 0.0625, + "epoch": 5.077974157136492, + "grad_norm": 0.6343939900398254, + "learning_rate": 4.9220258428635086e-06, + "loss": 0.0526, "step": 34190 }, { - "epoch": 2.5397296895885937, - "grad_norm": 0.9743090271949768, - "learning_rate": 1.4761621862468439e-05, - "loss": 0.0686, + "epoch": 5.079459379177187, + "grad_norm": 0.9443486332893372, + "learning_rate": 4.920540620822814e-06, + "loss": 0.0564, "step": 34200 }, { - "epoch": 2.540472300608941, - "grad_norm": 1.4764388799667358, - "learning_rate": 1.4757166196346354e-05, - "loss": 0.0563, + "epoch": 5.080944601217882, + "grad_norm": 0.21072953939437866, + "learning_rate": 4.919055398782118e-06, + "loss": 0.0509, "step": 34210 }, { - "epoch": 2.5412149116292886, - "grad_norm": 1.5421714782714844, - "learning_rate": 1.4752710530224269e-05, - "loss": 0.0727, + "epoch": 5.082429823258577, + "grad_norm": 0.9125813841819763, + "learning_rate": 4.917570176741423e-06, + "loss": 0.0515, "step": 34220 }, { - "epoch": 2.541957522649636, - "grad_norm": 0.654322624206543, - "learning_rate": 1.4748254864102184e-05, - "loss": 0.0523, + "epoch": 5.083915045299272, + "grad_norm": 0.8106305003166199, + "learning_rate": 4.916084954700728e-06, + "loss": 0.0618, "step": 34230 }, { - "epoch": 2.5427001336699835, - "grad_norm": 1.733870029449463, - "learning_rate": 1.4743799197980099e-05, - "loss": 0.0854, + "epoch": 5.085400267339967, + "grad_norm": 0.6208940744400024, + "learning_rate": 4.914599732660033e-06, + "loss": 0.059, "step": 34240 }, { - "epoch": 2.5434427446903314, - "grad_norm": 2.0312111377716064, - "learning_rate": 1.4739343531858012e-05, - "loss": 0.0809, + "epoch": 5.086885489380663, + "grad_norm": 0.6824192404747009, + "learning_rate": 4.913114510619338e-06, + "loss": 0.0577, "step": 34250 }, { - "epoch": 2.544185355710679, - "grad_norm": 1.3134266138076782, - "learning_rate": 1.4734887865735929e-05, - "loss": 0.0722, + "epoch": 5.088370711421358, + "grad_norm": 1.3406444787979126, + "learning_rate": 4.911629288578643e-06, + "loss": 0.062, "step": 34260 }, { - "epoch": 2.5449279667310263, - "grad_norm": 2.5417449474334717, - "learning_rate": 1.4730432199613842e-05, - "loss": 0.0819, + "epoch": 5.089855933462053, + "grad_norm": 0.6709977388381958, + "learning_rate": 4.910144066537948e-06, + "loss": 0.0607, "step": 34270 }, { - "epoch": 2.5456705777513737, - "grad_norm": 1.3097118139266968, - "learning_rate": 1.4725976533491757e-05, - "loss": 0.0818, + "epoch": 5.0913411555027475, + "grad_norm": 1.1426851749420166, + "learning_rate": 4.908658844497253e-06, + "loss": 0.0632, "step": 34280 }, { - "epoch": 2.546413188771721, - "grad_norm": 1.2188619375228882, - "learning_rate": 1.4721520867369672e-05, - "loss": 0.0692, + "epoch": 5.092826377543442, + "grad_norm": 0.7882207036018372, + "learning_rate": 4.907173622456557e-06, + "loss": 0.0425, "step": 34290 }, { - "epoch": 2.547155799792069, - "grad_norm": 2.3822097778320312, - "learning_rate": 1.4717065201247587e-05, - "loss": 0.0847, + "epoch": 5.094311599584138, + "grad_norm": 0.7153235077857971, + "learning_rate": 4.905688400415862e-06, + "loss": 0.0523, "step": 34300 }, { - "epoch": 2.5478984108124165, - "grad_norm": 1.0267736911773682, - "learning_rate": 1.4712609535125502e-05, - "loss": 0.0844, + "epoch": 5.095796821624833, + "grad_norm": 0.745721161365509, + "learning_rate": 4.904203178375168e-06, + "loss": 0.0705, "step": 34310 }, { - "epoch": 2.548641021832764, - "grad_norm": 1.53618323802948, - "learning_rate": 1.4708153869003416e-05, - "loss": 0.0804, + "epoch": 5.097282043665528, + "grad_norm": 0.5836763978004456, + "learning_rate": 4.902717956334472e-06, + "loss": 0.0697, "step": 34320 }, { - "epoch": 2.5493836328531114, - "grad_norm": 1.5292679071426392, - "learning_rate": 1.470369820288133e-05, - "loss": 0.0752, + "epoch": 5.098767265706223, + "grad_norm": 0.8141874074935913, + "learning_rate": 4.901232734293777e-06, + "loss": 0.0689, "step": 34330 }, { - "epoch": 2.550126243873459, - "grad_norm": 0.5811061263084412, - "learning_rate": 1.4699242536759247e-05, - "loss": 0.0684, + "epoch": 5.100252487746918, + "grad_norm": 0.5741795897483826, + "learning_rate": 4.899747512253083e-06, + "loss": 0.0559, "step": 34340 }, { - "epoch": 2.5508688548938068, - "grad_norm": 0.8153356313705444, - "learning_rate": 1.469478687063716e-05, - "loss": 0.048, + "epoch": 5.1017377097876135, + "grad_norm": 1.1511962413787842, + "learning_rate": 4.898262290212387e-06, + "loss": 0.0805, "step": 34350 }, { - "epoch": 2.551611465914154, - "grad_norm": 1.236395001411438, - "learning_rate": 1.4690331204515076e-05, - "loss": 0.0614, + "epoch": 5.103222931828308, + "grad_norm": 0.4399915933609009, + "learning_rate": 4.8967770681716916e-06, + "loss": 0.0588, "step": 34360 }, { - "epoch": 2.5523540769345017, - "grad_norm": 1.525625467300415, - "learning_rate": 1.4685875538392989e-05, - "loss": 0.0802, + "epoch": 5.104708153869003, + "grad_norm": 1.116751790046692, + "learning_rate": 4.895291846130997e-06, + "loss": 0.0603, "step": 34370 }, { - "epoch": 2.553096687954849, - "grad_norm": 0.4339189827442169, - "learning_rate": 1.4681419872270906e-05, - "loss": 0.041, + "epoch": 5.106193375909698, + "grad_norm": 1.0357329845428467, + "learning_rate": 4.893806624090302e-06, + "loss": 0.0722, "step": 34380 }, { - "epoch": 2.5538392989751966, - "grad_norm": 1.1005926132202148, - "learning_rate": 1.4676964206148819e-05, - "loss": 0.0557, + "epoch": 5.107678597950393, + "grad_norm": 0.8936920166015625, + "learning_rate": 4.8923214020496066e-06, + "loss": 0.0594, "step": 34390 }, { - "epoch": 2.5545819099955445, - "grad_norm": 2.0460987091064453, - "learning_rate": 1.4672508540026734e-05, - "loss": 0.0777, + "epoch": 5.109163819991089, + "grad_norm": 0.6210498213768005, + "learning_rate": 4.890836180008912e-06, + "loss": 0.0659, "step": 34400 }, { - "epoch": 2.555324521015892, - "grad_norm": 1.1774736642837524, - "learning_rate": 1.466805287390465e-05, - "loss": 0.0499, + "epoch": 5.110649042031784, + "grad_norm": 0.8246908187866211, + "learning_rate": 4.889350957968217e-06, + "loss": 0.0665, "step": 34410 }, { - "epoch": 2.5560671320362394, - "grad_norm": 1.0830439329147339, - "learning_rate": 1.4663597207782564e-05, - "loss": 0.0378, + "epoch": 5.112134264072479, + "grad_norm": 0.7300816178321838, + "learning_rate": 4.8878657359275216e-06, + "loss": 0.0747, "step": 34420 }, { - "epoch": 2.556809743056587, - "grad_norm": 3.122680187225342, - "learning_rate": 1.4659141541660479e-05, - "loss": 0.0711, + "epoch": 5.113619486113174, + "grad_norm": 1.2019017934799194, + "learning_rate": 4.886380513886827e-06, + "loss": 0.0448, "step": 34430 }, { - "epoch": 2.5575523540769343, - "grad_norm": 1.451540231704712, - "learning_rate": 1.4654685875538392e-05, - "loss": 0.0909, + "epoch": 5.115104708153869, + "grad_norm": 0.4036964774131775, + "learning_rate": 4.884895291846131e-06, + "loss": 0.0507, "step": 34440 }, { - "epoch": 2.558294965097282, - "grad_norm": 2.591353416442871, - "learning_rate": 1.4650230209416309e-05, - "loss": 0.0714, + "epoch": 5.116589930194564, + "grad_norm": 1.3310085535049438, + "learning_rate": 4.8834100698054365e-06, + "loss": 0.0529, "step": 34450 }, { - "epoch": 2.5590375761176296, - "grad_norm": 1.4591681957244873, - "learning_rate": 1.4645774543294224e-05, - "loss": 0.0777, + "epoch": 5.118075152235259, + "grad_norm": 1.1880275011062622, + "learning_rate": 4.881924847764741e-06, + "loss": 0.0679, "step": 34460 }, { - "epoch": 2.559780187137977, - "grad_norm": 0.7905107736587524, - "learning_rate": 1.4641318877172137e-05, - "loss": 0.056, + "epoch": 5.119560374275954, + "grad_norm": 1.0951112508773804, + "learning_rate": 4.880439625724046e-06, + "loss": 0.0645, "step": 34470 }, { - "epoch": 2.5605227981583245, - "grad_norm": 2.1354310512542725, - "learning_rate": 1.4636863211050052e-05, - "loss": 0.0647, + "epoch": 5.121045596316649, + "grad_norm": 0.6842232346534729, + "learning_rate": 4.878954403683351e-06, + "loss": 0.0741, "step": 34480 }, { - "epoch": 2.561265409178672, - "grad_norm": 0.35180187225341797, - "learning_rate": 1.4632407544927967e-05, - "loss": 0.056, + "epoch": 5.122530818357345, + "grad_norm": 0.63397216796875, + "learning_rate": 4.877469181642656e-06, + "loss": 0.0609, "step": 34490 }, { - "epoch": 2.56200802019902, - "grad_norm": 1.2425521612167358, - "learning_rate": 1.4627951878805882e-05, - "loss": 0.0533, + "epoch": 5.12401604039804, + "grad_norm": 1.2980011701583862, + "learning_rate": 4.875983959601961e-06, + "loss": 0.0582, "step": 34500 }, { - "epoch": 2.5627506312193673, - "grad_norm": 2.0177273750305176, - "learning_rate": 1.4623496212683797e-05, - "loss": 0.039, + "epoch": 5.125501262438735, + "grad_norm": 0.2617751955986023, + "learning_rate": 4.874498737561266e-06, + "loss": 0.0736, "step": 34510 }, { - "epoch": 2.5634932422397148, - "grad_norm": 0.6943618059158325, - "learning_rate": 1.461904054656171e-05, - "loss": 0.0622, + "epoch": 5.1269864844794295, + "grad_norm": 0.5681473016738892, + "learning_rate": 4.87301351552057e-06, + "loss": 0.0579, "step": 34520 }, { - "epoch": 2.5642358532600626, - "grad_norm": 2.4245269298553467, - "learning_rate": 1.4614584880439627e-05, - "loss": 0.0611, + "epoch": 5.128471706520124, + "grad_norm": 1.5799667835235596, + "learning_rate": 4.871528293479875e-06, + "loss": 0.066, "step": 34530 }, { - "epoch": 2.5649784642804097, - "grad_norm": 0.730995237827301, - "learning_rate": 1.461012921431754e-05, - "loss": 0.0834, + "epoch": 5.12995692856082, + "grad_norm": 0.9241040349006653, + "learning_rate": 4.870043071439181e-06, + "loss": 0.0729, "step": 34540 }, { - "epoch": 2.5657210753007575, - "grad_norm": 0.8295930027961731, - "learning_rate": 1.4605673548195455e-05, - "loss": 0.0567, + "epoch": 5.131442150601515, + "grad_norm": 0.26680564880371094, + "learning_rate": 4.868557849398485e-06, + "loss": 0.047, "step": 34550 }, { - "epoch": 2.566463686321105, - "grad_norm": 2.3141775131225586, - "learning_rate": 1.460121788207337e-05, - "loss": 0.0663, + "epoch": 5.13292737264221, + "grad_norm": 1.0289782285690308, + "learning_rate": 4.86707262735779e-06, + "loss": 0.048, "step": 34560 }, { - "epoch": 2.5672062973414524, - "grad_norm": 1.6702011823654175, - "learning_rate": 1.4596762215951285e-05, - "loss": 0.1019, + "epoch": 5.134412594682905, + "grad_norm": 1.0188406705856323, + "learning_rate": 4.865587405317096e-06, + "loss": 0.0545, "step": 34570 }, { - "epoch": 2.5679489083618003, - "grad_norm": 1.9209109544754028, - "learning_rate": 1.45923065498292e-05, - "loss": 0.0763, + "epoch": 5.1358978167236, + "grad_norm": 0.6055546402931213, + "learning_rate": 4.8641021832764e-06, + "loss": 0.0789, "step": 34580 }, { - "epoch": 2.568691519382148, - "grad_norm": 1.5046935081481934, - "learning_rate": 1.4587850883707114e-05, - "loss": 0.0805, + "epoch": 5.137383038764296, + "grad_norm": 0.93790203332901, + "learning_rate": 4.8626169612357045e-06, + "loss": 0.0591, "step": 34590 }, { - "epoch": 2.5694341304024952, - "grad_norm": 0.8070915937423706, - "learning_rate": 1.458339521758503e-05, - "loss": 0.0334, + "epoch": 5.1388682608049905, + "grad_norm": 0.7455692291259766, + "learning_rate": 4.86113173919501e-06, + "loss": 0.059, "step": 34600 }, { - "epoch": 2.5701767414228427, - "grad_norm": 0.2428605705499649, - "learning_rate": 1.4578939551462944e-05, - "loss": 0.0457, + "epoch": 5.140353482845685, + "grad_norm": 0.9879961013793945, + "learning_rate": 4.859646517154315e-06, + "loss": 0.0687, "step": 34610 }, { - "epoch": 2.57091935244319, - "grad_norm": 0.3563167452812195, - "learning_rate": 1.4574483885340859e-05, - "loss": 0.0598, + "epoch": 5.14183870488638, + "grad_norm": 0.674579381942749, + "learning_rate": 4.8581612951136195e-06, + "loss": 0.0573, "step": 34620 }, { - "epoch": 2.571661963463538, - "grad_norm": 0.9736761450767517, - "learning_rate": 1.4570028219218774e-05, - "loss": 0.058, + "epoch": 5.143323926927075, + "grad_norm": 0.9698799848556519, + "learning_rate": 4.856676073072925e-06, + "loss": 0.0607, "step": 34630 }, { - "epoch": 2.5724045744838855, - "grad_norm": 1.3607254028320312, - "learning_rate": 1.4565572553096689e-05, - "loss": 0.069, + "epoch": 5.144809148967771, + "grad_norm": 0.29647594690322876, + "learning_rate": 4.85519085103223e-06, + "loss": 0.0559, "step": 34640 }, { - "epoch": 2.573147185504233, - "grad_norm": 0.6492049694061279, - "learning_rate": 1.4561116886974604e-05, - "loss": 0.0406, + "epoch": 5.146294371008466, + "grad_norm": 1.5583728551864624, + "learning_rate": 4.8537056289915345e-06, + "loss": 0.0736, "step": 34650 }, { - "epoch": 2.5738897965245804, - "grad_norm": 1.3823007345199585, - "learning_rate": 1.4556661220852517e-05, - "loss": 0.062, + "epoch": 5.147779593049161, + "grad_norm": 0.7042451500892639, + "learning_rate": 4.85222040695084e-06, + "loss": 0.0563, "step": 34660 }, { - "epoch": 2.574632407544928, - "grad_norm": 3.305699110031128, - "learning_rate": 1.4552205554730434e-05, - "loss": 0.1082, + "epoch": 5.149264815089856, + "grad_norm": 0.41057154536247253, + "learning_rate": 4.850735184910145e-06, + "loss": 0.0389, "step": 34670 }, { - "epoch": 2.5753750185652757, - "grad_norm": 1.8386292457580566, - "learning_rate": 1.4547749888608347e-05, - "loss": 0.0667, + "epoch": 5.1507500371305515, + "grad_norm": 1.1116803884506226, + "learning_rate": 4.8492499628694495e-06, + "loss": 0.0567, "step": 34680 }, { - "epoch": 2.576117629585623, - "grad_norm": 0.8886379599571228, - "learning_rate": 1.4543294222486262e-05, - "loss": 0.0873, + "epoch": 5.152235259171246, + "grad_norm": 0.4490911364555359, + "learning_rate": 4.847764740828754e-06, + "loss": 0.0732, "step": 34690 }, { - "epoch": 2.5768602406059706, - "grad_norm": 1.2363057136535645, - "learning_rate": 1.4538838556364177e-05, - "loss": 0.0978, + "epoch": 5.153720481211941, + "grad_norm": 0.765703558921814, + "learning_rate": 4.846279518788059e-06, + "loss": 0.0677, "step": 34700 }, { - "epoch": 2.577602851626318, - "grad_norm": 1.2348647117614746, - "learning_rate": 1.4534382890242092e-05, - "loss": 0.0801, + "epoch": 5.155205703252636, + "grad_norm": 0.9491612315177917, + "learning_rate": 4.8447942967473645e-06, + "loss": 0.0615, "step": 34710 }, { - "epoch": 2.5783454626466655, - "grad_norm": 1.7312116622924805, - "learning_rate": 1.4529927224120007e-05, - "loss": 0.0794, + "epoch": 5.156690925293331, + "grad_norm": 0.7063356041908264, + "learning_rate": 4.843309074706669e-06, + "loss": 0.0549, "step": 34720 }, { - "epoch": 2.5790880736670134, - "grad_norm": 1.0014289617538452, - "learning_rate": 1.452547155799792e-05, - "loss": 0.061, + "epoch": 5.158176147334027, + "grad_norm": 1.0340287685394287, + "learning_rate": 4.841823852665974e-06, + "loss": 0.0737, "step": 34730 }, { - "epoch": 2.579830684687361, - "grad_norm": 0.8576075434684753, - "learning_rate": 1.4521015891875835e-05, - "loss": 0.0756, + "epoch": 5.159661369374722, + "grad_norm": 0.32890424132347107, + "learning_rate": 4.840338630625279e-06, + "loss": 0.0611, "step": 34740 }, { - "epoch": 2.5805732957077083, - "grad_norm": 2.0893824100494385, - "learning_rate": 1.4516560225753752e-05, - "loss": 0.0672, + "epoch": 5.161146591415417, + "grad_norm": 0.7864364981651306, + "learning_rate": 4.838853408584584e-06, + "loss": 0.0666, "step": 34750 }, { - "epoch": 2.5813159067280558, - "grad_norm": 0.5497003197669983, - "learning_rate": 1.4512104559631665e-05, - "loss": 0.037, + "epoch": 5.1626318134561116, + "grad_norm": 1.1317648887634277, + "learning_rate": 4.837368186543888e-06, + "loss": 0.0566, "step": 34760 }, { - "epoch": 2.5820585177484032, - "grad_norm": 3.0224320888519287, - "learning_rate": 1.450764889350958e-05, - "loss": 0.0525, + "epoch": 5.1641170354968065, + "grad_norm": 0.5009697079658508, + "learning_rate": 4.835882964503194e-06, + "loss": 0.0515, "step": 34770 }, { - "epoch": 2.582801128768751, - "grad_norm": 0.8441876173019409, - "learning_rate": 1.4503193227387493e-05, - "loss": 0.0875, + "epoch": 5.165602257537502, + "grad_norm": 0.48454317450523376, + "learning_rate": 4.834397742462498e-06, + "loss": 0.0527, "step": 34780 }, { - "epoch": 2.5835437397890986, - "grad_norm": 2.5795955657958984, - "learning_rate": 1.449873756126541e-05, - "loss": 0.0817, + "epoch": 5.167087479578197, + "grad_norm": 1.3548284769058228, + "learning_rate": 4.832912520421803e-06, + "loss": 0.0682, "step": 34790 }, { - "epoch": 2.584286350809446, - "grad_norm": 0.8166103363037109, - "learning_rate": 1.4494281895143323e-05, - "loss": 0.0503, + "epoch": 5.168572701618892, + "grad_norm": 0.651645839214325, + "learning_rate": 4.831427298381109e-06, + "loss": 0.0599, "step": 34800 }, { - "epoch": 2.5850289618297935, - "grad_norm": 1.8198186159133911, - "learning_rate": 1.4489826229021238e-05, - "loss": 0.0388, + "epoch": 5.170057923659587, + "grad_norm": 1.2181872129440308, + "learning_rate": 4.829942076340413e-06, + "loss": 0.0586, "step": 34810 }, { - "epoch": 2.585771572850141, - "grad_norm": 4.351510524749756, - "learning_rate": 1.4485370562899155e-05, - "loss": 0.0762, + "epoch": 5.171543145700282, + "grad_norm": 1.0582828521728516, + "learning_rate": 4.828456854299718e-06, + "loss": 0.0513, "step": 34820 }, { - "epoch": 2.586514183870489, - "grad_norm": 0.9294065833091736, - "learning_rate": 1.4480914896777068e-05, - "loss": 0.0678, + "epoch": 5.173028367740978, + "grad_norm": 0.7807072997093201, + "learning_rate": 4.826971632259023e-06, + "loss": 0.0573, "step": 34830 }, { - "epoch": 2.5872567948908363, - "grad_norm": 1.0012507438659668, - "learning_rate": 1.4476459230654983e-05, - "loss": 0.0536, + "epoch": 5.1745135897816725, + "grad_norm": 0.5891880989074707, + "learning_rate": 4.825486410218328e-06, + "loss": 0.0467, "step": 34840 }, { - "epoch": 2.5879994059111837, - "grad_norm": 0.6957378387451172, - "learning_rate": 1.4472003564532897e-05, - "loss": 0.0958, + "epoch": 5.175998811822367, + "grad_norm": 0.9987158179283142, + "learning_rate": 4.8240011881776325e-06, + "loss": 0.0489, "step": 34850 }, { - "epoch": 2.588742016931531, - "grad_norm": 0.9014194011688232, - "learning_rate": 1.4467547898410813e-05, - "loss": 0.0647, + "epoch": 5.177484033863062, + "grad_norm": 0.3595871925354004, + "learning_rate": 4.822515966136938e-06, + "loss": 0.054, "step": 34860 }, { - "epoch": 2.5894846279518786, - "grad_norm": 1.4799509048461914, - "learning_rate": 1.4463092232288728e-05, - "loss": 0.0757, + "epoch": 5.178969255903757, + "grad_norm": 0.3779183626174927, + "learning_rate": 4.821030744096243e-06, + "loss": 0.0775, "step": 34870 }, { - "epoch": 2.5902272389722265, - "grad_norm": 1.020585060119629, - "learning_rate": 1.4458636566166642e-05, - "loss": 0.0537, + "epoch": 5.180454477944453, + "grad_norm": 1.291839838027954, + "learning_rate": 4.8195455220555475e-06, + "loss": 0.0671, "step": 34880 }, { - "epoch": 2.590969849992574, - "grad_norm": 3.012230396270752, - "learning_rate": 1.4454180900044557e-05, - "loss": 0.0807, + "epoch": 5.181939699985148, + "grad_norm": 0.7555186152458191, + "learning_rate": 4.818060300014852e-06, + "loss": 0.064, "step": 34890 }, { - "epoch": 2.5917124610129214, - "grad_norm": 1.4325774908065796, - "learning_rate": 1.4449725233922472e-05, - "loss": 0.0746, + "epoch": 5.183424922025843, + "grad_norm": 0.6942147612571716, + "learning_rate": 4.816575077974158e-06, + "loss": 0.072, "step": 34900 }, { - "epoch": 2.592455072033269, - "grad_norm": 0.7965050339698792, - "learning_rate": 1.4445269567800387e-05, - "loss": 0.0488, + "epoch": 5.184910144066538, + "grad_norm": 0.8907058238983154, + "learning_rate": 4.8150898559334625e-06, + "loss": 0.0692, "step": 34910 }, { - "epoch": 2.5931976830536163, - "grad_norm": 0.29672083258628845, - "learning_rate": 1.4440813901678302e-05, - "loss": 0.0621, + "epoch": 5.186395366107233, + "grad_norm": 0.659183144569397, + "learning_rate": 4.813604633892767e-06, + "loss": 0.0538, "step": 34920 }, { - "epoch": 2.593940294073964, - "grad_norm": 1.9577540159225464, - "learning_rate": 1.4436358235556217e-05, - "loss": 0.0845, + "epoch": 5.187880588147928, + "grad_norm": 0.8931286931037903, + "learning_rate": 4.812119411852072e-06, + "loss": 0.079, "step": 34930 }, { - "epoch": 2.5946829050943117, - "grad_norm": 2.028249502182007, - "learning_rate": 1.4431902569434132e-05, - "loss": 0.0853, + "epoch": 5.189365810188623, + "grad_norm": 0.6598271131515503, + "learning_rate": 4.8106341898113775e-06, + "loss": 0.0729, "step": 34940 }, { - "epoch": 2.595425516114659, - "grad_norm": 0.9768528342247009, - "learning_rate": 1.4427446903312045e-05, - "loss": 0.0762, + "epoch": 5.190851032229318, + "grad_norm": 0.7790974378585815, + "learning_rate": 4.809148967770682e-06, + "loss": 0.0578, "step": 34950 }, { - "epoch": 2.5961681271350066, - "grad_norm": 0.36251920461654663, - "learning_rate": 1.442299123718996e-05, - "loss": 0.0513, + "epoch": 5.192336254270013, + "grad_norm": 0.3787277936935425, + "learning_rate": 4.807663745729987e-06, + "loss": 0.0529, "step": 34960 }, { - "epoch": 2.596910738155354, - "grad_norm": 1.7192022800445557, - "learning_rate": 1.4418535571067875e-05, - "loss": 0.0643, + "epoch": 5.193821476310708, + "grad_norm": 0.6995694041252136, + "learning_rate": 4.8061785236892925e-06, + "loss": 0.0679, "step": 34970 }, { - "epoch": 2.597653349175702, - "grad_norm": 1.3157782554626465, - "learning_rate": 1.441407990494579e-05, - "loss": 0.0863, + "epoch": 5.195306698351404, + "grad_norm": 0.7316018342971802, + "learning_rate": 4.804693301648597e-06, + "loss": 0.0663, "step": 34980 }, { - "epoch": 2.5983959601960493, - "grad_norm": 1.921720027923584, - "learning_rate": 1.4409624238823705e-05, - "loss": 0.0806, + "epoch": 5.196791920392099, + "grad_norm": 1.4191354513168335, + "learning_rate": 4.803208079607901e-06, + "loss": 0.0587, "step": 34990 }, { - "epoch": 2.599138571216397, - "grad_norm": 0.9074265360832214, - "learning_rate": 1.4405168572701618e-05, - "loss": 0.0916, + "epoch": 5.198277142432794, + "grad_norm": 1.3133004903793335, + "learning_rate": 4.801722857567207e-06, + "loss": 0.0755, "step": 35000 }, { - "epoch": 2.5998811822367442, - "grad_norm": 1.1961455345153809, - "learning_rate": 1.4400712906579535e-05, - "loss": 0.0818, + "epoch": 5.1997623644734885, + "grad_norm": 0.6993300914764404, + "learning_rate": 4.800237635526512e-06, + "loss": 0.0602, "step": 35010 }, { - "epoch": 2.6006237932570917, - "grad_norm": 1.6636606454849243, - "learning_rate": 1.4396257240457448e-05, - "loss": 0.0947, + "epoch": 5.201247586514184, + "grad_norm": 0.69782555103302, + "learning_rate": 4.798752413485816e-06, + "loss": 0.0606, "step": 35020 }, { - "epoch": 2.6013664042774396, - "grad_norm": 1.7410112619400024, - "learning_rate": 1.4391801574335363e-05, - "loss": 0.0614, + "epoch": 5.202732808554879, + "grad_norm": 0.7031997442245483, + "learning_rate": 4.797267191445122e-06, + "loss": 0.0618, "step": 35030 }, { - "epoch": 2.602109015297787, - "grad_norm": 4.592065811157227, - "learning_rate": 1.4387345908213278e-05, - "loss": 0.0652, + "epoch": 5.204218030595574, + "grad_norm": 0.747891366481781, + "learning_rate": 4.795781969404426e-06, + "loss": 0.0742, "step": 35040 }, { - "epoch": 2.6028516263181345, - "grad_norm": 2.8058197498321533, - "learning_rate": 1.4382890242091193e-05, - "loss": 0.0731, + "epoch": 5.205703252636269, + "grad_norm": 0.6401386260986328, + "learning_rate": 4.794296747363731e-06, + "loss": 0.0798, "step": 35050 }, { - "epoch": 2.603594237338482, - "grad_norm": 1.1537928581237793, - "learning_rate": 1.4378434575969108e-05, - "loss": 0.0789, + "epoch": 5.207188474676964, + "grad_norm": 0.8471872210502625, + "learning_rate": 4.792811525323036e-06, + "loss": 0.0501, "step": 35060 }, { - "epoch": 2.6043368483588294, - "grad_norm": 1.5462356805801392, - "learning_rate": 1.4373978909847021e-05, - "loss": 0.0735, + "epoch": 5.20867369671766, + "grad_norm": 1.545289397239685, + "learning_rate": 4.791326303282341e-06, + "loss": 0.0782, "step": 35070 }, { - "epoch": 2.6050794593791773, - "grad_norm": 2.397684097290039, - "learning_rate": 1.4369523243724938e-05, - "loss": 0.0812, + "epoch": 5.210158918758355, + "grad_norm": 0.8901092410087585, + "learning_rate": 4.789841081241646e-06, + "loss": 0.0615, "step": 35080 }, { - "epoch": 2.6058220703995247, - "grad_norm": 0.8381139039993286, - "learning_rate": 1.4365067577602851e-05, - "loss": 0.0472, + "epoch": 5.2116441407990495, + "grad_norm": 1.0877093076705933, + "learning_rate": 4.788355859200951e-06, + "loss": 0.0654, "step": 35090 }, { - "epoch": 2.606564681419872, - "grad_norm": 0.911646842956543, - "learning_rate": 1.4360611911480766e-05, - "loss": 0.0668, + "epoch": 5.213129362839744, + "grad_norm": 0.7853423357009888, + "learning_rate": 4.786870637160256e-06, + "loss": 0.0552, "step": 35100 }, { - "epoch": 2.60730729244022, - "grad_norm": 1.0542629957199097, - "learning_rate": 1.4356156245358681e-05, - "loss": 0.0711, + "epoch": 5.214614584880439, + "grad_norm": 0.6040746569633484, + "learning_rate": 4.7853854151195605e-06, + "loss": 0.0516, "step": 35110 }, { - "epoch": 2.608049903460567, - "grad_norm": 0.4654415547847748, - "learning_rate": 1.4351700579236596e-05, - "loss": 0.0561, + "epoch": 5.216099806921135, + "grad_norm": 0.8198233246803284, + "learning_rate": 4.783900193078866e-06, + "loss": 0.0672, "step": 35120 }, { - "epoch": 2.608792514480915, - "grad_norm": 1.3107812404632568, - "learning_rate": 1.4347244913114511e-05, - "loss": 0.0691, + "epoch": 5.21758502896183, + "grad_norm": 1.4005907773971558, + "learning_rate": 4.782414971038171e-06, + "loss": 0.0624, "step": 35130 }, { - "epoch": 2.6095351255012624, - "grad_norm": 2.2061398029327393, - "learning_rate": 1.4342789246992425e-05, - "loss": 0.1093, + "epoch": 5.219070251002525, + "grad_norm": 1.1273680925369263, + "learning_rate": 4.7809297489974755e-06, + "loss": 0.0539, "step": 35140 }, { - "epoch": 2.61027773652161, - "grad_norm": 1.198928952217102, - "learning_rate": 1.433833358087034e-05, - "loss": 0.0445, + "epoch": 5.22055547304322, + "grad_norm": 0.905612051486969, + "learning_rate": 4.77944452695678e-06, + "loss": 0.0539, "step": 35150 }, { - "epoch": 2.6110203475419578, - "grad_norm": 1.055016279220581, - "learning_rate": 1.4333877914748256e-05, - "loss": 0.057, + "epoch": 5.222040695083915, + "grad_norm": 0.6797011494636536, + "learning_rate": 4.777959304916085e-06, + "loss": 0.0768, "step": 35160 }, { - "epoch": 2.6117629585623052, - "grad_norm": 1.0568102598190308, - "learning_rate": 1.432942224862617e-05, - "loss": 0.0598, + "epoch": 5.2235259171246105, + "grad_norm": 0.8509185314178467, + "learning_rate": 4.7764740828753905e-06, + "loss": 0.0522, "step": 35170 }, { - "epoch": 2.6125055695826527, - "grad_norm": 1.305461049079895, - "learning_rate": 1.4324966582504085e-05, - "loss": 0.0859, + "epoch": 5.225011139165305, + "grad_norm": 1.077875018119812, + "learning_rate": 4.774988860834695e-06, + "loss": 0.0516, "step": 35180 }, { - "epoch": 2.613248180603, - "grad_norm": 1.0581294298171997, - "learning_rate": 1.4320510916382e-05, - "loss": 0.0662, + "epoch": 5.226496361206, + "grad_norm": 1.1827176809310913, + "learning_rate": 4.773503638794e-06, + "loss": 0.0462, "step": 35190 }, { - "epoch": 2.6139907916233476, - "grad_norm": 0.9426524639129639, - "learning_rate": 1.4316055250259915e-05, - "loss": 0.0559, + "epoch": 5.227981583246695, + "grad_norm": 0.8252261281013489, + "learning_rate": 4.7720184167533055e-06, + "loss": 0.0675, "step": 35200 }, { - "epoch": 2.6147334026436955, - "grad_norm": 0.5946950316429138, - "learning_rate": 1.431159958413783e-05, - "loss": 0.0954, + "epoch": 5.22946680528739, + "grad_norm": 0.774476945400238, + "learning_rate": 4.77053319471261e-06, + "loss": 0.0569, "step": 35210 }, { - "epoch": 2.615476013664043, - "grad_norm": 1.6754854917526245, - "learning_rate": 1.4307143918015743e-05, - "loss": 0.0866, + "epoch": 5.230952027328086, + "grad_norm": 0.9155932068824768, + "learning_rate": 4.769047972671914e-06, + "loss": 0.0558, "step": 35220 }, { - "epoch": 2.6162186246843904, - "grad_norm": 1.6336374282836914, - "learning_rate": 1.430268825189366e-05, - "loss": 0.0815, + "epoch": 5.232437249368781, + "grad_norm": 0.3924713730812073, + "learning_rate": 4.76756275063122e-06, + "loss": 0.0638, "step": 35230 }, { - "epoch": 2.616961235704738, - "grad_norm": 2.549908399581909, - "learning_rate": 1.4298232585771573e-05, - "loss": 0.0529, + "epoch": 5.233922471409476, + "grad_norm": 0.5012497305870056, + "learning_rate": 4.766077528590525e-06, + "loss": 0.04, "step": 35240 }, { - "epoch": 2.6177038467250853, - "grad_norm": 1.3991200923919678, - "learning_rate": 1.4293776919649488e-05, - "loss": 0.0483, + "epoch": 5.2354076934501705, + "grad_norm": 0.6418888568878174, + "learning_rate": 4.764592306549829e-06, + "loss": 0.0638, "step": 35250 }, { - "epoch": 2.618446457745433, - "grad_norm": 0.7398178577423096, - "learning_rate": 1.4289321253527401e-05, - "loss": 0.0677, + "epoch": 5.236892915490866, + "grad_norm": 0.4431953430175781, + "learning_rate": 4.763107084509135e-06, + "loss": 0.0554, "step": 35260 }, { - "epoch": 2.6191890687657806, - "grad_norm": 1.8208078145980835, - "learning_rate": 1.4284865587405318e-05, - "loss": 0.0539, + "epoch": 5.238378137531561, + "grad_norm": 0.49925869703292847, + "learning_rate": 4.76162186246844e-06, + "loss": 0.0614, "step": 35270 }, { - "epoch": 2.619931679786128, - "grad_norm": 1.7018234729766846, - "learning_rate": 1.4280409921283233e-05, - "loss": 0.0529, + "epoch": 5.239863359572256, + "grad_norm": 0.9694715738296509, + "learning_rate": 4.760136640427744e-06, + "loss": 0.0441, "step": 35280 }, { - "epoch": 2.6206742908064755, - "grad_norm": 2.4244110584259033, - "learning_rate": 1.4275954255161146e-05, - "loss": 0.0595, + "epoch": 5.241348581612951, + "grad_norm": 0.6737119555473328, + "learning_rate": 4.758651418387049e-06, + "loss": 0.0461, "step": 35290 }, { - "epoch": 2.621416901826823, - "grad_norm": 0.505042314529419, - "learning_rate": 1.4271498589039061e-05, - "loss": 0.0865, + "epoch": 5.242833803653646, + "grad_norm": 0.8961270451545715, + "learning_rate": 4.757166196346354e-06, + "loss": 0.0561, "step": 35300 }, { - "epoch": 2.622159512847171, - "grad_norm": 1.206248164176941, - "learning_rate": 1.4267042922916976e-05, - "loss": 0.0662, + "epoch": 5.244319025694342, + "grad_norm": 0.651147186756134, + "learning_rate": 4.755680974305659e-06, + "loss": 0.0434, "step": 35310 }, { - "epoch": 2.6229021238675183, - "grad_norm": 1.3556511402130127, - "learning_rate": 1.4262587256794891e-05, - "loss": 0.0575, + "epoch": 5.245804247735037, + "grad_norm": 0.953199028968811, + "learning_rate": 4.754195752264964e-06, + "loss": 0.0578, "step": 35320 }, { - "epoch": 2.6236447348878658, - "grad_norm": 0.7870049476623535, - "learning_rate": 1.4258131590672806e-05, - "loss": 0.0614, + "epoch": 5.2472894697757315, + "grad_norm": 1.1796019077301025, + "learning_rate": 4.752710530224269e-06, + "loss": 0.0701, "step": 35330 }, { - "epoch": 2.624387345908213, - "grad_norm": 1.794494867324829, - "learning_rate": 1.4253675924550721e-05, - "loss": 0.097, + "epoch": 5.248774691816426, + "grad_norm": 0.7786931991577148, + "learning_rate": 4.751225308183574e-06, + "loss": 0.0571, "step": 35340 }, { - "epoch": 2.6251299569285607, - "grad_norm": 0.7083643078804016, - "learning_rate": 1.4249220258428636e-05, - "loss": 0.0648, + "epoch": 5.250259913857121, + "grad_norm": 1.258894443511963, + "learning_rate": 4.749740086142879e-06, + "loss": 0.0786, "step": 35350 }, { - "epoch": 2.6258725679489086, - "grad_norm": 0.7302588224411011, - "learning_rate": 1.424476459230655e-05, - "loss": 0.0578, + "epoch": 5.251745135897817, + "grad_norm": 1.521615743637085, + "learning_rate": 4.748254864102184e-06, + "loss": 0.0561, "step": 35360 }, { - "epoch": 2.626615178969256, - "grad_norm": 3.079280376434326, - "learning_rate": 1.4240308926184465e-05, - "loss": 0.0897, + "epoch": 5.253230357938512, + "grad_norm": 0.750752329826355, + "learning_rate": 4.7467696420614885e-06, + "loss": 0.053, "step": 35370 }, { - "epoch": 2.6273577899896035, - "grad_norm": 0.824223518371582, - "learning_rate": 1.423585326006238e-05, - "loss": 0.08, + "epoch": 5.254715579979207, + "grad_norm": 0.878555953502655, + "learning_rate": 4.745284420020794e-06, + "loss": 0.0724, "step": 35380 }, { - "epoch": 2.628100401009951, - "grad_norm": 1.3617218732833862, - "learning_rate": 1.4231397593940295e-05, - "loss": 0.0633, + "epoch": 5.256200802019902, + "grad_norm": 1.4904576539993286, + "learning_rate": 4.743799197980098e-06, + "loss": 0.0623, "step": 35390 }, { - "epoch": 2.6288430120302984, - "grad_norm": 0.5929654240608215, - "learning_rate": 1.422694192781821e-05, - "loss": 0.0467, + "epoch": 5.257686024060597, + "grad_norm": 0.8942855596542358, + "learning_rate": 4.7423139759394035e-06, + "loss": 0.0502, "step": 35400 }, { - "epoch": 2.6295856230506462, - "grad_norm": 2.259077310562134, - "learning_rate": 1.4222486261696123e-05, - "loss": 0.0735, + "epoch": 5.2591712461012925, + "grad_norm": 0.8960702419281006, + "learning_rate": 4.740828753898708e-06, + "loss": 0.0553, "step": 35410 }, { - "epoch": 2.6303282340709937, - "grad_norm": 1.6033036708831787, - "learning_rate": 1.421803059557404e-05, - "loss": 0.0972, + "epoch": 5.260656468141987, + "grad_norm": 0.7556365728378296, + "learning_rate": 4.739343531858013e-06, + "loss": 0.0752, "step": 35420 }, { - "epoch": 2.631070845091341, - "grad_norm": 1.9454528093338013, - "learning_rate": 1.4213574929451953e-05, - "loss": 0.0736, + "epoch": 5.262141690182682, + "grad_norm": 0.9979767799377441, + "learning_rate": 4.7378583098173185e-06, + "loss": 0.0508, "step": 35430 }, { - "epoch": 2.6318134561116886, - "grad_norm": 0.5253037810325623, - "learning_rate": 1.4209119263329868e-05, - "loss": 0.0651, + "epoch": 5.263626912223377, + "grad_norm": 1.3748942613601685, + "learning_rate": 4.736373087776623e-06, + "loss": 0.057, "step": 35440 }, { - "epoch": 2.632556067132036, - "grad_norm": 0.5184679627418518, - "learning_rate": 1.4204663597207784e-05, - "loss": 0.0403, + "epoch": 5.265112134264072, + "grad_norm": 0.3763701617717743, + "learning_rate": 4.734887865735928e-06, + "loss": 0.0622, "step": 35450 }, { - "epoch": 2.633298678152384, - "grad_norm": 0.7617425918579102, - "learning_rate": 1.4200207931085698e-05, - "loss": 0.0699, + "epoch": 5.266597356304768, + "grad_norm": 0.26023438572883606, + "learning_rate": 4.733402643695233e-06, + "loss": 0.0478, "step": 35460 }, { - "epoch": 2.6340412891727314, - "grad_norm": 1.4257124662399292, - "learning_rate": 1.4195752264963613e-05, - "loss": 0.0725, + "epoch": 5.268082578345463, + "grad_norm": 0.8818972706794739, + "learning_rate": 4.731917421654538e-06, + "loss": 0.0469, "step": 35470 }, { - "epoch": 2.634783900193079, - "grad_norm": 2.0177693367004395, - "learning_rate": 1.4191296598841526e-05, - "loss": 0.0693, + "epoch": 5.269567800386158, + "grad_norm": 0.8154204487800598, + "learning_rate": 4.730432199613842e-06, + "loss": 0.0468, "step": 35480 }, { - "epoch": 2.6355265112134263, - "grad_norm": 1.3134448528289795, - "learning_rate": 1.4186840932719443e-05, - "loss": 0.0739, + "epoch": 5.271053022426853, + "grad_norm": 0.6178163886070251, + "learning_rate": 4.728946977573148e-06, + "loss": 0.0577, "step": 35490 }, { - "epoch": 2.6362691222337737, - "grad_norm": 2.646014928817749, - "learning_rate": 1.4182385266597356e-05, - "loss": 0.0605, + "epoch": 5.2725382444675475, + "grad_norm": 0.4771282374858856, + "learning_rate": 4.727461755532453e-06, + "loss": 0.0541, "step": 35500 }, { - "epoch": 2.6370117332541216, - "grad_norm": 1.8550797700881958, - "learning_rate": 1.4177929600475271e-05, - "loss": 0.0577, + "epoch": 5.274023466508243, + "grad_norm": 0.9545867443084717, + "learning_rate": 4.725976533491757e-06, + "loss": 0.0833, "step": 35510 }, { - "epoch": 2.637754344274469, - "grad_norm": 1.6561418771743774, - "learning_rate": 1.4173473934353186e-05, - "loss": 0.0798, + "epoch": 5.275508688548938, + "grad_norm": 0.8452116847038269, + "learning_rate": 4.724491311451062e-06, + "loss": 0.061, "step": 35520 }, { - "epoch": 2.6384969552948165, - "grad_norm": 1.4984925985336304, - "learning_rate": 1.4169018268231101e-05, - "loss": 0.0764, + "epoch": 5.276993910589633, + "grad_norm": 0.7856245040893555, + "learning_rate": 4.723006089410367e-06, + "loss": 0.0682, "step": 35530 }, { - "epoch": 2.639239566315164, - "grad_norm": 2.3113274574279785, - "learning_rate": 1.4164562602109016e-05, - "loss": 0.0465, + "epoch": 5.278479132630328, + "grad_norm": 1.091416358947754, + "learning_rate": 4.721520867369672e-06, + "loss": 0.0579, "step": 35540 }, { - "epoch": 2.6399821773355114, - "grad_norm": 2.2579538822174072, - "learning_rate": 1.416010693598693e-05, - "loss": 0.0767, + "epoch": 5.279964354671023, + "grad_norm": 0.6916674971580505, + "learning_rate": 4.720035645328977e-06, + "loss": 0.0567, "step": 35550 }, { - "epoch": 2.6407247883558593, - "grad_norm": 1.6482024192810059, - "learning_rate": 1.4155651269864844e-05, - "loss": 0.0514, + "epoch": 5.281449576711719, + "grad_norm": 0.31788578629493713, + "learning_rate": 4.718550423288282e-06, + "loss": 0.053, "step": 35560 }, { - "epoch": 2.641467399376207, - "grad_norm": 2.0257744789123535, - "learning_rate": 1.4151195603742761e-05, - "loss": 0.0452, + "epoch": 5.282934798752414, + "grad_norm": 0.7865982055664062, + "learning_rate": 4.717065201247587e-06, + "loss": 0.0658, "step": 35570 }, { - "epoch": 2.6422100103965542, - "grad_norm": 1.083173394203186, - "learning_rate": 1.4146739937620674e-05, - "loss": 0.059, + "epoch": 5.2844200207931085, + "grad_norm": 1.0544800758361816, + "learning_rate": 4.715579979206892e-06, + "loss": 0.0736, "step": 35580 }, { - "epoch": 2.6429526214169017, - "grad_norm": 1.5998643636703491, - "learning_rate": 1.414228427149859e-05, - "loss": 0.0557, + "epoch": 5.285905242833803, + "grad_norm": 1.0144555568695068, + "learning_rate": 4.714094757166196e-06, + "loss": 0.0639, "step": 35590 }, { - "epoch": 2.643695232437249, - "grad_norm": 0.8439221382141113, - "learning_rate": 1.4137828605376504e-05, - "loss": 0.0582, + "epoch": 5.287390464874499, + "grad_norm": 0.7130341529846191, + "learning_rate": 4.712609535125502e-06, + "loss": 0.0569, "step": 35600 }, { - "epoch": 2.644437843457597, - "grad_norm": 2.156799793243408, - "learning_rate": 1.413337293925442e-05, - "loss": 0.0562, + "epoch": 5.288875686915194, + "grad_norm": 0.952660083770752, + "learning_rate": 4.711124313084807e-06, + "loss": 0.0642, "step": 35610 }, { - "epoch": 2.6451804544779445, - "grad_norm": 0.5263361930847168, - "learning_rate": 1.4128917273132334e-05, - "loss": 0.0844, + "epoch": 5.290360908955889, + "grad_norm": 0.8427135348320007, + "learning_rate": 4.709639091044111e-06, + "loss": 0.0543, "step": 35620 }, { - "epoch": 2.645923065498292, - "grad_norm": 2.5201919078826904, - "learning_rate": 1.4124461607010248e-05, - "loss": 0.0867, + "epoch": 5.291846130996584, + "grad_norm": 0.6305361390113831, + "learning_rate": 4.7081538690034164e-06, + "loss": 0.0621, "step": 35630 }, { - "epoch": 2.6466656765186394, - "grad_norm": 1.7519117593765259, - "learning_rate": 1.4120005940888164e-05, - "loss": 0.0578, + "epoch": 5.293331353037279, + "grad_norm": 1.340817928314209, + "learning_rate": 4.706668646962722e-06, + "loss": 0.0593, "step": 35640 }, { - "epoch": 2.647408287538987, - "grad_norm": 1.5524243116378784, - "learning_rate": 1.4115550274766078e-05, - "loss": 0.0662, + "epoch": 5.2948165750779745, + "grad_norm": 0.468432754278183, + "learning_rate": 4.705183424922026e-06, + "loss": 0.0495, "step": 35650 }, { - "epoch": 2.6481508985593347, - "grad_norm": 0.8685120344161987, - "learning_rate": 1.4111094608643993e-05, - "loss": 0.0509, + "epoch": 5.296301797118669, + "grad_norm": 0.8716316819190979, + "learning_rate": 4.7036982028813314e-06, + "loss": 0.0673, "step": 35660 }, { - "epoch": 2.648893509579682, - "grad_norm": 2.201120376586914, - "learning_rate": 1.4106638942521906e-05, - "loss": 0.057, + "epoch": 5.297787019159364, + "grad_norm": 0.9076347351074219, + "learning_rate": 4.702212980840636e-06, + "loss": 0.0635, "step": 35670 }, { - "epoch": 2.6496361206000296, - "grad_norm": 0.4596274197101593, - "learning_rate": 1.4102183276399823e-05, - "loss": 0.0714, + "epoch": 5.299272241200059, + "grad_norm": 0.8860725164413452, + "learning_rate": 4.700727758799941e-06, + "loss": 0.0555, "step": 35680 }, { - "epoch": 2.6503787316203775, - "grad_norm": 2.369061231613159, - "learning_rate": 1.4097727610277738e-05, - "loss": 0.05, + "epoch": 5.300757463240754, + "grad_norm": 1.1657594442367554, + "learning_rate": 4.699242536759246e-06, + "loss": 0.0633, "step": 35690 }, { - "epoch": 2.6511213426407245, - "grad_norm": 0.30310168862342834, - "learning_rate": 1.4093271944155651e-05, - "loss": 0.073, + "epoch": 5.30224268528145, + "grad_norm": 0.8625801205635071, + "learning_rate": 4.697757314718551e-06, + "loss": 0.0536, "step": 35700 }, { - "epoch": 2.6518639536610724, - "grad_norm": 0.8447324633598328, - "learning_rate": 1.4088816278033568e-05, - "loss": 0.0769, + "epoch": 5.303727907322145, + "grad_norm": 0.6598368883132935, + "learning_rate": 4.696272092677856e-06, + "loss": 0.0477, "step": 35710 }, { - "epoch": 2.65260656468142, - "grad_norm": 1.5331531763076782, - "learning_rate": 1.4084360611911481e-05, - "loss": 0.0837, + "epoch": 5.30521312936284, + "grad_norm": 0.6790146827697754, + "learning_rate": 4.6947868706371606e-06, + "loss": 0.0618, "step": 35720 }, { - "epoch": 2.6533491757017673, - "grad_norm": 0.9283561110496521, - "learning_rate": 1.4079904945789396e-05, - "loss": 0.0608, + "epoch": 5.306698351403535, + "grad_norm": 1.139267921447754, + "learning_rate": 4.693301648596466e-06, + "loss": 0.0502, "step": 35730 }, { - "epoch": 2.654091786722115, - "grad_norm": 1.556794285774231, - "learning_rate": 1.4075449279667311e-05, - "loss": 0.0839, + "epoch": 5.3081835734442295, + "grad_norm": 0.5348055362701416, + "learning_rate": 4.69181642655577e-06, + "loss": 0.0546, "step": 35740 }, { - "epoch": 2.6548343977424627, - "grad_norm": 2.0326085090637207, - "learning_rate": 1.4070993613545226e-05, - "loss": 0.079, + "epoch": 5.309668795484925, + "grad_norm": 0.9072430729866028, + "learning_rate": 4.6903312045150756e-06, + "loss": 0.0733, "step": 35750 }, { - "epoch": 2.65557700876281, - "grad_norm": 1.1070688962936401, - "learning_rate": 1.406653794742314e-05, - "loss": 0.0598, + "epoch": 5.31115401752562, + "grad_norm": 0.2694229483604431, + "learning_rate": 4.68884598247438e-06, + "loss": 0.0471, "step": 35760 }, { - "epoch": 2.6563196197831576, - "grad_norm": 2.687786817550659, - "learning_rate": 1.4062082281301054e-05, - "loss": 0.0528, + "epoch": 5.312639239566315, + "grad_norm": 0.8150109648704529, + "learning_rate": 4.687360760433685e-06, + "loss": 0.0565, "step": 35770 }, { - "epoch": 2.657062230803505, - "grad_norm": 2.0500941276550293, - "learning_rate": 1.4057626615178969e-05, - "loss": 0.0586, + "epoch": 5.31412446160701, + "grad_norm": 0.405373752117157, + "learning_rate": 4.68587553839299e-06, + "loss": 0.0637, "step": 35780 }, { - "epoch": 2.657804841823853, - "grad_norm": 1.7865089178085327, - "learning_rate": 1.4053170949056884e-05, - "loss": 0.0816, + "epoch": 5.315609683647705, + "grad_norm": 1.0095162391662598, + "learning_rate": 4.684390316352295e-06, + "loss": 0.054, "step": 35790 }, { - "epoch": 2.6585474528442004, - "grad_norm": 1.018984317779541, - "learning_rate": 1.4048715282934799e-05, - "loss": 0.0338, + "epoch": 5.317094905688401, + "grad_norm": 0.8304887413978577, + "learning_rate": 4.6829050943116e-06, + "loss": 0.0648, "step": 35800 }, { - "epoch": 2.659290063864548, - "grad_norm": 0.3521486520767212, - "learning_rate": 1.4044259616812714e-05, - "loss": 0.0549, + "epoch": 5.318580127729096, + "grad_norm": 0.7152771353721619, + "learning_rate": 4.681419872270905e-06, + "loss": 0.0592, "step": 35810 }, { - "epoch": 2.6600326748848953, - "grad_norm": 2.4242541790008545, - "learning_rate": 1.4039803950690627e-05, - "loss": 0.1002, + "epoch": 5.3200653497697905, + "grad_norm": 0.6170614957809448, + "learning_rate": 4.679934650230209e-06, + "loss": 0.0611, "step": 35820 }, { - "epoch": 2.6607752859052427, - "grad_norm": 1.1004574298858643, - "learning_rate": 1.4035348284568544e-05, - "loss": 0.0457, + "epoch": 5.321550571810485, + "grad_norm": 0.7695423364639282, + "learning_rate": 4.678449428189515e-06, + "loss": 0.0759, "step": 35830 }, { - "epoch": 2.6615178969255906, - "grad_norm": 1.469435214996338, - "learning_rate": 1.4030892618446457e-05, - "loss": 0.0819, + "epoch": 5.323035793851181, + "grad_norm": 0.4829961061477661, + "learning_rate": 4.67696420614882e-06, + "loss": 0.0591, "step": 35840 }, { - "epoch": 2.662260507945938, - "grad_norm": 0.20133927464485168, - "learning_rate": 1.4026436952324372e-05, - "loss": 0.0591, + "epoch": 5.324521015891876, + "grad_norm": 0.9738242030143738, + "learning_rate": 4.675478984108124e-06, + "loss": 0.0698, "step": 35850 }, { - "epoch": 2.6630031189662855, - "grad_norm": 1.4048292636871338, - "learning_rate": 1.4021981286202289e-05, - "loss": 0.0562, + "epoch": 5.326006237932571, + "grad_norm": 0.6264966726303101, + "learning_rate": 4.673993762067429e-06, + "loss": 0.0598, "step": 35860 }, { - "epoch": 2.663745729986633, - "grad_norm": 5.874467849731445, - "learning_rate": 1.4017525620080202e-05, - "loss": 0.0712, + "epoch": 5.327491459973266, + "grad_norm": 1.0743324756622314, + "learning_rate": 4.672508540026735e-06, + "loss": 0.068, "step": 35870 }, { - "epoch": 2.6644883410069804, - "grad_norm": 1.0913431644439697, - "learning_rate": 1.4013069953958117e-05, - "loss": 0.0703, + "epoch": 5.328976682013961, + "grad_norm": 0.896522581577301, + "learning_rate": 4.671023317986039e-06, + "loss": 0.0622, "step": 35880 }, { - "epoch": 2.6652309520273283, - "grad_norm": 0.3893365263938904, - "learning_rate": 1.400861428783603e-05, - "loss": 0.0593, + "epoch": 5.330461904054657, + "grad_norm": 0.8270133137702942, + "learning_rate": 4.669538095945344e-06, + "loss": 0.0661, "step": 35890 }, { - "epoch": 2.6659735630476757, - "grad_norm": 1.0970251560211182, - "learning_rate": 1.4004158621713947e-05, - "loss": 0.0716, + "epoch": 5.3319471260953515, + "grad_norm": 1.0910258293151855, + "learning_rate": 4.66805287390465e-06, + "loss": 0.0696, "step": 35900 }, { - "epoch": 2.666716174068023, - "grad_norm": 1.392922282218933, - "learning_rate": 1.3999702955591862e-05, - "loss": 0.0589, + "epoch": 5.333432348136046, + "grad_norm": 1.075138807296753, + "learning_rate": 4.666567651863954e-06, + "loss": 0.065, "step": 35910 }, { - "epoch": 2.6674587850883706, - "grad_norm": 0.6275796890258789, - "learning_rate": 1.3995247289469776e-05, - "loss": 0.037, + "epoch": 5.334917570176741, + "grad_norm": 0.6105763912200928, + "learning_rate": 4.6650824298232586e-06, + "loss": 0.0448, "step": 35920 }, { - "epoch": 2.668201396108718, - "grad_norm": 1.6049987077713013, - "learning_rate": 1.399079162334769e-05, - "loss": 0.0566, + "epoch": 5.336402792217436, + "grad_norm": 0.7854593396186829, + "learning_rate": 4.663597207782564e-06, + "loss": 0.0475, "step": 35930 }, { - "epoch": 2.668944007129066, - "grad_norm": 1.582099199295044, - "learning_rate": 1.3986335957225606e-05, - "loss": 0.0689, + "epoch": 5.337888014258132, + "grad_norm": 0.24348467588424683, + "learning_rate": 4.662111985741869e-06, + "loss": 0.0578, "step": 35940 }, { - "epoch": 2.6696866181494134, - "grad_norm": 0.8754908442497253, - "learning_rate": 1.398188029110352e-05, - "loss": 0.0926, + "epoch": 5.339373236298827, + "grad_norm": 0.2796478867530823, + "learning_rate": 4.6606267637011736e-06, + "loss": 0.0547, "step": 35950 }, { - "epoch": 2.670429229169761, - "grad_norm": 2.45027756690979, - "learning_rate": 1.3977424624981434e-05, - "loss": 0.0468, + "epoch": 5.340858458339522, + "grad_norm": 0.6760820150375366, + "learning_rate": 4.659141541660479e-06, + "loss": 0.0567, "step": 35960 }, { - "epoch": 2.6711718401901083, - "grad_norm": 1.8053902387619019, - "learning_rate": 1.3972968958859349e-05, - "loss": 0.0489, + "epoch": 5.342343680380217, + "grad_norm": 0.9601134657859802, + "learning_rate": 4.657656319619783e-06, + "loss": 0.0468, "step": 35970 }, { - "epoch": 2.671914451210456, - "grad_norm": 3.0964303016662598, - "learning_rate": 1.3968513292737266e-05, - "loss": 0.0834, + "epoch": 5.343828902420912, + "grad_norm": 1.3426578044891357, + "learning_rate": 4.6561710975790885e-06, + "loss": 0.0719, "step": 35980 }, { - "epoch": 2.6726570622308037, - "grad_norm": 2.1275410652160645, - "learning_rate": 1.3964057626615179e-05, - "loss": 0.0938, + "epoch": 5.345314124461607, + "grad_norm": 1.429280400276184, + "learning_rate": 4.654685875538393e-06, + "loss": 0.0747, "step": 35990 }, { - "epoch": 2.673399673251151, - "grad_norm": 0.8171222805976868, - "learning_rate": 1.3959601960493094e-05, - "loss": 0.0669, + "epoch": 5.346799346502302, + "grad_norm": 0.47933143377304077, + "learning_rate": 4.653200653497698e-06, + "loss": 0.0514, "step": 36000 }, { - "epoch": 2.6741422842714986, - "grad_norm": 3.5696773529052734, - "learning_rate": 1.3955146294371009e-05, - "loss": 0.0935, + "epoch": 5.348284568542997, + "grad_norm": 0.8363801836967468, + "learning_rate": 4.6517154314570035e-06, + "loss": 0.0608, "step": 36010 }, { - "epoch": 2.674884895291846, - "grad_norm": 1.2515684366226196, - "learning_rate": 1.3950690628248924e-05, - "loss": 0.0902, + "epoch": 5.349769790583692, + "grad_norm": 1.23948073387146, + "learning_rate": 4.650230209416308e-06, + "loss": 0.062, "step": 36020 }, { - "epoch": 2.6756275063121935, - "grad_norm": 3.313480854034424, - "learning_rate": 1.3946234962126839e-05, - "loss": 0.0592, + "epoch": 5.351255012624387, + "grad_norm": 0.8158882260322571, + "learning_rate": 4.648744987375613e-06, + "loss": 0.0518, "step": 36030 }, { - "epoch": 2.6763701173325414, - "grad_norm": 0.7134093046188354, - "learning_rate": 1.3941779296004752e-05, - "loss": 0.0455, + "epoch": 5.352740234665083, + "grad_norm": 1.212444543838501, + "learning_rate": 4.647259765334918e-06, + "loss": 0.0672, "step": 36040 }, { - "epoch": 2.677112728352889, - "grad_norm": 0.5225452184677124, - "learning_rate": 1.3937323629882669e-05, - "loss": 0.0487, + "epoch": 5.354225456705778, + "grad_norm": 0.9097556471824646, + "learning_rate": 4.645774543294223e-06, + "loss": 0.0586, "step": 36050 }, { - "epoch": 2.6778553393732363, - "grad_norm": 0.6673758625984192, - "learning_rate": 1.3932867963760582e-05, - "loss": 0.079, + "epoch": 5.3557106787464726, + "grad_norm": 0.4835580587387085, + "learning_rate": 4.644289321253528e-06, + "loss": 0.04, "step": 36060 }, { - "epoch": 2.6785979503935837, - "grad_norm": 2.7382137775421143, - "learning_rate": 1.3928412297638497e-05, - "loss": 0.0757, + "epoch": 5.3571959007871675, + "grad_norm": 1.1032158136367798, + "learning_rate": 4.642804099212833e-06, + "loss": 0.0684, "step": 36070 }, { - "epoch": 2.679340561413931, - "grad_norm": 0.9488750696182251, - "learning_rate": 1.392395663151641e-05, - "loss": 0.0542, + "epoch": 5.358681122827862, + "grad_norm": 0.4809795320034027, + "learning_rate": 4.641318877172137e-06, + "loss": 0.0545, "step": 36080 }, { - "epoch": 2.680083172434279, - "grad_norm": 4.239487648010254, - "learning_rate": 1.3919500965394327e-05, - "loss": 0.0986, + "epoch": 5.360166344868558, + "grad_norm": 1.5494866371154785, + "learning_rate": 4.639833655131442e-06, + "loss": 0.0663, "step": 36090 }, { - "epoch": 2.6808257834546265, - "grad_norm": 1.1247403621673584, - "learning_rate": 1.3915045299272242e-05, - "loss": 0.0535, + "epoch": 5.361651566909253, + "grad_norm": 1.4795058965682983, + "learning_rate": 4.638348433090748e-06, + "loss": 0.0548, "step": 36100 }, { - "epoch": 2.681568394474974, - "grad_norm": 1.4115970134735107, - "learning_rate": 1.3910589633150155e-05, - "loss": 0.0712, + "epoch": 5.363136788949948, + "grad_norm": 0.5212380886077881, + "learning_rate": 4.636863211050052e-06, + "loss": 0.0475, "step": 36110 }, { - "epoch": 2.6823110054953214, - "grad_norm": 1.4354156255722046, - "learning_rate": 1.3906133967028072e-05, - "loss": 0.0612, + "epoch": 5.364622010990643, + "grad_norm": 0.1950119435787201, + "learning_rate": 4.635377989009357e-06, + "loss": 0.0487, "step": 36120 }, { - "epoch": 2.683053616515669, - "grad_norm": 0.6323860287666321, - "learning_rate": 1.3901678300905985e-05, - "loss": 0.0839, + "epoch": 5.366107233031338, + "grad_norm": 0.8038704991340637, + "learning_rate": 4.633892766968663e-06, + "loss": 0.0734, "step": 36130 }, { - "epoch": 2.6837962275360168, - "grad_norm": 1.8889610767364502, - "learning_rate": 1.38972226347839e-05, - "loss": 0.0528, + "epoch": 5.3675924550720335, + "grad_norm": 0.8529590368270874, + "learning_rate": 4.632407544927967e-06, + "loss": 0.0549, "step": 36140 }, { - "epoch": 2.684538838556364, - "grad_norm": 1.292384386062622, - "learning_rate": 1.3892766968661815e-05, - "loss": 0.0699, + "epoch": 5.369077677112728, + "grad_norm": 0.5382302403450012, + "learning_rate": 4.6309223228872715e-06, + "loss": 0.0477, "step": 36150 }, { - "epoch": 2.6852814495767117, - "grad_norm": 1.048690676689148, - "learning_rate": 1.388831130253973e-05, - "loss": 0.0679, + "epoch": 5.370562899153423, + "grad_norm": 0.9232028126716614, + "learning_rate": 4.629437100846577e-06, + "loss": 0.0633, "step": 36160 }, { - "epoch": 2.686024060597059, - "grad_norm": 0.9815926551818848, - "learning_rate": 1.3883855636417645e-05, - "loss": 0.0747, + "epoch": 5.372048121194118, + "grad_norm": 1.1026906967163086, + "learning_rate": 4.627951878805882e-06, + "loss": 0.0441, "step": 36170 }, { - "epoch": 2.6867666716174066, - "grad_norm": 0.6208893060684204, - "learning_rate": 1.3879399970295559e-05, - "loss": 0.0517, + "epoch": 5.373533343234814, + "grad_norm": 1.0024158954620361, + "learning_rate": 4.6264666567651865e-06, + "loss": 0.063, "step": 36180 }, { - "epoch": 2.6875092826377545, - "grad_norm": 1.4446412324905396, - "learning_rate": 1.3874944304173474e-05, - "loss": 0.0549, + "epoch": 5.375018565275509, + "grad_norm": 0.37517932057380676, + "learning_rate": 4.624981434724492e-06, + "loss": 0.0393, "step": 36190 }, { - "epoch": 2.688251893658102, - "grad_norm": 0.39226940274238586, - "learning_rate": 1.3870488638051389e-05, - "loss": 0.0788, + "epoch": 5.376503787316204, + "grad_norm": 0.7823379635810852, + "learning_rate": 4.623496212683797e-06, + "loss": 0.0707, "step": 36200 }, { - "epoch": 2.6889945046784494, - "grad_norm": 1.446395754814148, - "learning_rate": 1.3866032971929304e-05, - "loss": 0.1031, + "epoch": 5.377989009356899, + "grad_norm": 0.8410983085632324, + "learning_rate": 4.6220109906431015e-06, + "loss": 0.0684, "step": 36210 }, { - "epoch": 2.689737115698797, - "grad_norm": 1.849453330039978, - "learning_rate": 1.3861577305807219e-05, - "loss": 0.0683, + "epoch": 5.379474231397594, + "grad_norm": 1.6391550302505493, + "learning_rate": 4.620525768602406e-06, + "loss": 0.0602, "step": 36220 }, { - "epoch": 2.6904797267191443, - "grad_norm": 1.7892287969589233, - "learning_rate": 1.3857121639685132e-05, - "loss": 0.0647, + "epoch": 5.380959453438289, + "grad_norm": 0.5223180055618286, + "learning_rate": 4.619040546561711e-06, + "loss": 0.0639, "step": 36230 }, { - "epoch": 2.691222337739492, - "grad_norm": 0.9724948406219482, - "learning_rate": 1.3852665973563049e-05, - "loss": 0.0798, + "epoch": 5.382444675478984, + "grad_norm": 0.6970897912979126, + "learning_rate": 4.6175553245210165e-06, + "loss": 0.0557, "step": 36240 }, { - "epoch": 2.6919649487598396, - "grad_norm": 1.3628493547439575, - "learning_rate": 1.3848210307440962e-05, - "loss": 0.0556, + "epoch": 5.383929897519679, + "grad_norm": 0.8759315013885498, + "learning_rate": 4.616070102480321e-06, + "loss": 0.0658, "step": 36250 }, { - "epoch": 2.692707559780187, - "grad_norm": 1.9433894157409668, - "learning_rate": 1.3843754641318877e-05, - "loss": 0.0985, + "epoch": 5.385415119560374, + "grad_norm": 1.5353213548660278, + "learning_rate": 4.614584880439626e-06, + "loss": 0.0635, "step": 36260 }, { - "epoch": 2.693450170800535, - "grad_norm": 1.5981539487838745, - "learning_rate": 1.3839298975196794e-05, - "loss": 0.0582, + "epoch": 5.386900341601069, + "grad_norm": 1.4542258977890015, + "learning_rate": 4.6130996583989315e-06, + "loss": 0.0652, "step": 36270 }, { - "epoch": 2.694192781820882, - "grad_norm": 0.5563480257987976, - "learning_rate": 1.3834843309074707e-05, - "loss": 0.0417, + "epoch": 5.388385563641765, + "grad_norm": 1.3767975568771362, + "learning_rate": 4.611614436358236e-06, + "loss": 0.0725, "step": 36280 }, { - "epoch": 2.69493539284123, - "grad_norm": 0.9371748566627502, - "learning_rate": 1.3830387642952622e-05, - "loss": 0.0775, + "epoch": 5.38987078568246, + "grad_norm": 0.8995253443717957, + "learning_rate": 4.61012921431754e-06, + "loss": 0.0785, "step": 36290 }, { - "epoch": 2.6956780038615773, - "grad_norm": 1.239343523979187, - "learning_rate": 1.3825931976830535e-05, - "loss": 0.0469, + "epoch": 5.391356007723155, + "grad_norm": 1.5716017484664917, + "learning_rate": 4.608643992276846e-06, + "loss": 0.0588, "step": 36300 }, { - "epoch": 2.6964206148819247, - "grad_norm": 0.48741811513900757, - "learning_rate": 1.3821476310708452e-05, - "loss": 0.0756, + "epoch": 5.3928412297638495, + "grad_norm": 1.171876072883606, + "learning_rate": 4.607158770236151e-06, + "loss": 0.0603, "step": 36310 }, { - "epoch": 2.6971632259022726, - "grad_norm": 3.158456802368164, - "learning_rate": 1.3817020644586367e-05, - "loss": 0.0276, + "epoch": 5.394326451804544, + "grad_norm": 1.5212937593460083, + "learning_rate": 4.605673548195455e-06, + "loss": 0.0568, "step": 36320 }, { - "epoch": 2.69790583692262, - "grad_norm": 1.2816053628921509, - "learning_rate": 1.381256497846428e-05, - "loss": 0.0553, + "epoch": 5.39581167384524, + "grad_norm": 0.8403087854385376, + "learning_rate": 4.604188326154761e-06, + "loss": 0.0756, "step": 36330 }, { - "epoch": 2.6986484479429675, - "grad_norm": 0.4111814796924591, - "learning_rate": 1.3808109312342195e-05, - "loss": 0.0452, + "epoch": 5.397296895885935, + "grad_norm": 1.2481799125671387, + "learning_rate": 4.602703104114065e-06, + "loss": 0.0688, "step": 36340 }, { - "epoch": 2.699391058963315, - "grad_norm": 1.7918380498886108, - "learning_rate": 1.380365364622011e-05, - "loss": 0.0432, + "epoch": 5.39878211792663, + "grad_norm": 1.127144455909729, + "learning_rate": 4.60121788207337e-06, + "loss": 0.0506, "step": 36350 }, { - "epoch": 2.7001336699836624, - "grad_norm": 1.718360424041748, - "learning_rate": 1.3799197980098025e-05, - "loss": 0.1107, + "epoch": 5.400267339967325, + "grad_norm": 0.590782880783081, + "learning_rate": 4.599732660032676e-06, + "loss": 0.0678, "step": 36360 }, { - "epoch": 2.7008762810040103, - "grad_norm": 0.892610490322113, - "learning_rate": 1.3794742313975939e-05, - "loss": 0.0709, + "epoch": 5.40175256200802, + "grad_norm": 0.8410061597824097, + "learning_rate": 4.59824743799198e-06, + "loss": 0.0753, "step": 36370 }, { - "epoch": 2.701618892024358, - "grad_norm": 0.5579351186752319, - "learning_rate": 1.3790286647853855e-05, - "loss": 0.048, + "epoch": 5.403237784048716, + "grad_norm": 1.1885724067687988, + "learning_rate": 4.596762215951285e-06, + "loss": 0.0668, "step": 36380 }, { - "epoch": 2.7023615030447052, - "grad_norm": 0.5769586563110352, - "learning_rate": 1.378583098173177e-05, - "loss": 0.0572, + "epoch": 5.4047230060894105, + "grad_norm": 0.5760040879249573, + "learning_rate": 4.59527699391059e-06, + "loss": 0.0651, "step": 36390 }, { - "epoch": 2.7031041140650527, - "grad_norm": 2.853304624557495, - "learning_rate": 1.3781375315609684e-05, - "loss": 0.0766, + "epoch": 5.406208228130105, + "grad_norm": 0.9846057891845703, + "learning_rate": 4.593791771869895e-06, + "loss": 0.0591, "step": 36400 }, { - "epoch": 2.7038467250854, - "grad_norm": 3.345918655395508, - "learning_rate": 1.3776919649487599e-05, - "loss": 0.0761, + "epoch": 5.4076934501708, + "grad_norm": 0.9451662302017212, + "learning_rate": 4.5923065498291995e-06, + "loss": 0.0646, "step": 36410 }, { - "epoch": 2.704589336105748, - "grad_norm": 1.423073649406433, - "learning_rate": 1.3772463983365514e-05, - "loss": 0.0698, + "epoch": 5.409178672211496, + "grad_norm": 0.6508313417434692, + "learning_rate": 4.590821327788505e-06, + "loss": 0.0575, "step": 36420 }, { - "epoch": 2.7053319471260955, - "grad_norm": 2.362412929534912, - "learning_rate": 1.3768008317243429e-05, - "loss": 0.08, + "epoch": 5.410663894252191, + "grad_norm": 0.5562500357627869, + "learning_rate": 4.58933610574781e-06, + "loss": 0.0596, "step": 36430 }, { - "epoch": 2.706074558146443, - "grad_norm": 2.3598601818084717, - "learning_rate": 1.3763552651121344e-05, - "loss": 0.0829, + "epoch": 5.412149116292886, + "grad_norm": 0.43210306763648987, + "learning_rate": 4.5878508837071145e-06, + "loss": 0.0646, "step": 36440 }, { - "epoch": 2.7068171691667904, - "grad_norm": 1.5622998476028442, - "learning_rate": 1.3759096984999257e-05, - "loss": 0.0447, + "epoch": 5.413634338333581, + "grad_norm": 0.8571392893791199, + "learning_rate": 4.586365661666419e-06, + "loss": 0.0658, "step": 36450 }, { - "epoch": 2.707559780187138, - "grad_norm": 0.6126532554626465, - "learning_rate": 1.3754641318877173e-05, - "loss": 0.0703, + "epoch": 5.415119560374276, + "grad_norm": 0.7892032265663147, + "learning_rate": 4.584880439625724e-06, + "loss": 0.0602, "step": 36460 }, { - "epoch": 2.7083023912074857, - "grad_norm": 1.302445411682129, - "learning_rate": 1.3750185652755087e-05, - "loss": 0.0636, + "epoch": 5.4166047824149715, + "grad_norm": 0.21121440827846527, + "learning_rate": 4.5833952175850295e-06, + "loss": 0.039, "step": 36470 }, { - "epoch": 2.709045002227833, - "grad_norm": 0.8420956134796143, - "learning_rate": 1.3745729986633002e-05, - "loss": 0.0789, + "epoch": 5.418090004455666, + "grad_norm": 0.858177900314331, + "learning_rate": 4.581909995544334e-06, + "loss": 0.0549, "step": 36480 }, { - "epoch": 2.7097876132481806, - "grad_norm": 1.0522314310073853, - "learning_rate": 1.3741274320510915e-05, - "loss": 0.0619, + "epoch": 5.419575226496361, + "grad_norm": 1.2030284404754639, + "learning_rate": 4.580424773503639e-06, + "loss": 0.0565, "step": 36490 }, { - "epoch": 2.710530224268528, - "grad_norm": 1.1721168756484985, - "learning_rate": 1.3736818654388832e-05, - "loss": 0.047, + "epoch": 5.421060448537056, + "grad_norm": 1.4121376276016235, + "learning_rate": 4.5789395514629445e-06, + "loss": 0.066, "step": 36500 }, { - "epoch": 2.7112728352888755, - "grad_norm": 1.1313562393188477, - "learning_rate": 1.3732362988266747e-05, - "loss": 0.0793, + "epoch": 5.422545670577751, + "grad_norm": 1.6546800136566162, + "learning_rate": 4.577454329422249e-06, + "loss": 0.0705, "step": 36510 }, { - "epoch": 2.7120154463092234, - "grad_norm": 0.656134307384491, - "learning_rate": 1.372790732214466e-05, - "loss": 0.0709, + "epoch": 5.424030892618447, + "grad_norm": 1.0694074630737305, + "learning_rate": 4.575969107381553e-06, + "loss": 0.0566, "step": 36520 }, { - "epoch": 2.712758057329571, - "grad_norm": 0.298880934715271, - "learning_rate": 1.3723451656022577e-05, - "loss": 0.0516, + "epoch": 5.425516114659142, + "grad_norm": 1.0429729223251343, + "learning_rate": 4.5744838853408595e-06, + "loss": 0.0597, "step": 36530 }, { - "epoch": 2.7135006683499183, - "grad_norm": 0.7972229719161987, - "learning_rate": 1.371899598990049e-05, - "loss": 0.0448, + "epoch": 5.427001336699837, + "grad_norm": 0.5231200456619263, + "learning_rate": 4.572998663300164e-06, + "loss": 0.0443, "step": 36540 }, { - "epoch": 2.7142432793702658, - "grad_norm": 1.8355180025100708, - "learning_rate": 1.3714540323778405e-05, - "loss": 0.0938, + "epoch": 5.4284865587405315, + "grad_norm": 0.4253857135772705, + "learning_rate": 4.571513441259468e-06, + "loss": 0.0384, "step": 36550 }, { - "epoch": 2.714985890390613, - "grad_norm": 1.5195986032485962, - "learning_rate": 1.371008465765632e-05, - "loss": 0.0733, + "epoch": 5.429971780781226, + "grad_norm": 0.5469606518745422, + "learning_rate": 4.570028219218774e-06, + "loss": 0.0416, "step": 36560 }, { - "epoch": 2.715728501410961, - "grad_norm": 0.950968861579895, - "learning_rate": 1.3705628991534235e-05, - "loss": 0.038, + "epoch": 5.431457002821922, + "grad_norm": 0.6530857682228088, + "learning_rate": 4.568542997178079e-06, + "loss": 0.063, "step": 36570 }, { - "epoch": 2.7164711124313086, - "grad_norm": 2.1783084869384766, - "learning_rate": 1.370117332541215e-05, - "loss": 0.0732, + "epoch": 5.432942224862617, + "grad_norm": 1.0885331630706787, + "learning_rate": 4.567057775137383e-06, + "loss": 0.0609, "step": 36580 }, { - "epoch": 2.717213723451656, - "grad_norm": 4.514249801635742, - "learning_rate": 1.3696717659290063e-05, - "loss": 0.0534, + "epoch": 5.434427446903312, + "grad_norm": 0.5104076862335205, + "learning_rate": 4.565572553096689e-06, + "loss": 0.0596, "step": 36590 }, { - "epoch": 2.7179563344720035, - "grad_norm": 1.7248497009277344, - "learning_rate": 1.3692261993167978e-05, - "loss": 0.0703, + "epoch": 5.435912668944007, + "grad_norm": 0.8749592900276184, + "learning_rate": 4.564087331055993e-06, + "loss": 0.055, "step": 36600 }, { - "epoch": 2.718698945492351, - "grad_norm": 2.7249276638031006, - "learning_rate": 1.3687806327045895e-05, - "loss": 0.0967, + "epoch": 5.437397890984702, + "grad_norm": 0.5066226720809937, + "learning_rate": 4.562602109015298e-06, + "loss": 0.0689, "step": 36610 }, { - "epoch": 2.719441556512699, - "grad_norm": 0.6530225276947021, - "learning_rate": 1.3683350660923808e-05, - "loss": 0.0476, + "epoch": 5.438883113025398, + "grad_norm": 0.8218633532524109, + "learning_rate": 4.561116886974603e-06, + "loss": 0.0722, "step": 36620 }, { - "epoch": 2.7201841675330463, - "grad_norm": 0.6490178108215332, - "learning_rate": 1.3678894994801723e-05, - "loss": 0.0502, + "epoch": 5.4403683350660925, + "grad_norm": 0.7993268370628357, + "learning_rate": 4.559631664933908e-06, + "loss": 0.0672, "step": 36630 }, { - "epoch": 2.7209267785533937, - "grad_norm": 2.1632354259490967, - "learning_rate": 1.3674439328679638e-05, - "loss": 0.0634, + "epoch": 5.441853557106787, + "grad_norm": 0.9641051292419434, + "learning_rate": 4.558146442893213e-06, + "loss": 0.0642, "step": 36640 }, { - "epoch": 2.721669389573741, - "grad_norm": 0.618598997592926, - "learning_rate": 1.3669983662557553e-05, - "loss": 0.0631, + "epoch": 5.443338779147482, + "grad_norm": 1.2114911079406738, + "learning_rate": 4.556661220852518e-06, + "loss": 0.0732, "step": 36650 }, { - "epoch": 2.7224120005940886, - "grad_norm": 0.579268753528595, - "learning_rate": 1.3665527996435467e-05, - "loss": 0.0632, + "epoch": 5.444824001188177, + "grad_norm": 1.2579625844955444, + "learning_rate": 4.555175998811823e-06, + "loss": 0.0651, "step": 36660 }, { - "epoch": 2.7231546116144365, - "grad_norm": 0.623193085193634, - "learning_rate": 1.3661072330313382e-05, - "loss": 0.074, + "epoch": 5.446309223228873, + "grad_norm": 1.0065919160842896, + "learning_rate": 4.5536907767711275e-06, + "loss": 0.0627, "step": 36670 }, { - "epoch": 2.723897222634784, - "grad_norm": 0.6630807518959045, - "learning_rate": 1.3656616664191298e-05, - "loss": 0.0574, + "epoch": 5.447794445269568, + "grad_norm": 0.6958107352256775, + "learning_rate": 4.552205554730433e-06, + "loss": 0.0598, "step": 36680 }, { - "epoch": 2.7246398336551314, - "grad_norm": 1.1906079053878784, - "learning_rate": 1.3652160998069212e-05, - "loss": 0.0833, + "epoch": 5.449279667310263, + "grad_norm": 1.2037479877471924, + "learning_rate": 4.550720332689737e-06, + "loss": 0.0825, "step": 36690 }, { - "epoch": 2.725382444675479, - "grad_norm": 0.7799108624458313, - "learning_rate": 1.3647705331947127e-05, - "loss": 0.0696, + "epoch": 5.450764889350958, + "grad_norm": 1.2185901403427124, + "learning_rate": 4.5492351106490425e-06, + "loss": 0.0478, "step": 36700 }, { - "epoch": 2.7261250556958263, - "grad_norm": 0.28752097487449646, - "learning_rate": 1.364324966582504e-05, - "loss": 0.0523, + "epoch": 5.452250111391653, + "grad_norm": 0.9952356219291687, + "learning_rate": 4.547749888608347e-06, + "loss": 0.0644, "step": 36710 }, { - "epoch": 2.726867666716174, - "grad_norm": 1.7490395307540894, - "learning_rate": 1.3638793999702957e-05, - "loss": 0.0729, + "epoch": 5.453735333432348, + "grad_norm": 1.9384931325912476, + "learning_rate": 4.546264666567652e-06, + "loss": 0.0546, "step": 36720 }, { - "epoch": 2.7276102777365216, - "grad_norm": 0.7951035499572754, - "learning_rate": 1.3634338333580872e-05, - "loss": 0.0777, + "epoch": 5.455220555473043, + "grad_norm": 0.6183782815933228, + "learning_rate": 4.5447794445269575e-06, + "loss": 0.0505, "step": 36730 }, { - "epoch": 2.728352888756869, - "grad_norm": 1.3298048973083496, - "learning_rate": 1.3629882667458785e-05, - "loss": 0.0631, + "epoch": 5.456705777513738, + "grad_norm": 0.5289264917373657, + "learning_rate": 4.543294222486262e-06, + "loss": 0.0601, "step": 36740 }, { - "epoch": 2.7290954997772165, - "grad_norm": 0.4356074929237366, - "learning_rate": 1.36254270013367e-05, - "loss": 0.0439, + "epoch": 5.458190999554433, + "grad_norm": 0.41108959913253784, + "learning_rate": 4.541809000445566e-06, + "loss": 0.0548, "step": 36750 }, { - "epoch": 2.729838110797564, - "grad_norm": 0.5063789486885071, - "learning_rate": 1.3620971335214615e-05, - "loss": 0.0666, + "epoch": 5.459676221595129, + "grad_norm": 0.7227884531021118, + "learning_rate": 4.540323778404872e-06, + "loss": 0.0673, "step": 36760 }, { - "epoch": 2.730580721817912, - "grad_norm": 2.056678533554077, - "learning_rate": 1.361651566909253e-05, - "loss": 0.078, + "epoch": 5.461161443635824, + "grad_norm": 0.6556276082992554, + "learning_rate": 4.538838556364177e-06, + "loss": 0.0544, "step": 36770 }, { - "epoch": 2.7313233328382593, - "grad_norm": 1.4419987201690674, - "learning_rate": 1.3612060002970443e-05, - "loss": 0.0662, + "epoch": 5.462646665676519, + "grad_norm": 1.0905522108078003, + "learning_rate": 4.537353334323481e-06, + "loss": 0.0686, "step": 36780 }, { - "epoch": 2.732065943858607, - "grad_norm": 1.547361969947815, - "learning_rate": 1.360760433684836e-05, - "loss": 0.046, + "epoch": 5.464131887717214, + "grad_norm": 0.9823871850967407, + "learning_rate": 4.535868112282787e-06, + "loss": 0.0595, "step": 36790 }, { - "epoch": 2.7328085548789542, - "grad_norm": 2.8562614917755127, - "learning_rate": 1.3603148670726275e-05, - "loss": 0.0744, + "epoch": 5.4656171097579085, + "grad_norm": 0.3967541754245758, + "learning_rate": 4.534382890242092e-06, + "loss": 0.0665, "step": 36800 }, { - "epoch": 2.7335511658993017, - "grad_norm": 3.6716110706329346, - "learning_rate": 1.3598693004604188e-05, - "loss": 0.0709, + "epoch": 5.467102331798604, + "grad_norm": 0.5711907744407654, + "learning_rate": 4.532897668201396e-06, + "loss": 0.0667, "step": 36810 }, { - "epoch": 2.7342937769196496, - "grad_norm": 1.2131446599960327, - "learning_rate": 1.3594237338482103e-05, - "loss": 0.0557, + "epoch": 5.468587553839299, + "grad_norm": 0.9890785813331604, + "learning_rate": 4.531412446160702e-06, + "loss": 0.0624, "step": 36820 }, { - "epoch": 2.735036387939997, - "grad_norm": 0.5412776470184326, - "learning_rate": 1.3589781672360018e-05, - "loss": 0.0738, + "epoch": 5.470072775879994, + "grad_norm": 0.9664349555969238, + "learning_rate": 4.529927224120007e-06, + "loss": 0.0521, "step": 36830 }, { - "epoch": 2.7357789989603445, - "grad_norm": 1.8566441535949707, - "learning_rate": 1.3585326006237933e-05, - "loss": 0.0771, + "epoch": 5.471557997920689, + "grad_norm": 1.5224270820617676, + "learning_rate": 4.528442002079311e-06, + "loss": 0.062, "step": 36840 }, { - "epoch": 2.7365216099806924, - "grad_norm": 1.2451858520507812, - "learning_rate": 1.3580870340115848e-05, - "loss": 0.0773, + "epoch": 5.473043219961384, + "grad_norm": 0.53714519739151, + "learning_rate": 4.526956780038616e-06, + "loss": 0.0652, "step": 36850 }, { - "epoch": 2.7372642210010394, - "grad_norm": 1.140763759613037, - "learning_rate": 1.3576414673993761e-05, - "loss": 0.0661, + "epoch": 5.47452844200208, + "grad_norm": 0.7480376958847046, + "learning_rate": 4.525471557997921e-06, + "loss": 0.0611, "step": 36860 }, { - "epoch": 2.7380068320213873, - "grad_norm": 1.0305567979812622, - "learning_rate": 1.3571959007871678e-05, - "loss": 0.0349, + "epoch": 5.476013664042775, + "grad_norm": 1.379319190979004, + "learning_rate": 4.523986335957226e-06, + "loss": 0.0522, "step": 36870 }, { - "epoch": 2.7387494430417347, - "grad_norm": 1.3327600955963135, - "learning_rate": 1.3567503341749591e-05, - "loss": 0.0824, + "epoch": 5.4774988860834695, + "grad_norm": 0.8469178080558777, + "learning_rate": 4.522501113916531e-06, + "loss": 0.0668, "step": 36880 }, { - "epoch": 2.739492054062082, - "grad_norm": 1.6335985660552979, - "learning_rate": 1.3563047675627506e-05, - "loss": 0.0727, + "epoch": 5.478984108124164, + "grad_norm": 0.9694294929504395, + "learning_rate": 4.521015891875836e-06, + "loss": 0.0573, "step": 36890 }, { - "epoch": 2.74023466508243, - "grad_norm": 1.2704558372497559, - "learning_rate": 1.3558592009505421e-05, - "loss": 0.0589, + "epoch": 5.480469330164859, + "grad_norm": 1.125335693359375, + "learning_rate": 4.519530669835141e-06, + "loss": 0.0659, "step": 36900 }, { - "epoch": 2.7409772761027775, - "grad_norm": 1.6128848791122437, - "learning_rate": 1.3554136343383336e-05, - "loss": 0.0584, + "epoch": 5.481954552205555, + "grad_norm": 1.0027401447296143, + "learning_rate": 4.518045447794446e-06, + "loss": 0.0548, "step": 36910 }, { - "epoch": 2.741719887123125, - "grad_norm": 1.1986207962036133, - "learning_rate": 1.3549680677261251e-05, - "loss": 0.0891, + "epoch": 5.48343977424625, + "grad_norm": 0.3352084755897522, + "learning_rate": 4.51656022575375e-06, + "loss": 0.0749, "step": 36920 }, { - "epoch": 2.7424624981434724, - "grad_norm": 0.5783780813217163, - "learning_rate": 1.3545225011139165e-05, - "loss": 0.0748, + "epoch": 5.484924996286945, + "grad_norm": 1.036489486694336, + "learning_rate": 4.5150750037130555e-06, + "loss": 0.0828, "step": 36930 }, { - "epoch": 2.74320510916382, - "grad_norm": 2.1520473957061768, - "learning_rate": 1.3540769345017081e-05, - "loss": 0.0562, + "epoch": 5.48641021832764, + "grad_norm": 0.21433958411216736, + "learning_rate": 4.513589781672361e-06, + "loss": 0.0765, "step": 36940 }, { - "epoch": 2.7439477201841678, - "grad_norm": 1.8193594217300415, - "learning_rate": 1.3536313678894995e-05, - "loss": 0.0797, + "epoch": 5.487895440368335, + "grad_norm": 0.8928041458129883, + "learning_rate": 4.512104559631665e-06, + "loss": 0.0514, "step": 36950 }, { - "epoch": 2.744690331204515, - "grad_norm": 2.4824233055114746, - "learning_rate": 1.353185801277291e-05, - "loss": 0.0791, + "epoch": 5.48938066240903, + "grad_norm": 1.3866063356399536, + "learning_rate": 4.5106193375909705e-06, + "loss": 0.0583, "step": 36960 }, { - "epoch": 2.7454329422248627, - "grad_norm": 1.889683723449707, - "learning_rate": 1.3527402346650825e-05, - "loss": 0.0832, + "epoch": 5.490865884449725, + "grad_norm": 0.5213386416435242, + "learning_rate": 4.509134115550275e-06, + "loss": 0.0583, "step": 36970 }, { - "epoch": 2.74617555324521, - "grad_norm": 1.3686025142669678, - "learning_rate": 1.352294668052874e-05, - "loss": 0.0714, + "epoch": 5.49235110649042, + "grad_norm": 0.6851054430007935, + "learning_rate": 4.50764889350958e-06, + "loss": 0.0462, "step": 36980 }, { - "epoch": 2.7469181642655576, - "grad_norm": 1.2818831205368042, - "learning_rate": 1.3518491014406655e-05, - "loss": 0.0581, + "epoch": 5.493836328531115, + "grad_norm": 0.9823634028434753, + "learning_rate": 4.506163671468885e-06, + "loss": 0.0476, "step": 36990 }, { - "epoch": 2.7476607752859055, - "grad_norm": 1.8120100498199463, - "learning_rate": 1.3514035348284568e-05, - "loss": 0.0515, + "epoch": 5.495321550571811, + "grad_norm": 0.695232093334198, + "learning_rate": 4.50467844942819e-06, + "loss": 0.0685, "step": 37000 }, { - "epoch": 2.748403386306253, - "grad_norm": 2.3514275550842285, - "learning_rate": 1.3509579682162483e-05, - "loss": 0.0766, + "epoch": 5.496806772612506, + "grad_norm": 0.4329679012298584, + "learning_rate": 4.503193227387494e-06, + "loss": 0.0561, "step": 37010 }, { - "epoch": 2.7491459973266004, - "grad_norm": 0.7103281617164612, - "learning_rate": 1.35051240160404e-05, - "loss": 0.06, + "epoch": 5.498291994653201, + "grad_norm": 0.4390914738178253, + "learning_rate": 4.5017080053468e-06, + "loss": 0.0571, "step": 37020 }, { - "epoch": 2.749888608346948, - "grad_norm": 0.8013458251953125, - "learning_rate": 1.3500668349918313e-05, - "loss": 0.042, + "epoch": 5.499777216693896, + "grad_norm": 0.7721920609474182, + "learning_rate": 4.500222783306105e-06, + "loss": 0.0643, "step": 37030 }, { - "epoch": 2.7506312193672953, - "grad_norm": 0.9104951024055481, - "learning_rate": 1.3496212683796228e-05, - "loss": 0.0657, + "epoch": 5.5012624387345905, + "grad_norm": 1.4150617122650146, + "learning_rate": 4.498737561265409e-06, + "loss": 0.0645, "step": 37040 }, { - "epoch": 2.751373830387643, - "grad_norm": 3.061896324157715, - "learning_rate": 1.3491757017674143e-05, - "loss": 0.0995, + "epoch": 5.502747660775286, + "grad_norm": 0.3090786039829254, + "learning_rate": 4.497252339224715e-06, + "loss": 0.0397, "step": 37050 }, { - "epoch": 2.7521164414079906, - "grad_norm": 2.806757688522339, - "learning_rate": 1.3487301351552058e-05, - "loss": 0.0623, + "epoch": 5.504232882815981, + "grad_norm": 1.3260048627853394, + "learning_rate": 4.49576711718402e-06, + "loss": 0.063, "step": 37060 }, { - "epoch": 2.752859052428338, - "grad_norm": 0.8061108589172363, - "learning_rate": 1.3482845685429971e-05, - "loss": 0.0434, + "epoch": 5.505718104856676, + "grad_norm": 0.3949032127857208, + "learning_rate": 4.494281895143324e-06, + "loss": 0.0639, "step": 37070 }, { - "epoch": 2.7536016634486855, - "grad_norm": 0.7972543835639954, - "learning_rate": 1.3478390019307886e-05, - "loss": 0.0756, + "epoch": 5.507203326897371, + "grad_norm": 1.1284046173095703, + "learning_rate": 4.492796673102629e-06, + "loss": 0.0691, "step": 37080 }, { - "epoch": 2.754344274469033, - "grad_norm": 1.5233701467514038, - "learning_rate": 1.3473934353185803e-05, - "loss": 0.0669, + "epoch": 5.508688548938066, + "grad_norm": 1.1532460451126099, + "learning_rate": 4.491311451061934e-06, + "loss": 0.0671, "step": 37090 }, { - "epoch": 2.755086885489381, - "grad_norm": 3.342548370361328, - "learning_rate": 1.3469478687063716e-05, - "loss": 0.0706, + "epoch": 5.510173770978762, + "grad_norm": 1.2885421514511108, + "learning_rate": 4.489826229021239e-06, + "loss": 0.0617, "step": 37100 }, { - "epoch": 2.7558294965097283, - "grad_norm": 0.6841835975646973, - "learning_rate": 1.3465023020941631e-05, - "loss": 0.0526, + "epoch": 5.511658993019457, + "grad_norm": 1.097259759902954, + "learning_rate": 4.488341006980544e-06, + "loss": 0.0509, "step": 37110 }, { - "epoch": 2.7565721075300758, - "grad_norm": 0.362078994512558, - "learning_rate": 1.3460567354819544e-05, - "loss": 0.0528, + "epoch": 5.5131442150601515, + "grad_norm": 1.5691416263580322, + "learning_rate": 4.486855784939849e-06, + "loss": 0.0539, "step": 37120 }, { - "epoch": 2.757314718550423, - "grad_norm": 2.367532968521118, - "learning_rate": 1.3456111688697461e-05, - "loss": 0.0709, + "epoch": 5.514629437100846, + "grad_norm": 0.8756474256515503, + "learning_rate": 4.485370562899154e-06, + "loss": 0.0522, "step": 37130 }, { - "epoch": 2.7580573295707707, - "grad_norm": 1.0397535562515259, - "learning_rate": 1.3451656022575376e-05, - "loss": 0.0827, + "epoch": 5.516114659141541, + "grad_norm": 1.1349800825119019, + "learning_rate": 4.483885340858459e-06, + "loss": 0.0504, "step": 37140 }, { - "epoch": 2.7587999405911185, - "grad_norm": 2.048051118850708, - "learning_rate": 1.344720035645329e-05, - "loss": 0.0707, + "epoch": 5.517599881182237, + "grad_norm": 0.840767502784729, + "learning_rate": 4.482400118817763e-06, + "loss": 0.0516, "step": 37150 }, { - "epoch": 2.759542551611466, - "grad_norm": 0.5190430283546448, - "learning_rate": 1.3442744690331206e-05, - "loss": 0.0711, + "epoch": 5.519085103222932, + "grad_norm": 1.6772485971450806, + "learning_rate": 4.4809148967770684e-06, + "loss": 0.0557, "step": 37160 }, { - "epoch": 2.7602851626318134, - "grad_norm": 2.5641369819641113, - "learning_rate": 1.343828902420912e-05, - "loss": 0.0677, + "epoch": 5.520570325263627, + "grad_norm": 1.1356531381607056, + "learning_rate": 4.479429674736374e-06, + "loss": 0.0711, "step": 37170 }, { - "epoch": 2.761027773652161, - "grad_norm": 0.1991005390882492, - "learning_rate": 1.3433833358087034e-05, - "loss": 0.0298, + "epoch": 5.522055547304322, + "grad_norm": 1.0116087198257446, + "learning_rate": 4.477944452695678e-06, + "loss": 0.0527, "step": 37180 }, { - "epoch": 2.7617703846725083, - "grad_norm": 1.7412118911743164, - "learning_rate": 1.3429377691964948e-05, - "loss": 0.061, + "epoch": 5.523540769345017, + "grad_norm": 0.8792704939842224, + "learning_rate": 4.4764592306549834e-06, + "loss": 0.0837, "step": 37190 }, { - "epoch": 2.7625129956928562, - "grad_norm": 1.8074331283569336, - "learning_rate": 1.3424922025842864e-05, - "loss": 0.0684, + "epoch": 5.5250259913857125, + "grad_norm": 0.801468014717102, + "learning_rate": 4.474974008614289e-06, + "loss": 0.0697, "step": 37200 }, { - "epoch": 2.7632556067132037, - "grad_norm": 1.0141547918319702, - "learning_rate": 1.342046635972078e-05, - "loss": 0.0713, + "epoch": 5.526511213426407, + "grad_norm": 0.8734177350997925, + "learning_rate": 4.473488786573593e-06, + "loss": 0.0578, "step": 37210 }, { - "epoch": 2.763998217733551, - "grad_norm": 1.6694709062576294, - "learning_rate": 1.3416010693598693e-05, - "loss": 0.0599, + "epoch": 5.527996435467102, + "grad_norm": 0.9292588233947754, + "learning_rate": 4.472003564532898e-06, + "loss": 0.0709, "step": 37220 }, { - "epoch": 2.7647408287538986, - "grad_norm": 2.54500675201416, - "learning_rate": 1.3411555027476608e-05, - "loss": 0.054, + "epoch": 5.529481657507797, + "grad_norm": 1.0219374895095825, + "learning_rate": 4.470518342492203e-06, + "loss": 0.0591, "step": 37230 }, { - "epoch": 2.765483439774246, - "grad_norm": 2.212883710861206, - "learning_rate": 1.3407099361354523e-05, - "loss": 0.0734, + "epoch": 5.530966879548492, + "grad_norm": 0.9166500568389893, + "learning_rate": 4.469033120451508e-06, + "loss": 0.0597, "step": 37240 }, { - "epoch": 2.766226050794594, - "grad_norm": 1.2556638717651367, - "learning_rate": 1.3402643695232438e-05, - "loss": 0.0718, + "epoch": 5.532452101589188, + "grad_norm": 0.7812015414237976, + "learning_rate": 4.4675478984108126e-06, + "loss": 0.0543, "step": 37250 }, { - "epoch": 2.7669686618149414, - "grad_norm": 2.478182792663574, - "learning_rate": 1.3398188029110353e-05, - "loss": 0.0535, + "epoch": 5.533937323629883, + "grad_norm": 0.8167303204536438, + "learning_rate": 4.466062676370118e-06, + "loss": 0.0654, "step": 37260 }, { - "epoch": 2.767711272835289, - "grad_norm": 1.532631516456604, - "learning_rate": 1.3393732362988266e-05, - "loss": 0.0807, + "epoch": 5.535422545670578, + "grad_norm": 1.0693591833114624, + "learning_rate": 4.464577454329422e-06, + "loss": 0.0457, "step": 37270 }, { - "epoch": 2.7684538838556363, - "grad_norm": 1.9082090854644775, - "learning_rate": 1.3389276696866183e-05, - "loss": 0.0805, + "epoch": 5.536907767711273, + "grad_norm": 1.1283386945724487, + "learning_rate": 4.4630922322887276e-06, + "loss": 0.0489, "step": 37280 }, { - "epoch": 2.7691964948759837, - "grad_norm": 1.0164248943328857, - "learning_rate": 1.3384821030744096e-05, - "loss": 0.0843, + "epoch": 5.5383929897519675, + "grad_norm": 1.1214085817337036, + "learning_rate": 4.461607010248033e-06, + "loss": 0.0729, "step": 37290 }, { - "epoch": 2.7699391058963316, - "grad_norm": 0.967978298664093, - "learning_rate": 1.3380365364622011e-05, - "loss": 0.0693, + "epoch": 5.539878211792663, + "grad_norm": 0.9980657696723938, + "learning_rate": 4.460121788207337e-06, + "loss": 0.0577, "step": 37300 }, { - "epoch": 2.770681716916679, - "grad_norm": 1.1831194162368774, - "learning_rate": 1.3375909698499928e-05, - "loss": 0.0716, + "epoch": 5.541363433833358, + "grad_norm": 0.49719762802124023, + "learning_rate": 4.4586365661666426e-06, + "loss": 0.062, "step": 37310 }, { - "epoch": 2.7714243279370265, - "grad_norm": 2.0037786960601807, - "learning_rate": 1.3371454032377841e-05, - "loss": 0.0813, + "epoch": 5.542848655874053, + "grad_norm": 0.40343061089515686, + "learning_rate": 4.457151344125947e-06, + "loss": 0.056, "step": 37320 }, { - "epoch": 2.772166938957374, - "grad_norm": 1.3486874103546143, - "learning_rate": 1.3366998366255756e-05, - "loss": 0.0912, + "epoch": 5.544333877914748, + "grad_norm": 0.7878409624099731, + "learning_rate": 4.455666122085252e-06, + "loss": 0.0823, "step": 37330 }, { - "epoch": 2.7729095499777214, - "grad_norm": 1.2542924880981445, - "learning_rate": 1.336254270013367e-05, - "loss": 0.067, + "epoch": 5.545819099955443, + "grad_norm": 0.6854997873306274, + "learning_rate": 4.454180900044557e-06, + "loss": 0.0468, "step": 37340 }, { - "epoch": 2.7736521609980693, - "grad_norm": 0.595507025718689, - "learning_rate": 1.3358087034011586e-05, - "loss": 0.0758, + "epoch": 5.547304321996139, + "grad_norm": 0.8148928880691528, + "learning_rate": 4.452695678003862e-06, + "loss": 0.0489, "step": 37350 }, { - "epoch": 2.7743947720184168, - "grad_norm": 1.162650465965271, - "learning_rate": 1.33536313678895e-05, - "loss": 0.0643, + "epoch": 5.5487895440368336, + "grad_norm": 1.3261892795562744, + "learning_rate": 4.451210455963167e-06, + "loss": 0.0787, "step": 37360 }, { - "epoch": 2.7751373830387642, - "grad_norm": 0.5855199098587036, - "learning_rate": 1.3349175701767414e-05, - "loss": 0.0572, + "epoch": 5.5502747660775285, + "grad_norm": 0.3939753472805023, + "learning_rate": 4.449725233922472e-06, + "loss": 0.0703, "step": 37370 }, { - "epoch": 2.7758799940591117, - "grad_norm": 3.25514554977417, - "learning_rate": 1.334472003564533e-05, - "loss": 0.0628, + "epoch": 5.551759988118223, + "grad_norm": 1.5147497653961182, + "learning_rate": 4.448240011881776e-06, + "loss": 0.0622, "step": 37380 }, { - "epoch": 2.776622605079459, - "grad_norm": 2.44706392288208, - "learning_rate": 1.3340264369523244e-05, - "loss": 0.0798, + "epoch": 5.553245210158919, + "grad_norm": 1.1918028593063354, + "learning_rate": 4.446754789841081e-06, + "loss": 0.0724, "step": 37390 }, { - "epoch": 2.777365216099807, - "grad_norm": 1.5468707084655762, - "learning_rate": 1.333580870340116e-05, - "loss": 0.0699, + "epoch": 5.554730432199614, + "grad_norm": 1.0911834239959717, + "learning_rate": 4.445269567800387e-06, + "loss": 0.0526, "step": 37400 }, { - "epoch": 2.7781078271201545, - "grad_norm": 3.0609419345855713, - "learning_rate": 1.3331353037279073e-05, - "loss": 0.0741, + "epoch": 5.556215654240309, + "grad_norm": 1.4858453273773193, + "learning_rate": 4.443784345759691e-06, + "loss": 0.0882, "step": 37410 }, { - "epoch": 2.778850438140502, - "grad_norm": 2.7245450019836426, - "learning_rate": 1.3326897371156988e-05, - "loss": 0.0528, + "epoch": 5.557700876281004, + "grad_norm": 1.0886820554733276, + "learning_rate": 4.442299123718996e-06, + "loss": 0.0693, "step": 37420 }, { - "epoch": 2.77959304916085, - "grad_norm": 0.9166297912597656, - "learning_rate": 1.3322441705034904e-05, - "loss": 0.0744, + "epoch": 5.559186098321699, + "grad_norm": 0.7892463803291321, + "learning_rate": 4.440813901678302e-06, + "loss": 0.0635, "step": 37430 }, { - "epoch": 2.780335660181197, - "grad_norm": 1.0476568937301636, - "learning_rate": 1.3317986038912818e-05, - "loss": 0.0556, + "epoch": 5.5606713203623945, + "grad_norm": 0.9498715996742249, + "learning_rate": 4.439328679637606e-06, + "loss": 0.0475, "step": 37440 }, { - "epoch": 2.7810782712015447, - "grad_norm": 2.8554935455322266, - "learning_rate": 1.3313530372790733e-05, - "loss": 0.0651, + "epoch": 5.562156542403089, + "grad_norm": 0.8907163739204407, + "learning_rate": 4.4378434575969106e-06, + "loss": 0.039, "step": 37450 }, { - "epoch": 2.781820882221892, - "grad_norm": 1.7968850135803223, - "learning_rate": 1.3309074706668648e-05, - "loss": 0.0684, + "epoch": 5.563641764443784, + "grad_norm": 0.8134555220603943, + "learning_rate": 4.436358235556216e-06, + "loss": 0.0573, "step": 37460 }, { - "epoch": 2.7825634932422396, - "grad_norm": 3.455589532852173, - "learning_rate": 1.3304619040546563e-05, - "loss": 0.0739, + "epoch": 5.565126986484479, + "grad_norm": 0.6253407597541809, + "learning_rate": 4.434873013515521e-06, + "loss": 0.0811, "step": 37470 }, { - "epoch": 2.7833061042625875, - "grad_norm": 1.8191416263580322, - "learning_rate": 1.3300163374424476e-05, - "loss": 0.0527, + "epoch": 5.566612208525174, + "grad_norm": 1.469370722770691, + "learning_rate": 4.4333877914748256e-06, + "loss": 0.0658, "step": 37480 }, { - "epoch": 2.784048715282935, - "grad_norm": 1.094232201576233, - "learning_rate": 1.329570770830239e-05, - "loss": 0.076, + "epoch": 5.56809743056587, + "grad_norm": 0.6674718260765076, + "learning_rate": 4.431902569434131e-06, + "loss": 0.0684, "step": 37490 }, { - "epoch": 2.7847913263032824, - "grad_norm": 4.273893356323242, - "learning_rate": 1.3291252042180307e-05, - "loss": 0.086, + "epoch": 5.569582652606565, + "grad_norm": 1.1165416240692139, + "learning_rate": 4.430417347393436e-06, + "loss": 0.0533, "step": 37500 }, { - "epoch": 2.78553393732363, - "grad_norm": 2.4333572387695312, - "learning_rate": 1.328679637605822e-05, - "loss": 0.102, + "epoch": 5.57106787464726, + "grad_norm": 0.9866893887519836, + "learning_rate": 4.4289321253527405e-06, + "loss": 0.0606, "step": 37510 }, { - "epoch": 2.7862765483439773, - "grad_norm": 1.4521609544754028, - "learning_rate": 1.3282340709936136e-05, - "loss": 0.0733, + "epoch": 5.572553096687955, + "grad_norm": 0.32497164607048035, + "learning_rate": 4.427446903312046e-06, + "loss": 0.0504, "step": 37520 }, { - "epoch": 2.787019159364325, - "grad_norm": 1.7854300737380981, - "learning_rate": 1.3277885043814049e-05, - "loss": 0.0426, + "epoch": 5.57403831872865, + "grad_norm": 0.6711202263832092, + "learning_rate": 4.42596168127135e-06, + "loss": 0.0581, "step": 37530 }, { - "epoch": 2.7877617703846727, - "grad_norm": 1.4833481311798096, - "learning_rate": 1.3273429377691966e-05, - "loss": 0.0501, + "epoch": 5.575523540769345, + "grad_norm": 0.4225695729255676, + "learning_rate": 4.4244764592306555e-06, + "loss": 0.0614, "step": 37540 }, { - "epoch": 2.78850438140502, - "grad_norm": 3.3596692085266113, - "learning_rate": 1.326897371156988e-05, - "loss": 0.0536, + "epoch": 5.57700876281004, + "grad_norm": 1.0005754232406616, + "learning_rate": 4.42299123718996e-06, + "loss": 0.0638, "step": 37550 }, { - "epoch": 2.7892469924253676, - "grad_norm": 3.833606481552124, - "learning_rate": 1.3264518045447794e-05, - "loss": 0.0684, + "epoch": 5.578493984850735, + "grad_norm": 1.128718614578247, + "learning_rate": 4.421506015149265e-06, + "loss": 0.0631, "step": 37560 }, { - "epoch": 2.789989603445715, - "grad_norm": 0.862786054611206, - "learning_rate": 1.326006237932571e-05, - "loss": 0.0419, + "epoch": 5.57997920689143, + "grad_norm": 0.8793702125549316, + "learning_rate": 4.4200207931085705e-06, + "loss": 0.0468, "step": 37570 }, { - "epoch": 2.790732214466063, - "grad_norm": 0.7479864954948425, - "learning_rate": 1.3255606713203624e-05, - "loss": 0.062, + "epoch": 5.581464428932126, + "grad_norm": 0.840347409248352, + "learning_rate": 4.418535571067875e-06, + "loss": 0.0619, "step": 37580 }, { - "epoch": 2.7914748254864103, - "grad_norm": 1.0688323974609375, - "learning_rate": 1.3251151047081539e-05, - "loss": 0.0577, + "epoch": 5.582949650972821, + "grad_norm": 0.5288404226303101, + "learning_rate": 4.41705034902718e-06, + "loss": 0.0544, "step": 37590 }, { - "epoch": 2.792217436506758, - "grad_norm": 1.409751057624817, - "learning_rate": 1.3246695380959454e-05, - "loss": 0.0587, + "epoch": 5.584434873013516, + "grad_norm": 0.6308549642562866, + "learning_rate": 4.415565126986485e-06, + "loss": 0.0569, "step": 37600 }, { - "epoch": 2.7929600475271052, - "grad_norm": 0.3575490415096283, - "learning_rate": 1.3242239714837369e-05, - "loss": 0.0798, + "epoch": 5.5859200950542105, + "grad_norm": 0.5083063244819641, + "learning_rate": 4.41407990494579e-06, + "loss": 0.0664, "step": 37610 }, { - "epoch": 2.7937026585474527, - "grad_norm": 1.4559156894683838, - "learning_rate": 1.3237784048715284e-05, - "loss": 0.0729, + "epoch": 5.587405317094905, + "grad_norm": 0.6433333158493042, + "learning_rate": 4.412594682905094e-06, + "loss": 0.0511, "step": 37620 }, { - "epoch": 2.7944452695678006, - "grad_norm": 1.0611257553100586, - "learning_rate": 1.3233328382593197e-05, - "loss": 0.0695, + "epoch": 5.588890539135601, + "grad_norm": 1.3996425867080688, + "learning_rate": 4.4111094608644e-06, + "loss": 0.0624, "step": 37630 }, { - "epoch": 2.795187880588148, - "grad_norm": 1.5635493993759155, - "learning_rate": 1.3228872716471112e-05, - "loss": 0.0826, + "epoch": 5.590375761176296, + "grad_norm": 0.6735121607780457, + "learning_rate": 4.409624238823704e-06, + "loss": 0.0724, "step": 37640 }, { - "epoch": 2.7959304916084955, - "grad_norm": 0.6104263663291931, - "learning_rate": 1.3224417050349027e-05, - "loss": 0.0548, + "epoch": 5.591860983216991, + "grad_norm": 0.8044751286506653, + "learning_rate": 4.408139016783009e-06, + "loss": 0.0592, "step": 37650 }, { - "epoch": 2.796673102628843, - "grad_norm": 1.3987880945205688, - "learning_rate": 1.3219961384226942e-05, - "loss": 0.0678, + "epoch": 5.593346205257686, + "grad_norm": 0.9791496396064758, + "learning_rate": 4.406653794742315e-06, + "loss": 0.0695, "step": 37660 }, { - "epoch": 2.7974157136491904, - "grad_norm": 0.8820175528526306, - "learning_rate": 1.3215505718104857e-05, - "loss": 0.0589, + "epoch": 5.594831427298381, + "grad_norm": 0.7882761359214783, + "learning_rate": 4.405168572701619e-06, + "loss": 0.0539, "step": 37670 }, { - "epoch": 2.7981583246695383, - "grad_norm": 1.4698015451431274, - "learning_rate": 1.321105005198277e-05, - "loss": 0.0607, + "epoch": 5.596316649339077, + "grad_norm": 0.5030707716941833, + "learning_rate": 4.4036833506609235e-06, + "loss": 0.052, "step": 37680 }, { - "epoch": 2.7989009356898857, - "grad_norm": 2.0641324520111084, - "learning_rate": 1.3206594385860687e-05, - "loss": 0.0633, + "epoch": 5.5978018713797715, + "grad_norm": 0.3755476474761963, + "learning_rate": 4.402198128620229e-06, + "loss": 0.0548, "step": 37690 }, { - "epoch": 2.799643546710233, - "grad_norm": 3.1822593212127686, - "learning_rate": 1.32021387197386e-05, - "loss": 0.0465, + "epoch": 5.599287093420466, + "grad_norm": 1.3941385746002197, + "learning_rate": 4.400712906579534e-06, + "loss": 0.0746, "step": 37700 }, { - "epoch": 2.8003861577305806, - "grad_norm": 1.4389050006866455, - "learning_rate": 1.3197683053616516e-05, - "loss": 0.0879, + "epoch": 5.600772315461161, + "grad_norm": 0.32942524552345276, + "learning_rate": 4.3992276845388385e-06, + "loss": 0.0443, "step": 37710 }, { - "epoch": 2.801128768750928, - "grad_norm": 1.4606937170028687, - "learning_rate": 1.3193227387494432e-05, - "loss": 0.0548, + "epoch": 5.602257537501856, + "grad_norm": 0.9542339444160461, + "learning_rate": 4.397742462498144e-06, + "loss": 0.0577, "step": 37720 }, { - "epoch": 2.801871379771276, - "grad_norm": 2.7403457164764404, - "learning_rate": 1.3188771721372346e-05, - "loss": 0.0507, + "epoch": 5.603742759542552, + "grad_norm": 0.5538778305053711, + "learning_rate": 4.396257240457449e-06, + "loss": 0.0636, "step": 37730 }, { - "epoch": 2.8026139907916234, - "grad_norm": 2.382749080657959, - "learning_rate": 1.318431605525026e-05, - "loss": 0.0725, + "epoch": 5.605227981583247, + "grad_norm": 1.0870361328125, + "learning_rate": 4.3947720184167535e-06, + "loss": 0.0775, "step": 37740 }, { - "epoch": 2.803356601811971, - "grad_norm": 1.1223398447036743, - "learning_rate": 1.3179860389128174e-05, - "loss": 0.0901, + "epoch": 5.606713203623942, + "grad_norm": 0.5325985550880432, + "learning_rate": 4.393286796376058e-06, + "loss": 0.0664, "step": 37750 }, { - "epoch": 2.8040992128323183, - "grad_norm": 1.5770460367202759, - "learning_rate": 1.317540472300609e-05, - "loss": 0.0897, + "epoch": 5.608198425664637, + "grad_norm": 0.9860812425613403, + "learning_rate": 4.391801574335364e-06, + "loss": 0.0623, "step": 37760 }, { - "epoch": 2.804841823852666, - "grad_norm": 1.267210602760315, - "learning_rate": 1.3170949056884004e-05, - "loss": 0.0532, + "epoch": 5.609683647705332, + "grad_norm": 0.6696199178695679, + "learning_rate": 4.3903163522946685e-06, + "loss": 0.0463, "step": 37770 }, { - "epoch": 2.8055844348730137, - "grad_norm": 0.7207576036453247, - "learning_rate": 1.3166493390761919e-05, - "loss": 0.0356, + "epoch": 5.611168869746027, + "grad_norm": 1.3116692304611206, + "learning_rate": 4.388831130253973e-06, + "loss": 0.0611, "step": 37780 }, { - "epoch": 2.806327045893361, - "grad_norm": 0.6520107984542847, - "learning_rate": 1.3162037724639834e-05, - "loss": 0.0663, + "epoch": 5.612654091786722, + "grad_norm": 0.8291306495666504, + "learning_rate": 4.387345908213278e-06, + "loss": 0.0662, "step": 37790 }, { - "epoch": 2.8070696569137086, - "grad_norm": 1.4991291761398315, - "learning_rate": 1.3157582058517749e-05, - "loss": 0.0732, + "epoch": 5.614139313827417, + "grad_norm": 1.2390786409378052, + "learning_rate": 4.3858606861725835e-06, + "loss": 0.059, "step": 37800 }, { - "epoch": 2.807812267934056, - "grad_norm": 2.074842691421509, - "learning_rate": 1.3153126392395664e-05, - "loss": 0.0878, + "epoch": 5.615624535868112, + "grad_norm": 0.18506869673728943, + "learning_rate": 4.384375464131888e-06, + "loss": 0.0505, "step": 37810 }, { - "epoch": 2.8085548789544035, - "grad_norm": 0.5263580083847046, - "learning_rate": 1.3148670726273577e-05, - "loss": 0.098, + "epoch": 5.617109757908807, + "grad_norm": 1.069229245185852, + "learning_rate": 4.382890242091193e-06, + "loss": 0.0607, "step": 37820 }, { - "epoch": 2.8092974899747514, - "grad_norm": 2.034339427947998, - "learning_rate": 1.3144215060151494e-05, - "loss": 0.0592, + "epoch": 5.618594979949503, + "grad_norm": 1.214794635772705, + "learning_rate": 4.3814050200504985e-06, + "loss": 0.0613, "step": 37830 }, { - "epoch": 2.810040100995099, - "grad_norm": 1.7744560241699219, - "learning_rate": 1.3139759394029409e-05, - "loss": 0.0785, + "epoch": 5.620080201990198, + "grad_norm": 0.9908319711685181, + "learning_rate": 4.379919798009803e-06, + "loss": 0.0591, "step": 37840 }, { - "epoch": 2.8107827120154463, - "grad_norm": 1.718765377998352, - "learning_rate": 1.3135303727907322e-05, - "loss": 0.0602, + "epoch": 5.6215654240308925, + "grad_norm": 1.3837628364562988, + "learning_rate": 4.378434575969107e-06, + "loss": 0.0753, "step": 37850 }, { - "epoch": 2.8115253230357937, - "grad_norm": 0.45636001229286194, - "learning_rate": 1.3130848061785237e-05, - "loss": 0.0625, + "epoch": 5.623050646071587, + "grad_norm": 1.1857173442840576, + "learning_rate": 4.376949353928413e-06, + "loss": 0.0669, "step": 37860 }, { - "epoch": 2.812267934056141, - "grad_norm": 0.8540383577346802, - "learning_rate": 1.3126392395663152e-05, - "loss": 0.0749, + "epoch": 5.624535868112282, + "grad_norm": 0.367318719625473, + "learning_rate": 4.375464131887718e-06, + "loss": 0.0433, "step": 37870 }, { - "epoch": 2.813010545076489, - "grad_norm": 0.7994318604469299, - "learning_rate": 1.3121936729541067e-05, - "loss": 0.0738, + "epoch": 5.626021090152978, + "grad_norm": 0.9903393983840942, + "learning_rate": 4.373978909847022e-06, + "loss": 0.0626, "step": 37880 }, { - "epoch": 2.8137531560968365, - "grad_norm": 2.370769739151001, - "learning_rate": 1.311748106341898e-05, - "loss": 0.0594, + "epoch": 5.627506312193673, + "grad_norm": 1.4275164604187012, + "learning_rate": 4.372493687806328e-06, + "loss": 0.0578, "step": 37890 }, { - "epoch": 2.814495767117184, - "grad_norm": 3.0861258506774902, - "learning_rate": 1.3113025397296895e-05, - "loss": 0.0758, + "epoch": 5.628991534234368, + "grad_norm": 0.587713360786438, + "learning_rate": 4.371008465765632e-06, + "loss": 0.0544, "step": 37900 }, { - "epoch": 2.8152383781375314, - "grad_norm": 0.46465158462524414, - "learning_rate": 1.3108569731174812e-05, - "loss": 0.0581, + "epoch": 5.630476756275063, + "grad_norm": 0.7679840922355652, + "learning_rate": 4.369523243724937e-06, + "loss": 0.0582, "step": 37910 }, { - "epoch": 2.815980989157879, - "grad_norm": 2.443127393722534, - "learning_rate": 1.3104114065052725e-05, - "loss": 0.0948, + "epoch": 5.631961978315758, + "grad_norm": 1.3122962713241577, + "learning_rate": 4.368038021684242e-06, + "loss": 0.0571, "step": 37920 }, { - "epoch": 2.8167236001782268, - "grad_norm": 2.299797534942627, - "learning_rate": 1.309965839893064e-05, - "loss": 0.0565, + "epoch": 5.6334472003564535, + "grad_norm": 0.521410346031189, + "learning_rate": 4.366552799643547e-06, + "loss": 0.0539, "step": 37930 }, { - "epoch": 2.817466211198574, - "grad_norm": 3.1325736045837402, - "learning_rate": 1.3095202732808554e-05, - "loss": 0.0834, + "epoch": 5.634932422397148, + "grad_norm": 1.6043994426727295, + "learning_rate": 4.3650675776028515e-06, + "loss": 0.0726, "step": 37940 }, { - "epoch": 2.8182088222189217, - "grad_norm": 1.0582780838012695, - "learning_rate": 1.309074706668647e-05, - "loss": 0.0739, + "epoch": 5.636417644437843, + "grad_norm": 0.3124872148036957, + "learning_rate": 4.363582355562157e-06, + "loss": 0.0526, "step": 37950 }, { - "epoch": 2.818951433239269, - "grad_norm": 2.284137725830078, - "learning_rate": 1.3086291400564385e-05, - "loss": 0.0861, + "epoch": 5.637902866478538, + "grad_norm": 0.7532991170883179, + "learning_rate": 4.362097133521462e-06, + "loss": 0.059, "step": 37960 }, { - "epoch": 2.8196940442596166, - "grad_norm": 0.8023969531059265, - "learning_rate": 1.3081835734442299e-05, - "loss": 0.0615, + "epoch": 5.639388088519234, + "grad_norm": 0.4675773084163666, + "learning_rate": 4.3606119114807665e-06, + "loss": 0.059, "step": 37970 }, { - "epoch": 2.8204366552799645, - "grad_norm": 1.1526970863342285, - "learning_rate": 1.3077380068320215e-05, - "loss": 0.0686, + "epoch": 5.640873310559929, + "grad_norm": 0.5681068897247314, + "learning_rate": 4.359126689440072e-06, + "loss": 0.0548, "step": 37980 }, { - "epoch": 2.821179266300312, - "grad_norm": 2.1727919578552246, - "learning_rate": 1.3072924402198129e-05, - "loss": 0.0874, + "epoch": 5.642358532600624, + "grad_norm": 1.514446496963501, + "learning_rate": 4.357641467399377e-06, + "loss": 0.0591, "step": 37990 }, { - "epoch": 2.8219218773206594, - "grad_norm": 0.8734510540962219, - "learning_rate": 1.3068468736076044e-05, - "loss": 0.0577, + "epoch": 5.643843754641319, + "grad_norm": 1.4750310182571411, + "learning_rate": 4.3561562453586815e-06, + "loss": 0.0604, "step": 38000 }, { - "epoch": 2.8226644883410072, - "grad_norm": 0.24533693492412567, - "learning_rate": 1.3064013069953959e-05, - "loss": 0.0887, + "epoch": 5.645328976682014, + "grad_norm": 0.546931266784668, + "learning_rate": 4.354671023317986e-06, + "loss": 0.0825, "step": 38010 }, { - "epoch": 2.8234070993613543, - "grad_norm": 2.325021982192993, - "learning_rate": 1.3059557403831874e-05, - "loss": 0.0681, + "epoch": 5.646814198722709, + "grad_norm": 0.9507127404212952, + "learning_rate": 4.353185801277291e-06, + "loss": 0.0393, "step": 38020 }, { - "epoch": 2.824149710381702, - "grad_norm": 1.5730549097061157, - "learning_rate": 1.3055101737709789e-05, - "loss": 0.0522, + "epoch": 5.648299420763404, + "grad_norm": 0.8796098828315735, + "learning_rate": 4.3517005792365965e-06, + "loss": 0.0421, "step": 38030 }, { - "epoch": 2.8248923214020496, - "grad_norm": 1.1065586805343628, - "learning_rate": 1.3050646071587702e-05, - "loss": 0.0581, + "epoch": 5.649784642804099, + "grad_norm": 1.5660216808319092, + "learning_rate": 4.350215357195901e-06, + "loss": 0.0827, "step": 38040 }, { - "epoch": 2.825634932422397, - "grad_norm": 0.48450005054473877, - "learning_rate": 1.3046190405465617e-05, - "loss": 0.0715, + "epoch": 5.651269864844794, + "grad_norm": 0.5498520135879517, + "learning_rate": 4.348730135155206e-06, + "loss": 0.0658, "step": 38050 }, { - "epoch": 2.826377543442745, - "grad_norm": 1.2192469835281372, - "learning_rate": 1.3041734739343532e-05, - "loss": 0.0557, + "epoch": 5.652755086885489, + "grad_norm": 0.9659833312034607, + "learning_rate": 4.3472449131145115e-06, + "loss": 0.0422, "step": 38060 }, { - "epoch": 2.8271201544630924, - "grad_norm": 1.8304122686386108, - "learning_rate": 1.3037279073221447e-05, - "loss": 0.0832, + "epoch": 5.654240308926185, + "grad_norm": 0.7959330081939697, + "learning_rate": 4.345759691073816e-06, + "loss": 0.0656, "step": 38070 }, { - "epoch": 2.82786276548344, - "grad_norm": 1.7451564073562622, - "learning_rate": 1.3032823407099362e-05, - "loss": 0.0839, + "epoch": 5.65572553096688, + "grad_norm": 0.8035279512405396, + "learning_rate": 4.34427446903312e-06, + "loss": 0.0875, "step": 38080 }, { - "epoch": 2.8286053765037873, - "grad_norm": 1.190588355064392, - "learning_rate": 1.3028367740977277e-05, - "loss": 0.0659, + "epoch": 5.657210753007575, + "grad_norm": 0.5401741862297058, + "learning_rate": 4.342789246992426e-06, + "loss": 0.0631, "step": 38090 }, { - "epoch": 2.8293479875241347, - "grad_norm": 0.46794483065605164, - "learning_rate": 1.3023912074855192e-05, - "loss": 0.0554, + "epoch": 5.6586959750482695, + "grad_norm": 0.7207291722297668, + "learning_rate": 4.341304024951731e-06, + "loss": 0.0705, "step": 38100 }, { - "epoch": 2.8300905985444826, - "grad_norm": 1.307004451751709, - "learning_rate": 1.3019456408733105e-05, - "loss": 0.0637, + "epoch": 5.660181197088965, + "grad_norm": 0.7631051540374756, + "learning_rate": 4.339818802911035e-06, + "loss": 0.0497, "step": 38110 }, { - "epoch": 2.83083320956483, - "grad_norm": 1.7343428134918213, - "learning_rate": 1.301500074261102e-05, - "loss": 0.0469, + "epoch": 5.66166641912966, + "grad_norm": 0.6719472408294678, + "learning_rate": 4.338333580870341e-06, + "loss": 0.0634, "step": 38120 }, { - "epoch": 2.8315758205851775, - "grad_norm": 1.2779510021209717, - "learning_rate": 1.3010545076488937e-05, - "loss": 0.0376, + "epoch": 5.663151641170355, + "grad_norm": 0.5348994135856628, + "learning_rate": 4.336848358829646e-06, + "loss": 0.0565, "step": 38130 }, { - "epoch": 2.832318431605525, - "grad_norm": 0.3328961431980133, - "learning_rate": 1.300608941036685e-05, - "loss": 0.0531, + "epoch": 5.66463686321105, + "grad_norm": 1.271963357925415, + "learning_rate": 4.33536313678895e-06, + "loss": 0.0578, "step": 38140 }, { - "epoch": 2.8330610426258724, - "grad_norm": 1.039929986000061, - "learning_rate": 1.3001633744244765e-05, - "loss": 0.0729, + "epoch": 5.666122085251745, + "grad_norm": 0.6291659474372864, + "learning_rate": 4.333877914748255e-06, + "loss": 0.0731, "step": 38150 }, { - "epoch": 2.8338036536462203, - "grad_norm": 1.5998934507369995, - "learning_rate": 1.2997178078122678e-05, - "loss": 0.0736, + "epoch": 5.667607307292441, + "grad_norm": 0.6707026362419128, + "learning_rate": 4.33239269270756e-06, + "loss": 0.0597, "step": 38160 }, { - "epoch": 2.834546264666568, - "grad_norm": 0.7443707585334778, - "learning_rate": 1.2992722412000595e-05, - "loss": 0.0618, + "epoch": 5.669092529333136, + "grad_norm": 1.3206006288528442, + "learning_rate": 4.330907470666865e-06, + "loss": 0.0441, "step": 38170 }, { - "epoch": 2.8352888756869152, - "grad_norm": 1.4436475038528442, - "learning_rate": 1.2988266745878508e-05, - "loss": 0.0762, + "epoch": 5.6705777513738305, + "grad_norm": 0.4817686378955841, + "learning_rate": 4.32942224862617e-06, + "loss": 0.0543, "step": 38180 }, { - "epoch": 2.8360314867072627, - "grad_norm": 1.3913630247116089, - "learning_rate": 1.2983811079756423e-05, - "loss": 0.0825, + "epoch": 5.672062973414525, + "grad_norm": 1.7800952196121216, + "learning_rate": 4.327937026585475e-06, + "loss": 0.0586, "step": 38190 }, { - "epoch": 2.83677409772761, - "grad_norm": 1.0317375659942627, - "learning_rate": 1.2979355413634338e-05, - "loss": 0.0693, + "epoch": 5.67354819545522, + "grad_norm": 0.9077200889587402, + "learning_rate": 4.3264518045447795e-06, + "loss": 0.0631, "step": 38200 }, { - "epoch": 2.837516708747958, - "grad_norm": 1.5049179792404175, - "learning_rate": 1.2974899747512253e-05, - "loss": 0.0597, + "epoch": 5.675033417495916, + "grad_norm": 1.626470685005188, + "learning_rate": 4.324966582504085e-06, + "loss": 0.0641, "step": 38210 }, { - "epoch": 2.8382593197683055, - "grad_norm": 1.5254199504852295, - "learning_rate": 1.2970444081390168e-05, - "loss": 0.0452, + "epoch": 5.676518639536611, + "grad_norm": 0.8949599862098694, + "learning_rate": 4.32348136046339e-06, + "loss": 0.0713, "step": 38220 }, { - "epoch": 2.839001930788653, - "grad_norm": 2.0400617122650146, - "learning_rate": 1.2965988415268082e-05, - "loss": 0.0726, + "epoch": 5.678003861577306, + "grad_norm": 1.184775471687317, + "learning_rate": 4.3219961384226945e-06, + "loss": 0.0648, "step": 38230 }, { - "epoch": 2.8397445418090004, - "grad_norm": 1.2715054750442505, - "learning_rate": 1.2961532749145998e-05, - "loss": 0.0657, + "epoch": 5.679489083618001, + "grad_norm": 0.5150486826896667, + "learning_rate": 4.320510916382e-06, + "loss": 0.0733, "step": 38240 }, { - "epoch": 2.840487152829348, - "grad_norm": 1.5253748893737793, - "learning_rate": 1.2957077083023913e-05, - "loss": 0.093, + "epoch": 5.680974305658696, + "grad_norm": 0.5177794098854065, + "learning_rate": 4.319025694341304e-06, + "loss": 0.0447, "step": 38250 }, { - "epoch": 2.8412297638496957, - "grad_norm": 1.2937556505203247, - "learning_rate": 1.2952621416901827e-05, - "loss": 0.0773, + "epoch": 5.682459527699391, + "grad_norm": 0.49653327465057373, + "learning_rate": 4.3175404723006095e-06, + "loss": 0.0577, "step": 38260 }, { - "epoch": 2.841972374870043, - "grad_norm": 0.9976204633712769, - "learning_rate": 1.2948165750779742e-05, - "loss": 0.0774, + "epoch": 5.683944749740086, + "grad_norm": 1.3010729551315308, + "learning_rate": 4.316055250259914e-06, + "loss": 0.0541, "step": 38270 }, { - "epoch": 2.8427149858903906, - "grad_norm": 0.886090874671936, - "learning_rate": 1.2943710084657657e-05, - "loss": 0.051, + "epoch": 5.685429971780781, + "grad_norm": 0.6356299519538879, + "learning_rate": 4.314570028219219e-06, + "loss": 0.0582, "step": 38280 }, { - "epoch": 2.843457596910738, - "grad_norm": 1.4611785411834717, - "learning_rate": 1.2939254418535572e-05, - "loss": 0.0612, + "epoch": 5.686915193821476, + "grad_norm": 1.8878618478775024, + "learning_rate": 4.3130848061785245e-06, + "loss": 0.0554, "step": 38290 }, { - "epoch": 2.8442002079310855, - "grad_norm": 0.9807224869728088, - "learning_rate": 1.2934798752413487e-05, - "loss": 0.0654, + "epoch": 5.688400415862171, + "grad_norm": 0.5616238713264465, + "learning_rate": 4.311599584137829e-06, + "loss": 0.0745, "step": 38300 }, { - "epoch": 2.8449428189514334, - "grad_norm": 1.1847294569015503, - "learning_rate": 1.29303430862914e-05, - "loss": 0.0605, + "epoch": 5.689885637902867, + "grad_norm": 0.8002229928970337, + "learning_rate": 4.310114362097133e-06, + "loss": 0.0616, "step": 38310 }, { - "epoch": 2.845685429971781, - "grad_norm": 0.535963237285614, - "learning_rate": 1.2925887420169317e-05, - "loss": 0.0763, + "epoch": 5.691370859943562, + "grad_norm": 0.7039357423782349, + "learning_rate": 4.308629140056439e-06, + "loss": 0.0526, "step": 38320 }, { - "epoch": 2.8464280409921283, - "grad_norm": 2.856031894683838, - "learning_rate": 1.292143175404723e-05, - "loss": 0.064, + "epoch": 5.692856081984257, + "grad_norm": 0.6721161007881165, + "learning_rate": 4.307143918015744e-06, + "loss": 0.0482, "step": 38330 }, { - "epoch": 2.8471706520124758, - "grad_norm": 0.5636598467826843, - "learning_rate": 1.2916976087925145e-05, - "loss": 0.0675, + "epoch": 5.6943413040249515, + "grad_norm": 1.965502381324768, + "learning_rate": 4.305658695975048e-06, + "loss": 0.0765, "step": 38340 }, { - "epoch": 2.847913263032823, - "grad_norm": 1.67021644115448, - "learning_rate": 1.291252042180306e-05, - "loss": 0.0784, + "epoch": 5.695826526065646, + "grad_norm": 1.0320897102355957, + "learning_rate": 4.304173473934354e-06, + "loss": 0.0568, "step": 38350 }, { - "epoch": 2.848655874053171, - "grad_norm": 1.2641798257827759, - "learning_rate": 1.2908064755680975e-05, - "loss": 0.063, + "epoch": 5.697311748106342, + "grad_norm": 0.753450870513916, + "learning_rate": 4.302688251893659e-06, + "loss": 0.0432, "step": 38360 }, { - "epoch": 2.8493984850735186, - "grad_norm": 0.549030601978302, - "learning_rate": 1.290360908955889e-05, - "loss": 0.0584, + "epoch": 5.698796970147037, + "grad_norm": 0.35579219460487366, + "learning_rate": 4.301203029852963e-06, + "loss": 0.0594, "step": 38370 }, { - "epoch": 2.850141096093866, - "grad_norm": 2.4092066287994385, - "learning_rate": 1.2899153423436803e-05, - "loss": 0.0935, + "epoch": 5.700282192187732, + "grad_norm": 1.503354549407959, + "learning_rate": 4.299717807812268e-06, + "loss": 0.0623, "step": 38380 }, { - "epoch": 2.8508837071142135, - "grad_norm": 0.9181311726570129, - "learning_rate": 1.289469775731472e-05, - "loss": 0.0586, + "epoch": 5.701767414228427, + "grad_norm": 1.1138559579849243, + "learning_rate": 4.298232585771573e-06, + "loss": 0.0736, "step": 38390 }, { - "epoch": 2.851626318134561, - "grad_norm": 1.6261708736419678, - "learning_rate": 1.2890242091192633e-05, - "loss": 0.0871, + "epoch": 5.703252636269122, + "grad_norm": 0.9944598078727722, + "learning_rate": 4.296747363730878e-06, + "loss": 0.0552, "step": 38400 }, { - "epoch": 2.852368929154909, - "grad_norm": 1.0663944482803345, - "learning_rate": 1.2885786425070548e-05, - "loss": 0.0464, + "epoch": 5.704737858309818, + "grad_norm": 0.48267674446105957, + "learning_rate": 4.295262141690183e-06, + "loss": 0.0543, "step": 38410 }, { - "epoch": 2.8531115401752563, - "grad_norm": 1.0980523824691772, - "learning_rate": 1.2881330758948463e-05, - "loss": 0.0805, + "epoch": 5.7062230803505125, + "grad_norm": 0.34499338269233704, + "learning_rate": 4.293776919649488e-06, + "loss": 0.0431, "step": 38420 }, { - "epoch": 2.8538541511956037, - "grad_norm": 1.2846684455871582, - "learning_rate": 1.2876875092826378e-05, - "loss": 0.1012, + "epoch": 5.707708302391207, + "grad_norm": 1.3373545408248901, + "learning_rate": 4.292291697608793e-06, + "loss": 0.0498, "step": 38430 }, { - "epoch": 2.854596762215951, - "grad_norm": 2.4955661296844482, - "learning_rate": 1.2872419426704293e-05, - "loss": 0.0585, + "epoch": 5.709193524431902, + "grad_norm": 1.3047128915786743, + "learning_rate": 4.290806475568098e-06, + "loss": 0.0512, "step": 38440 }, { - "epoch": 2.8553393732362986, - "grad_norm": 1.1014326810836792, - "learning_rate": 1.2867963760582207e-05, - "loss": 0.0656, + "epoch": 5.710678746472597, + "grad_norm": 0.5727968811988831, + "learning_rate": 4.289321253527402e-06, + "loss": 0.0593, "step": 38450 }, { - "epoch": 2.8560819842566465, - "grad_norm": 0.6860207915306091, - "learning_rate": 1.2863508094460122e-05, - "loss": 0.0522, + "epoch": 5.712163968513293, + "grad_norm": 0.6570802330970764, + "learning_rate": 4.2878360314867075e-06, + "loss": 0.062, "step": 38460 }, { - "epoch": 2.856824595276994, - "grad_norm": 1.087835669517517, - "learning_rate": 1.2859052428338037e-05, - "loss": 0.0673, + "epoch": 5.713649190553988, + "grad_norm": 0.5600692629814148, + "learning_rate": 4.286350809446013e-06, + "loss": 0.0574, "step": 38470 }, { - "epoch": 2.8575672062973414, - "grad_norm": 2.119745969772339, - "learning_rate": 1.2854596762215952e-05, - "loss": 0.0697, + "epoch": 5.715134412594683, + "grad_norm": 0.804882824420929, + "learning_rate": 4.284865587405317e-06, + "loss": 0.0655, "step": 38480 }, { - "epoch": 2.858309817317689, - "grad_norm": 1.239503026008606, - "learning_rate": 1.2850141096093867e-05, - "loss": 0.0857, + "epoch": 5.716619634635378, + "grad_norm": 1.1548188924789429, + "learning_rate": 4.2833803653646225e-06, + "loss": 0.0703, "step": 38490 }, { - "epoch": 2.8590524283380363, - "grad_norm": 4.281582832336426, - "learning_rate": 1.2845685429971781e-05, - "loss": 0.0598, + "epoch": 5.718104856676073, + "grad_norm": 1.5685232877731323, + "learning_rate": 4.281895143323928e-06, + "loss": 0.0602, "step": 38500 }, { - "epoch": 2.859795039358384, - "grad_norm": 2.14847993850708, - "learning_rate": 1.2841229763849696e-05, - "loss": 0.068, + "epoch": 5.719590078716768, + "grad_norm": 1.076205849647522, + "learning_rate": 4.280409921283232e-06, + "loss": 0.048, "step": 38510 }, { - "epoch": 2.8605376503787316, - "grad_norm": 4.647385120391846, - "learning_rate": 1.283677409772761e-05, - "loss": 0.0738, + "epoch": 5.721075300757463, + "grad_norm": 0.591611921787262, + "learning_rate": 4.2789246992425374e-06, + "loss": 0.0655, "step": 38520 }, { - "epoch": 2.861280261399079, - "grad_norm": 0.7696908116340637, - "learning_rate": 1.2832318431605525e-05, - "loss": 0.0458, + "epoch": 5.722560522798158, + "grad_norm": 0.7020032405853271, + "learning_rate": 4.277439477201842e-06, + "loss": 0.0334, "step": 38530 }, { - "epoch": 2.8620228724194265, - "grad_norm": 0.9217883348464966, - "learning_rate": 1.2827862765483441e-05, - "loss": 0.0427, + "epoch": 5.724045744838853, + "grad_norm": 1.2611887454986572, + "learning_rate": 4.275954255161147e-06, + "loss": 0.0549, "step": 38540 }, { - "epoch": 2.862765483439774, - "grad_norm": 2.1366024017333984, - "learning_rate": 1.2823407099361355e-05, - "loss": 0.0735, + "epoch": 5.725530966879549, + "grad_norm": 0.578007698059082, + "learning_rate": 4.274469033120452e-06, + "loss": 0.0585, "step": 38550 }, { - "epoch": 2.863508094460122, - "grad_norm": 1.3464000225067139, - "learning_rate": 1.281895143323927e-05, - "loss": 0.0712, + "epoch": 5.727016188920244, + "grad_norm": 0.9836631417274475, + "learning_rate": 4.272983811079757e-06, + "loss": 0.0767, "step": 38560 }, { - "epoch": 2.8642507054804693, - "grad_norm": 0.7203729152679443, - "learning_rate": 1.2814495767117183e-05, - "loss": 0.0611, + "epoch": 5.728501410960939, + "grad_norm": 0.37310218811035156, + "learning_rate": 4.271498589039061e-06, + "loss": 0.0393, "step": 38570 }, { - "epoch": 2.864993316500817, - "grad_norm": 1.2216914892196655, - "learning_rate": 1.28100401009951e-05, - "loss": 0.0697, + "epoch": 5.729986633001634, + "grad_norm": 0.5416752696037292, + "learning_rate": 4.270013366998367e-06, + "loss": 0.0739, "step": 38580 }, { - "epoch": 2.8657359275211647, - "grad_norm": 1.4661186933517456, - "learning_rate": 1.2805584434873013e-05, - "loss": 0.0771, + "epoch": 5.7314718550423285, + "grad_norm": 1.1677316427230835, + "learning_rate": 4.268528144957672e-06, + "loss": 0.0433, "step": 38590 }, { - "epoch": 2.8664785385415117, - "grad_norm": 0.9744288325309753, - "learning_rate": 1.2801128768750928e-05, - "loss": 0.0693, + "epoch": 5.732957077083024, + "grad_norm": 0.6157244443893433, + "learning_rate": 4.267042922916976e-06, + "loss": 0.0488, "step": 38600 }, { - "epoch": 2.8672211495618596, - "grad_norm": 0.961794912815094, - "learning_rate": 1.2796673102628845e-05, - "loss": 0.0956, + "epoch": 5.734442299123719, + "grad_norm": 0.7313002347946167, + "learning_rate": 4.265557700876282e-06, + "loss": 0.0512, "step": 38610 }, { - "epoch": 2.867963760582207, - "grad_norm": 2.7422969341278076, - "learning_rate": 1.2792217436506758e-05, - "loss": 0.0994, + "epoch": 5.735927521164414, + "grad_norm": 0.9613766670227051, + "learning_rate": 4.264072478835586e-06, + "loss": 0.0496, "step": 38620 }, { - "epoch": 2.8687063716025545, - "grad_norm": 1.6554310321807861, - "learning_rate": 1.2787761770384673e-05, - "loss": 0.0521, + "epoch": 5.737412743205109, + "grad_norm": 1.0196996927261353, + "learning_rate": 4.262587256794891e-06, + "loss": 0.0579, "step": 38630 }, { - "epoch": 2.8694489826229024, - "grad_norm": 2.223524808883667, - "learning_rate": 1.2783306104262586e-05, - "loss": 0.0773, + "epoch": 5.738897965245804, + "grad_norm": 1.3182339668273926, + "learning_rate": 4.261102034754196e-06, + "loss": 0.0668, "step": 38640 }, { - "epoch": 2.87019159364325, - "grad_norm": 0.8952299356460571, - "learning_rate": 1.2778850438140503e-05, - "loss": 0.0592, + "epoch": 5.7403831872865, + "grad_norm": 0.5998702645301819, + "learning_rate": 4.259616812713501e-06, + "loss": 0.0544, "step": 38650 }, { - "epoch": 2.8709342046635973, - "grad_norm": 0.7390848398208618, - "learning_rate": 1.2774394772018418e-05, - "loss": 0.0638, + "epoch": 5.7418684093271946, + "grad_norm": 0.27779215574264526, + "learning_rate": 4.258131590672806e-06, + "loss": 0.0484, "step": 38660 }, { - "epoch": 2.8716768156839447, - "grad_norm": 1.8188756704330444, - "learning_rate": 1.2769939105896331e-05, - "loss": 0.0533, + "epoch": 5.7433536313678895, + "grad_norm": 0.6996427178382874, + "learning_rate": 4.256646368632111e-06, + "loss": 0.0646, "step": 38670 }, { - "epoch": 2.872419426704292, - "grad_norm": 1.062387466430664, - "learning_rate": 1.2765483439774246e-05, - "loss": 0.0603, + "epoch": 5.744838853408584, + "grad_norm": 1.1955310106277466, + "learning_rate": 4.255161146591415e-06, + "loss": 0.0549, "step": 38680 }, { - "epoch": 2.87316203772464, - "grad_norm": 2.831735134124756, - "learning_rate": 1.2761027773652161e-05, - "loss": 0.0383, + "epoch": 5.74632407544928, + "grad_norm": 0.9063337445259094, + "learning_rate": 4.253675924550721e-06, + "loss": 0.0752, "step": 38690 }, { - "epoch": 2.8739046487449875, - "grad_norm": 0.8888131976127625, - "learning_rate": 1.2756572107530076e-05, - "loss": 0.0578, + "epoch": 5.747809297489975, + "grad_norm": 0.4556771218776703, + "learning_rate": 4.252190702510026e-06, + "loss": 0.0514, "step": 38700 }, { - "epoch": 2.874647259765335, - "grad_norm": 2.9310948848724365, - "learning_rate": 1.2752116441407991e-05, - "loss": 0.068, + "epoch": 5.74929451953067, + "grad_norm": 0.5066632032394409, + "learning_rate": 4.25070548046933e-06, + "loss": 0.0803, "step": 38710 }, { - "epoch": 2.8753898707856824, - "grad_norm": 0.4100227952003479, - "learning_rate": 1.2747660775285905e-05, - "loss": 0.0827, + "epoch": 5.750779741571365, + "grad_norm": 0.6577308773994446, + "learning_rate": 4.2492202584286354e-06, + "loss": 0.0446, "step": 38720 }, { - "epoch": 2.87613248180603, - "grad_norm": 1.1365009546279907, - "learning_rate": 1.2743205109163821e-05, - "loss": 0.0718, + "epoch": 5.75226496361206, + "grad_norm": 1.5056424140930176, + "learning_rate": 4.247735036387941e-06, + "loss": 0.0631, "step": 38730 }, { - "epoch": 2.8768750928263778, - "grad_norm": 1.4055360555648804, - "learning_rate": 1.2738749443041735e-05, - "loss": 0.0788, + "epoch": 5.7537501856527555, + "grad_norm": 1.3212007284164429, + "learning_rate": 4.246249814347245e-06, + "loss": 0.0589, "step": 38740 }, { - "epoch": 2.877617703846725, - "grad_norm": 1.0839377641677856, - "learning_rate": 1.273429377691965e-05, - "loss": 0.0649, + "epoch": 5.75523540769345, + "grad_norm": 0.6160315871238708, + "learning_rate": 4.24476459230655e-06, + "loss": 0.0705, "step": 38750 }, { - "epoch": 2.8783603148670727, - "grad_norm": 1.1226552724838257, - "learning_rate": 1.2729838110797565e-05, - "loss": 0.0717, + "epoch": 5.756720629734145, + "grad_norm": 1.3710724115371704, + "learning_rate": 4.243279370265856e-06, + "loss": 0.0415, "step": 38760 }, { - "epoch": 2.87910292588742, - "grad_norm": 0.4335779845714569, - "learning_rate": 1.272538244467548e-05, - "loss": 0.0535, + "epoch": 5.75820585177484, + "grad_norm": 0.6069679260253906, + "learning_rate": 4.24179414822516e-06, + "loss": 0.0417, "step": 38770 }, { - "epoch": 2.8798455369077676, - "grad_norm": 0.9374495148658752, - "learning_rate": 1.2720926778553395e-05, - "loss": 0.069, + "epoch": 5.759691073815535, + "grad_norm": 1.013668179512024, + "learning_rate": 4.2403089261844646e-06, + "loss": 0.0607, "step": 38780 }, { - "epoch": 2.8805881479281155, - "grad_norm": 1.4652955532073975, - "learning_rate": 1.2716471112431308e-05, - "loss": 0.0836, + "epoch": 5.761176295856231, + "grad_norm": 0.6373612880706787, + "learning_rate": 4.23882370414377e-06, + "loss": 0.0574, "step": 38790 }, { - "epoch": 2.881330758948463, - "grad_norm": 1.3489465713500977, - "learning_rate": 1.2712015446309225e-05, - "loss": 0.0757, + "epoch": 5.762661517896926, + "grad_norm": 1.0072802305221558, + "learning_rate": 4.237338482103075e-06, + "loss": 0.068, "step": 38800 }, { - "epoch": 2.8820733699688104, - "grad_norm": 3.2947144508361816, - "learning_rate": 1.2707559780187138e-05, - "loss": 0.0454, + "epoch": 5.764146739937621, + "grad_norm": 0.5579874515533447, + "learning_rate": 4.2358532600623796e-06, + "loss": 0.0537, "step": 38810 }, { - "epoch": 2.882815980989158, - "grad_norm": 1.3606735467910767, - "learning_rate": 1.2703104114065053e-05, - "loss": 0.0732, + "epoch": 5.765631961978316, + "grad_norm": 0.5878424644470215, + "learning_rate": 4.234368038021685e-06, + "loss": 0.0779, "step": 38820 }, { - "epoch": 2.8835585920095053, - "grad_norm": 0.7241372466087341, - "learning_rate": 1.2698648447942968e-05, - "loss": 0.0727, + "epoch": 5.7671171840190105, + "grad_norm": 1.0271848440170288, + "learning_rate": 4.232882815980989e-06, + "loss": 0.0647, "step": 38830 }, { - "epoch": 2.884301203029853, - "grad_norm": 1.2209150791168213, - "learning_rate": 1.2694192781820883e-05, - "loss": 0.0625, + "epoch": 5.768602406059706, + "grad_norm": 0.8676832914352417, + "learning_rate": 4.2313975939402946e-06, + "loss": 0.0509, "step": 38840 }, { - "epoch": 2.8850438140502006, - "grad_norm": 3.2763359546661377, - "learning_rate": 1.2689737115698798e-05, - "loss": 0.0619, + "epoch": 5.770087628100401, + "grad_norm": 0.5808263421058655, + "learning_rate": 4.229912371899599e-06, + "loss": 0.0587, "step": 38850 }, { - "epoch": 2.885786425070548, - "grad_norm": 0.7242000699043274, - "learning_rate": 1.2685281449576711e-05, - "loss": 0.0428, + "epoch": 5.771572850141096, + "grad_norm": 0.6637036800384521, + "learning_rate": 4.228427149858904e-06, + "loss": 0.0565, "step": 38860 }, { - "epoch": 2.8865290360908955, - "grad_norm": 1.276310682296753, - "learning_rate": 1.2680825783454628e-05, - "loss": 0.0873, + "epoch": 5.773058072181791, + "grad_norm": 0.6437532901763916, + "learning_rate": 4.2269419278182096e-06, + "loss": 0.0543, "step": 38870 }, { - "epoch": 2.887271647111243, - "grad_norm": 0.70942223072052, - "learning_rate": 1.2676370117332541e-05, - "loss": 0.0563, + "epoch": 5.774543294222486, + "grad_norm": 0.9969229102134705, + "learning_rate": 4.225456705777514e-06, + "loss": 0.0654, "step": 38880 }, { - "epoch": 2.888014258131591, - "grad_norm": 1.4560551643371582, - "learning_rate": 1.2671914451210456e-05, - "loss": 0.0754, + "epoch": 5.776028516263182, + "grad_norm": 1.494921326637268, + "learning_rate": 4.223971483736819e-06, + "loss": 0.0685, "step": 38890 }, { - "epoch": 2.8887568691519383, - "grad_norm": 3.525283098220825, - "learning_rate": 1.2667458785088371e-05, - "loss": 0.1017, + "epoch": 5.777513738303877, + "grad_norm": 1.8869636058807373, + "learning_rate": 4.222486261696124e-06, + "loss": 0.0431, "step": 38900 }, { - "epoch": 2.8894994801722858, - "grad_norm": 2.5498082637786865, - "learning_rate": 1.2663003118966286e-05, - "loss": 0.0777, + "epoch": 5.7789989603445715, + "grad_norm": 0.9608843922615051, + "learning_rate": 4.221001039655429e-06, + "loss": 0.0636, "step": 38910 }, { - "epoch": 2.890242091192633, - "grad_norm": 1.0680961608886719, - "learning_rate": 1.2658547452844201e-05, - "loss": 0.058, + "epoch": 5.780484182385266, + "grad_norm": 0.35328376293182373, + "learning_rate": 4.219515817614733e-06, + "loss": 0.0503, "step": 38920 }, { - "epoch": 2.8909847022129807, - "grad_norm": 0.860130786895752, - "learning_rate": 1.2654091786722114e-05, - "loss": 0.0519, + "epoch": 5.781969404425961, + "grad_norm": 0.5849358439445496, + "learning_rate": 4.218030595574039e-06, + "loss": 0.0447, "step": 38930 }, { - "epoch": 2.8917273132333285, - "grad_norm": 0.4104064106941223, - "learning_rate": 1.264963612060003e-05, - "loss": 0.0721, + "epoch": 5.783454626466657, + "grad_norm": 1.1339222192764282, + "learning_rate": 4.216545373533343e-06, + "loss": 0.0705, "step": 38940 }, { - "epoch": 2.892469924253676, - "grad_norm": 1.9882882833480835, - "learning_rate": 1.2645180454477946e-05, - "loss": 0.0761, + "epoch": 5.784939848507352, + "grad_norm": 0.9738719463348389, + "learning_rate": 4.215060151492648e-06, + "loss": 0.0727, "step": 38950 }, { - "epoch": 2.8932125352740234, - "grad_norm": 0.7212990522384644, - "learning_rate": 1.264072478835586e-05, - "loss": 0.0602, + "epoch": 5.786425070548047, + "grad_norm": 0.6222785115242004, + "learning_rate": 4.213574929451954e-06, + "loss": 0.0657, "step": 38960 }, { - "epoch": 2.893955146294371, - "grad_norm": 0.5965471863746643, - "learning_rate": 1.2636269122233774e-05, - "loss": 0.0764, + "epoch": 5.787910292588742, + "grad_norm": 1.2067147493362427, + "learning_rate": 4.212089707411258e-06, + "loss": 0.0539, "step": 38970 }, { - "epoch": 2.8946977573147183, - "grad_norm": 2.7292585372924805, - "learning_rate": 1.2631813456111688e-05, - "loss": 0.0523, + "epoch": 5.789395514629437, + "grad_norm": 0.5365669131278992, + "learning_rate": 4.210604485370563e-06, + "loss": 0.0572, "step": 38980 }, { - "epoch": 2.8954403683350662, - "grad_norm": 0.5721240043640137, - "learning_rate": 1.2627357789989604e-05, - "loss": 0.0278, + "epoch": 5.7908807366701325, + "grad_norm": 0.5680752396583557, + "learning_rate": 4.209119263329869e-06, + "loss": 0.0806, "step": 38990 }, { - "epoch": 2.8961829793554137, - "grad_norm": 2.3758206367492676, - "learning_rate": 1.262290212386752e-05, - "loss": 0.0495, + "epoch": 5.792365958710827, + "grad_norm": 1.4261845350265503, + "learning_rate": 4.207634041289173e-06, + "loss": 0.0598, "step": 39000 }, { - "epoch": 2.896925590375761, - "grad_norm": 0.5098469257354736, - "learning_rate": 1.2618446457745433e-05, - "loss": 0.0734, + "epoch": 5.793851180751522, + "grad_norm": 0.8928173780441284, + "learning_rate": 4.2061488192484775e-06, + "loss": 0.0544, "step": 39010 }, { - "epoch": 2.8976682013961086, - "grad_norm": 1.3292839527130127, - "learning_rate": 1.261399079162335e-05, - "loss": 0.0594, + "epoch": 5.795336402792217, + "grad_norm": 0.5879567861557007, + "learning_rate": 4.204663597207783e-06, + "loss": 0.047, "step": 39020 }, { - "epoch": 2.898410812416456, - "grad_norm": 0.4254518747329712, - "learning_rate": 1.2609535125501263e-05, - "loss": 0.0818, + "epoch": 5.796821624832912, + "grad_norm": 1.41495943069458, + "learning_rate": 4.203178375167088e-06, + "loss": 0.0717, "step": 39030 }, { - "epoch": 2.899153423436804, - "grad_norm": 1.5062333345413208, - "learning_rate": 1.2605079459379178e-05, - "loss": 0.058, + "epoch": 5.798306846873608, + "grad_norm": 1.1093374490737915, + "learning_rate": 4.2016931531263925e-06, + "loss": 0.0543, "step": 39040 }, { - "epoch": 2.8998960344571514, - "grad_norm": 1.215062141418457, - "learning_rate": 1.2600623793257091e-05, - "loss": 0.0422, + "epoch": 5.799792068914303, + "grad_norm": 1.1786057949066162, + "learning_rate": 4.200207931085698e-06, + "loss": 0.0508, "step": 39050 }, { - "epoch": 2.900638645477499, - "grad_norm": 1.1051884889602661, - "learning_rate": 1.2596168127135008e-05, - "loss": 0.0726, + "epoch": 5.801277290954998, + "grad_norm": 1.0340731143951416, + "learning_rate": 4.198722709045003e-06, + "loss": 0.0564, "step": 39060 }, { - "epoch": 2.9013812564978463, - "grad_norm": 0.9767510294914246, - "learning_rate": 1.2591712461012923e-05, - "loss": 0.0557, + "epoch": 5.802762512995693, + "grad_norm": 0.9552382230758667, + "learning_rate": 4.1972374870043075e-06, + "loss": 0.0543, "step": 39070 }, { - "epoch": 2.9021238675181937, - "grad_norm": 3.3917346000671387, - "learning_rate": 1.2587256794890836e-05, - "loss": 0.0595, + "epoch": 5.8042477350363875, + "grad_norm": 0.7804151177406311, + "learning_rate": 4.195752264963612e-06, + "loss": 0.0555, "step": 39080 }, { - "epoch": 2.9028664785385416, - "grad_norm": 2.1821186542510986, - "learning_rate": 1.2582801128768751e-05, - "loss": 0.0616, + "epoch": 5.805732957077083, + "grad_norm": 0.8787037134170532, + "learning_rate": 4.194267042922917e-06, + "loss": 0.054, "step": 39090 }, { - "epoch": 2.903609089558889, - "grad_norm": 3.1181464195251465, - "learning_rate": 1.2578345462646666e-05, - "loss": 0.0614, + "epoch": 5.807218179117778, + "grad_norm": 0.7925017476081848, + "learning_rate": 4.1927818208822225e-06, + "loss": 0.0629, "step": 39100 }, { - "epoch": 2.9043517005792365, - "grad_norm": 1.3670252561569214, - "learning_rate": 1.2573889796524581e-05, - "loss": 0.0571, + "epoch": 5.808703401158473, + "grad_norm": 0.8146625757217407, + "learning_rate": 4.191296598841527e-06, + "loss": 0.0655, "step": 39110 }, { - "epoch": 2.905094311599584, - "grad_norm": 2.3952903747558594, - "learning_rate": 1.2569434130402496e-05, - "loss": 0.0576, + "epoch": 5.810188623199168, + "grad_norm": 0.7402033805847168, + "learning_rate": 4.189811376800832e-06, + "loss": 0.0642, "step": 39120 }, { - "epoch": 2.9058369226199314, - "grad_norm": 3.1629419326782227, - "learning_rate": 1.256497846428041e-05, - "loss": 0.0895, + "epoch": 5.811673845239864, + "grad_norm": 0.7034904956817627, + "learning_rate": 4.188326154760137e-06, + "loss": 0.0608, "step": 39130 }, { - "epoch": 2.9065795336402793, - "grad_norm": 2.0828757286071777, - "learning_rate": 1.2560522798158326e-05, - "loss": 0.0773, + "epoch": 5.813159067280559, + "grad_norm": 0.8925518989562988, + "learning_rate": 4.186840932719442e-06, + "loss": 0.0531, "step": 39140 }, { - "epoch": 2.9073221446606268, - "grad_norm": 0.463571161031723, - "learning_rate": 1.255606713203624e-05, - "loss": 0.053, + "epoch": 5.8146442893212535, + "grad_norm": 1.6555513143539429, + "learning_rate": 4.185355710678746e-06, + "loss": 0.0748, "step": 39150 }, { - "epoch": 2.9080647556809742, - "grad_norm": 1.1848664283752441, - "learning_rate": 1.2551611465914154e-05, - "loss": 0.072, + "epoch": 5.8161295113619484, + "grad_norm": 0.83237624168396, + "learning_rate": 4.183870488638052e-06, + "loss": 0.0424, "step": 39160 }, { - "epoch": 2.908807366701322, - "grad_norm": 0.9706199765205383, - "learning_rate": 1.254715579979207e-05, - "loss": 0.0496, + "epoch": 5.817614733402643, + "grad_norm": 0.5995851755142212, + "learning_rate": 4.182385266597357e-06, + "loss": 0.0702, "step": 39170 }, { - "epoch": 2.909549977721669, - "grad_norm": 2.834559440612793, - "learning_rate": 1.2542700133669984e-05, - "loss": 0.0794, + "epoch": 5.819099955443339, + "grad_norm": 1.0438969135284424, + "learning_rate": 4.180900044556661e-06, + "loss": 0.0603, "step": 39180 }, { - "epoch": 2.910292588742017, - "grad_norm": 2.5755867958068848, - "learning_rate": 1.25382444675479e-05, - "loss": 0.0941, + "epoch": 5.820585177484034, + "grad_norm": 0.857422411441803, + "learning_rate": 4.179414822515967e-06, + "loss": 0.0518, "step": 39190 }, { - "epoch": 2.9110351997623645, - "grad_norm": 0.6742496490478516, - "learning_rate": 1.2533788801425812e-05, - "loss": 0.0551, + "epoch": 5.822070399524729, + "grad_norm": 1.5087437629699707, + "learning_rate": 4.177929600475271e-06, + "loss": 0.0971, "step": 39200 }, { - "epoch": 2.911777810782712, - "grad_norm": 1.3099998235702515, - "learning_rate": 1.2529333135303729e-05, - "loss": 0.0438, + "epoch": 5.823555621565424, + "grad_norm": 1.900402545928955, + "learning_rate": 4.176444378434576e-06, + "loss": 0.0759, "step": 39210 }, - { - "epoch": 2.91252042180306, - "grad_norm": 1.9794929027557373, - "learning_rate": 1.2524877469181642e-05, - "loss": 0.0746, + { + "epoch": 5.825040843606119, + "grad_norm": 1.5815813541412354, + "learning_rate": 4.174959156393882e-06, + "loss": 0.0586, "step": 39220 }, { - "epoch": 2.9132630328234073, - "grad_norm": 1.8509886264801025, - "learning_rate": 1.2520421803059557e-05, - "loss": 0.0832, + "epoch": 5.8265260656468145, + "grad_norm": 1.3800815343856812, + "learning_rate": 4.173473934353186e-06, + "loss": 0.0685, "step": 39230 }, { - "epoch": 2.9140056438437547, - "grad_norm": 1.6332608461380005, - "learning_rate": 1.2515966136937472e-05, - "loss": 0.0653, + "epoch": 5.828011287687509, + "grad_norm": 1.4719352722167969, + "learning_rate": 4.1719887123124905e-06, + "loss": 0.0649, "step": 39240 }, { - "epoch": 2.914748254864102, - "grad_norm": 1.8351725339889526, - "learning_rate": 1.2511510470815387e-05, - "loss": 0.075, + "epoch": 5.829496509728204, + "grad_norm": 0.8187944889068604, + "learning_rate": 4.170503490271796e-06, + "loss": 0.0594, "step": 39250 }, { - "epoch": 2.9154908658844496, - "grad_norm": 2.058716058731079, - "learning_rate": 1.2507054804693302e-05, - "loss": 0.0559, + "epoch": 5.830981731768899, + "grad_norm": 0.8237382769584656, + "learning_rate": 4.169018268231101e-06, + "loss": 0.0583, "step": 39260 }, { - "epoch": 2.9162334769047975, - "grad_norm": 0.9268501996994019, - "learning_rate": 1.2502599138571216e-05, - "loss": 0.0599, + "epoch": 5.832466953809595, + "grad_norm": 1.1916723251342773, + "learning_rate": 4.1675330461904055e-06, + "loss": 0.0523, "step": 39270 }, { - "epoch": 2.916976087925145, - "grad_norm": 1.5155894756317139, - "learning_rate": 1.2498143472449132e-05, - "loss": 0.0994, + "epoch": 5.83395217585029, + "grad_norm": 1.322054386138916, + "learning_rate": 4.166047824149711e-06, + "loss": 0.0549, "step": 39280 }, { - "epoch": 2.9177186989454924, - "grad_norm": 0.5851942896842957, - "learning_rate": 1.2493687806327046e-05, - "loss": 0.0485, + "epoch": 5.835437397890985, + "grad_norm": 0.6644608974456787, + "learning_rate": 4.164562602109016e-06, + "loss": 0.0529, "step": 39290 }, { - "epoch": 2.91846130996584, - "grad_norm": 3.38140606880188, - "learning_rate": 1.248923214020496e-05, - "loss": 0.0576, + "epoch": 5.83692261993168, + "grad_norm": 1.1946402788162231, + "learning_rate": 4.1630773800683205e-06, + "loss": 0.0561, "step": 39300 }, { - "epoch": 2.9192039209861873, - "grad_norm": 1.3043699264526367, - "learning_rate": 1.2484776474082876e-05, - "loss": 0.0433, + "epoch": 5.838407841972375, + "grad_norm": 0.6778384447097778, + "learning_rate": 4.161592158027625e-06, + "loss": 0.0559, "step": 39310 }, { - "epoch": 2.919946532006535, - "grad_norm": 2.2233948707580566, - "learning_rate": 1.248032080796079e-05, - "loss": 0.0585, + "epoch": 5.83989306401307, + "grad_norm": 0.8052584528923035, + "learning_rate": 4.16010693598693e-06, + "loss": 0.0523, "step": 39320 }, { - "epoch": 2.9206891430268827, - "grad_norm": 2.5557191371917725, - "learning_rate": 1.2475865141838706e-05, - "loss": 0.0506, + "epoch": 5.841378286053765, + "grad_norm": 0.5099238157272339, + "learning_rate": 4.1586217139462355e-06, + "loss": 0.0264, "step": 39330 }, { - "epoch": 2.92143175404723, - "grad_norm": 1.088620662689209, - "learning_rate": 1.2471409475716619e-05, - "loss": 0.0709, + "epoch": 5.84286350809446, + "grad_norm": 0.740264356136322, + "learning_rate": 4.15713649190554e-06, + "loss": 0.0518, "step": 39340 }, { - "epoch": 2.9221743650675776, - "grad_norm": 1.4036482572555542, - "learning_rate": 1.2466953809594534e-05, - "loss": 0.0976, + "epoch": 5.844348730135155, + "grad_norm": 0.8734014630317688, + "learning_rate": 4.155651269864845e-06, + "loss": 0.0476, "step": 39350 }, { - "epoch": 2.922916976087925, - "grad_norm": 2.0633914470672607, - "learning_rate": 1.246249814347245e-05, - "loss": 0.0545, + "epoch": 5.84583395217585, + "grad_norm": 0.38284748792648315, + "learning_rate": 4.1541660478241505e-06, + "loss": 0.0489, "step": 39360 }, { - "epoch": 2.923659587108273, - "grad_norm": 1.7512578964233398, - "learning_rate": 1.2458042477350364e-05, - "loss": 0.0749, + "epoch": 5.847319174216546, + "grad_norm": 0.6769605875015259, + "learning_rate": 4.152680825783455e-06, + "loss": 0.0602, "step": 39370 }, { - "epoch": 2.9244021981286203, - "grad_norm": 0.6414874792098999, - "learning_rate": 1.2453586811228279e-05, - "loss": 0.0569, + "epoch": 5.848804396257241, + "grad_norm": 0.6371337175369263, + "learning_rate": 4.151195603742759e-06, + "loss": 0.0623, "step": 39380 }, { - "epoch": 2.925144809148968, - "grad_norm": 1.033894419670105, - "learning_rate": 1.2449131145106192e-05, - "loss": 0.0706, + "epoch": 5.850289618297936, + "grad_norm": 0.7483164072036743, + "learning_rate": 4.149710381702065e-06, + "loss": 0.0693, "step": 39390 }, { - "epoch": 2.9258874201693152, - "grad_norm": 1.335205316543579, - "learning_rate": 1.2444675478984109e-05, - "loss": 0.085, + "epoch": 5.8517748403386305, + "grad_norm": 0.4191286563873291, + "learning_rate": 4.14822515966137e-06, + "loss": 0.0354, "step": 39400 }, { - "epoch": 2.9266300311896627, - "grad_norm": 1.1902940273284912, - "learning_rate": 1.2440219812862024e-05, - "loss": 0.0359, + "epoch": 5.853260062379325, + "grad_norm": 0.699856698513031, + "learning_rate": 4.146739937620674e-06, + "loss": 0.0464, "step": 39410 }, { - "epoch": 2.9273726422100106, - "grad_norm": 2.9166228771209717, - "learning_rate": 1.2435764146739937e-05, - "loss": 0.0584, + "epoch": 5.854745284420021, + "grad_norm": 0.8900712132453918, + "learning_rate": 4.14525471557998e-06, + "loss": 0.0673, "step": 39420 }, { - "epoch": 2.928115253230358, - "grad_norm": 0.8290153741836548, - "learning_rate": 1.2431308480617854e-05, - "loss": 0.0669, + "epoch": 5.856230506460716, + "grad_norm": 0.9745095372200012, + "learning_rate": 4.143769493539285e-06, + "loss": 0.0704, "step": 39430 }, { - "epoch": 2.9288578642507055, - "grad_norm": 0.8835294842720032, - "learning_rate": 1.2426852814495767e-05, - "loss": 0.0545, + "epoch": 5.857715728501411, + "grad_norm": 0.7951921820640564, + "learning_rate": 4.142284271498589e-06, + "loss": 0.0636, "step": 39440 }, { - "epoch": 2.929600475271053, - "grad_norm": 0.4865367114543915, - "learning_rate": 1.2422397148373682e-05, - "loss": 0.0727, + "epoch": 5.859200950542106, + "grad_norm": 0.7359218001365662, + "learning_rate": 4.140799049457895e-06, + "loss": 0.0449, "step": 39450 }, { - "epoch": 2.9303430862914004, - "grad_norm": 0.7041136622428894, - "learning_rate": 1.2417941482251596e-05, - "loss": 0.0369, + "epoch": 5.860686172582801, + "grad_norm": 1.602457046508789, + "learning_rate": 4.139313827417199e-06, + "loss": 0.047, "step": 39460 }, { - "epoch": 2.9310856973117483, - "grad_norm": 1.066720724105835, - "learning_rate": 1.2413485816129512e-05, - "loss": 0.0801, + "epoch": 5.862171394623497, + "grad_norm": 0.7292214035987854, + "learning_rate": 4.137828605376504e-06, + "loss": 0.0506, "step": 39470 }, { - "epoch": 2.9318283083320957, - "grad_norm": 0.8632923364639282, - "learning_rate": 1.2409030150007427e-05, - "loss": 0.0296, + "epoch": 5.8636566166641915, + "grad_norm": 1.0408436059951782, + "learning_rate": 4.136343383335809e-06, + "loss": 0.0598, "step": 39480 }, { - "epoch": 2.932570919352443, - "grad_norm": 0.2960319221019745, - "learning_rate": 1.240457448388534e-05, - "loss": 0.0488, + "epoch": 5.865141838704886, + "grad_norm": 1.1956384181976318, + "learning_rate": 4.134858161295114e-06, + "loss": 0.0491, "step": 39490 }, { - "epoch": 2.9333135303727906, - "grad_norm": 2.5065932273864746, - "learning_rate": 1.2400118817763256e-05, - "loss": 0.068, + "epoch": 5.866627060745581, + "grad_norm": 1.9306631088256836, + "learning_rate": 4.1333729392544185e-06, + "loss": 0.0554, "step": 39500 }, { - "epoch": 2.934056141393138, - "grad_norm": 0.5187915563583374, - "learning_rate": 1.239566315164117e-05, - "loss": 0.0766, + "epoch": 5.868112282786276, + "grad_norm": 0.30741629004478455, + "learning_rate": 4.131887717213724e-06, + "loss": 0.0447, "step": 39510 }, { - "epoch": 2.934798752413486, - "grad_norm": 1.234108805656433, - "learning_rate": 1.2391207485519085e-05, - "loss": 0.0883, + "epoch": 5.869597504826972, + "grad_norm": 0.909333348274231, + "learning_rate": 4.130402495173029e-06, + "loss": 0.0551, "step": 39520 }, { - "epoch": 2.9355413634338334, - "grad_norm": 1.0708197355270386, - "learning_rate": 1.2386751819397e-05, - "loss": 0.0661, + "epoch": 5.871082726867667, + "grad_norm": 0.5839402675628662, + "learning_rate": 4.1289172731323335e-06, + "loss": 0.054, "step": 39530 }, { - "epoch": 2.936283974454181, - "grad_norm": 0.3668792247772217, - "learning_rate": 1.2382296153274915e-05, - "loss": 0.0474, + "epoch": 5.872567948908362, + "grad_norm": 1.0948971509933472, + "learning_rate": 4.127432051091639e-06, + "loss": 0.064, "step": 39540 }, { - "epoch": 2.9370265854745283, - "grad_norm": 0.8638660311698914, - "learning_rate": 1.237784048715283e-05, - "loss": 0.0541, + "epoch": 5.874053170949057, + "grad_norm": 0.7635152339935303, + "learning_rate": 4.125946829050943e-06, + "loss": 0.0534, "step": 39550 }, { - "epoch": 2.937769196494876, - "grad_norm": 2.86773681640625, - "learning_rate": 1.2373384821030744e-05, - "loss": 0.0799, + "epoch": 5.875538392989752, + "grad_norm": 0.6703165173530579, + "learning_rate": 4.1244616070102485e-06, + "loss": 0.0603, "step": 39560 }, { - "epoch": 2.9385118075152237, - "grad_norm": 3.684232473373413, - "learning_rate": 1.2368929154908659e-05, - "loss": 0.0852, + "epoch": 5.877023615030447, + "grad_norm": 0.819165825843811, + "learning_rate": 4.122976384969553e-06, + "loss": 0.0544, "step": 39570 }, { - "epoch": 2.939254418535571, - "grad_norm": 1.3619695901870728, - "learning_rate": 1.2364473488786574e-05, - "loss": 0.0468, + "epoch": 5.878508837071142, + "grad_norm": 1.707190990447998, + "learning_rate": 4.121491162928858e-06, + "loss": 0.0638, "step": 39580 }, { - "epoch": 2.9399970295559186, - "grad_norm": 1.2432461977005005, - "learning_rate": 1.2360017822664489e-05, - "loss": 0.0836, + "epoch": 5.879994059111837, + "grad_norm": 0.5266190767288208, + "learning_rate": 4.1200059408881635e-06, + "loss": 0.0563, "step": 39590 }, { - "epoch": 2.940739640576266, - "grad_norm": 0.7977986335754395, - "learning_rate": 1.2355562156542404e-05, - "loss": 0.076, + "epoch": 5.881479281152532, + "grad_norm": 1.176369547843933, + "learning_rate": 4.118520718847468e-06, + "loss": 0.0657, "step": 39600 }, { - "epoch": 2.9414822515966135, - "grad_norm": 1.1581476926803589, - "learning_rate": 1.2351106490420317e-05, - "loss": 0.0622, + "epoch": 5.882964503193227, + "grad_norm": 1.1126110553741455, + "learning_rate": 4.117035496806772e-06, + "loss": 0.0436, "step": 39610 }, { - "epoch": 2.9422248626169614, - "grad_norm": 3.3563334941864014, - "learning_rate": 1.2346650824298234e-05, - "loss": 0.0713, + "epoch": 5.884449725233923, + "grad_norm": 0.798716127872467, + "learning_rate": 4.115550274766078e-06, + "loss": 0.0571, "step": 39620 }, { - "epoch": 2.942967473637309, - "grad_norm": 1.305243968963623, - "learning_rate": 1.2342195158176147e-05, - "loss": 0.0886, + "epoch": 5.885934947274618, + "grad_norm": 0.43753018975257874, + "learning_rate": 4.114065052725383e-06, + "loss": 0.0524, "step": 39630 }, { - "epoch": 2.9437100846576563, - "grad_norm": 0.5178385376930237, - "learning_rate": 1.2337739492054062e-05, - "loss": 0.0861, + "epoch": 5.8874201693153125, + "grad_norm": 0.7865732908248901, + "learning_rate": 4.112579830684687e-06, + "loss": 0.0595, "step": 39640 }, { - "epoch": 2.9444526956780037, - "grad_norm": 0.6791629195213318, - "learning_rate": 1.2333283825931977e-05, - "loss": 0.0655, + "epoch": 5.888905391356007, + "grad_norm": 0.4022800028324127, + "learning_rate": 4.111094608643993e-06, + "loss": 0.0661, "step": 39650 }, { - "epoch": 2.945195306698351, - "grad_norm": 1.124177098274231, - "learning_rate": 1.2328828159809892e-05, - "loss": 0.08, + "epoch": 5.890390613396702, + "grad_norm": 1.2196767330169678, + "learning_rate": 4.109609386603298e-06, + "loss": 0.0477, "step": 39660 }, { - "epoch": 2.945937917718699, - "grad_norm": 1.458204984664917, - "learning_rate": 1.2324372493687807e-05, - "loss": 0.0777, + "epoch": 5.891875835437398, + "grad_norm": 0.7821894288063049, + "learning_rate": 4.108124164562602e-06, + "loss": 0.0499, "step": 39670 }, { - "epoch": 2.9466805287390465, - "grad_norm": 2.084416389465332, - "learning_rate": 1.231991682756572e-05, - "loss": 0.0814, + "epoch": 5.893361057478093, + "grad_norm": 1.9917155504226685, + "learning_rate": 4.106638942521908e-06, + "loss": 0.0934, "step": 39680 }, { - "epoch": 2.947423139759394, - "grad_norm": 1.0431878566741943, - "learning_rate": 1.2315461161443637e-05, - "loss": 0.0686, + "epoch": 5.894846279518788, + "grad_norm": 0.9341873526573181, + "learning_rate": 4.105153720481213e-06, + "loss": 0.0558, "step": 39690 }, { - "epoch": 2.9481657507797414, - "grad_norm": 0.5064348578453064, - "learning_rate": 1.2311005495321552e-05, - "loss": 0.0676, + "epoch": 5.896331501559483, + "grad_norm": 0.6423032879829407, + "learning_rate": 4.103668498440517e-06, + "loss": 0.052, "step": 39700 }, { - "epoch": 2.948908361800089, - "grad_norm": 2.466647148132324, - "learning_rate": 1.2306549829199465e-05, - "loss": 0.072, + "epoch": 5.897816723600179, + "grad_norm": 0.8528935313224792, + "learning_rate": 4.102183276399822e-06, + "loss": 0.0638, "step": 39710 }, { - "epoch": 2.9496509728204368, - "grad_norm": 0.9796644449234009, - "learning_rate": 1.230209416307738e-05, - "loss": 0.0493, + "epoch": 5.8993019456408735, + "grad_norm": 0.3458503484725952, + "learning_rate": 4.100698054359127e-06, + "loss": 0.0495, "step": 39720 }, { - "epoch": 2.950393583840784, - "grad_norm": 1.4864760637283325, - "learning_rate": 1.2297638496955295e-05, - "loss": 0.068, + "epoch": 5.900787167681568, + "grad_norm": 0.5680827498435974, + "learning_rate": 4.099212832318432e-06, + "loss": 0.0576, "step": 39730 }, { - "epoch": 2.9511361948611317, - "grad_norm": 1.9162673950195312, - "learning_rate": 1.229318283083321e-05, - "loss": 0.0799, + "epoch": 5.902272389722263, + "grad_norm": 0.38053596019744873, + "learning_rate": 4.097727610277737e-06, + "loss": 0.0563, "step": 39740 }, { - "epoch": 2.9518788058814796, - "grad_norm": 1.276904582977295, - "learning_rate": 1.2288727164711124e-05, - "loss": 0.0578, + "epoch": 5.903757611762958, + "grad_norm": 0.666007936000824, + "learning_rate": 4.096242388237042e-06, + "loss": 0.0556, "step": 39750 }, { - "epoch": 2.9526214169018266, - "grad_norm": 1.9684635400772095, - "learning_rate": 1.2284271498589039e-05, - "loss": 0.0608, + "epoch": 5.905242833803654, + "grad_norm": 0.8363524079322815, + "learning_rate": 4.0947571661963465e-06, + "loss": 0.0613, "step": 39760 }, { - "epoch": 2.9533640279221745, - "grad_norm": 1.3042725324630737, - "learning_rate": 1.2279815832466955e-05, - "loss": 0.0509, + "epoch": 5.906728055844349, + "grad_norm": 1.3122344017028809, + "learning_rate": 4.093271944155652e-06, + "loss": 0.0713, "step": 39770 }, { - "epoch": 2.954106638942522, - "grad_norm": 1.1622141599655151, - "learning_rate": 1.2275360166344869e-05, - "loss": 0.0634, + "epoch": 5.908213277885044, + "grad_norm": 0.5094563364982605, + "learning_rate": 4.091786722114956e-06, + "loss": 0.0596, "step": 39780 }, { - "epoch": 2.9548492499628694, - "grad_norm": 3.6292014122009277, - "learning_rate": 1.2270904500222784e-05, - "loss": 0.0781, + "epoch": 5.909698499925739, + "grad_norm": 1.2744078636169434, + "learning_rate": 4.0903015000742615e-06, + "loss": 0.0558, "step": 39790 }, { - "epoch": 2.9555918609832172, - "grad_norm": 0.8196433186531067, - "learning_rate": 1.2266448834100699e-05, - "loss": 0.0627, + "epoch": 5.911183721966434, + "grad_norm": 0.8067665100097656, + "learning_rate": 4.088816278033567e-06, + "loss": 0.0535, "step": 39800 }, { - "epoch": 2.9563344720035647, - "grad_norm": 2.011394739151001, - "learning_rate": 1.2261993167978614e-05, - "loss": 0.0516, + "epoch": 5.912668944007129, + "grad_norm": 0.9969848990440369, + "learning_rate": 4.087331055992871e-06, + "loss": 0.067, "step": 39810 }, { - "epoch": 2.957077083023912, - "grad_norm": 0.2600031793117523, - "learning_rate": 1.2257537501856529e-05, - "loss": 0.0698, + "epoch": 5.914154166047824, + "grad_norm": 0.7039811015129089, + "learning_rate": 4.0858458339521765e-06, + "loss": 0.0712, "step": 39820 }, { - "epoch": 2.9578196940442596, - "grad_norm": 1.4970747232437134, - "learning_rate": 1.2253081835734442e-05, - "loss": 0.0725, + "epoch": 5.915639388088519, + "grad_norm": 0.5828683972358704, + "learning_rate": 4.084360611911481e-06, + "loss": 0.0605, "step": 39830 }, { - "epoch": 2.958562305064607, - "grad_norm": 0.6336455941200256, - "learning_rate": 1.2248626169612359e-05, - "loss": 0.0471, + "epoch": 5.917124610129214, + "grad_norm": 1.289592981338501, + "learning_rate": 4.082875389870786e-06, + "loss": 0.0649, "step": 39840 }, { - "epoch": 2.959304916084955, - "grad_norm": 1.8660321235656738, - "learning_rate": 1.2244170503490272e-05, - "loss": 0.0905, + "epoch": 5.91860983216991, + "grad_norm": 0.8193347454071045, + "learning_rate": 4.081390167830091e-06, + "loss": 0.0717, "step": 39850 }, { - "epoch": 2.9600475271053024, - "grad_norm": 2.4397964477539062, - "learning_rate": 1.2239714837368187e-05, - "loss": 0.0487, + "epoch": 5.920095054210605, + "grad_norm": 0.9378249049186707, + "learning_rate": 4.079904945789396e-06, + "loss": 0.0591, "step": 39860 }, { - "epoch": 2.96079013812565, - "grad_norm": 2.365588426589966, - "learning_rate": 1.22352591712461e-05, - "loss": 0.0774, + "epoch": 5.9215802762513, + "grad_norm": 1.126220941543579, + "learning_rate": 4.0784197237487e-06, + "loss": 0.0723, "step": 39870 }, { - "epoch": 2.9615327491459973, - "grad_norm": 2.236955404281616, - "learning_rate": 1.2230803505124017e-05, - "loss": 0.0712, + "epoch": 5.923065498291995, + "grad_norm": 1.3452033996582031, + "learning_rate": 4.076934501708006e-06, + "loss": 0.0636, "step": 39880 }, { - "epoch": 2.9622753601663447, - "grad_norm": 0.8604252338409424, - "learning_rate": 1.2226347839001932e-05, - "loss": 0.0643, + "epoch": 5.9245507203326895, + "grad_norm": 0.8854779601097107, + "learning_rate": 4.075449279667311e-06, + "loss": 0.074, "step": 39890 }, { - "epoch": 2.9630179711866926, - "grad_norm": 2.3100244998931885, - "learning_rate": 1.2221892172879845e-05, - "loss": 0.062, + "epoch": 5.926035942373385, + "grad_norm": 0.6700276136398315, + "learning_rate": 4.073964057626615e-06, + "loss": 0.0575, "step": 39900 }, { - "epoch": 2.96376058220704, - "grad_norm": 1.03587007522583, - "learning_rate": 1.221743650675776e-05, - "loss": 0.0636, + "epoch": 5.92752116441408, + "grad_norm": 0.5441814661026001, + "learning_rate": 4.07247883558592e-06, + "loss": 0.0543, "step": 39910 }, { - "epoch": 2.9645031932273875, - "grad_norm": 0.37145113945007324, - "learning_rate": 1.2212980840635675e-05, - "loss": 0.0633, + "epoch": 5.929006386454775, + "grad_norm": 0.8362489342689514, + "learning_rate": 4.070993613545226e-06, + "loss": 0.0563, "step": 39920 }, { - "epoch": 2.965245804247735, - "grad_norm": 2.6632423400878906, - "learning_rate": 1.220852517451359e-05, - "loss": 0.0567, + "epoch": 5.93049160849547, + "grad_norm": 0.7308693528175354, + "learning_rate": 4.06950839150453e-06, + "loss": 0.0797, "step": 39930 }, { - "epoch": 2.9659884152680824, - "grad_norm": 1.9122623205184937, - "learning_rate": 1.2204069508391505e-05, - "loss": 0.0449, + "epoch": 5.931976830536165, + "grad_norm": 0.3876124322414398, + "learning_rate": 4.068023169463835e-06, + "loss": 0.0688, "step": 39940 }, { - "epoch": 2.9667310262884303, - "grad_norm": 0.8057365417480469, - "learning_rate": 1.219961384226942e-05, - "loss": 0.0468, + "epoch": 5.933462052576861, + "grad_norm": 0.6457473635673523, + "learning_rate": 4.06653794742314e-06, + "loss": 0.0607, "step": 39950 }, { - "epoch": 2.967473637308778, - "grad_norm": 0.6970472931861877, - "learning_rate": 1.2195158176147335e-05, - "loss": 0.0478, + "epoch": 5.934947274617556, + "grad_norm": 0.6994035243988037, + "learning_rate": 4.065052725382445e-06, + "loss": 0.055, "step": 39960 }, { - "epoch": 2.9682162483291252, - "grad_norm": 1.0414628982543945, - "learning_rate": 1.2190702510025248e-05, - "loss": 0.0621, + "epoch": 5.9364324966582505, + "grad_norm": 2.109799861907959, + "learning_rate": 4.06356750334175e-06, + "loss": 0.0857, "step": 39970 }, { - "epoch": 2.9689588593494727, - "grad_norm": 1.28219735622406, - "learning_rate": 1.2186246843903163e-05, - "loss": 0.0494, + "epoch": 5.937917718698945, + "grad_norm": 1.9049144983291626, + "learning_rate": 4.062082281301055e-06, + "loss": 0.0681, "step": 39980 }, { - "epoch": 2.96970147036982, - "grad_norm": 2.46976900100708, - "learning_rate": 1.2181791177781078e-05, - "loss": 0.0879, + "epoch": 5.93940294073964, + "grad_norm": 1.0914199352264404, + "learning_rate": 4.06059705926036e-06, + "loss": 0.0597, "step": 39990 }, { - "epoch": 2.970444081390168, - "grad_norm": 1.4787884950637817, - "learning_rate": 1.2177335511658993e-05, - "loss": 0.0699, + "epoch": 5.940888162780336, + "grad_norm": 0.5895785689353943, + "learning_rate": 4.059111837219665e-06, + "loss": 0.0526, "step": 40000 }, { - "epoch": 2.9711866924105155, - "grad_norm": 1.21670401096344, - "learning_rate": 1.2172879845536908e-05, - "loss": 0.073, + "epoch": 5.942373384821031, + "grad_norm": 1.0604206323623657, + "learning_rate": 4.057626615178969e-06, + "loss": 0.061, "step": 40010 }, { - "epoch": 2.971929303430863, - "grad_norm": 0.8801470398902893, - "learning_rate": 1.2168424179414822e-05, - "loss": 0.0288, + "epoch": 5.943858606861726, + "grad_norm": 0.4064539670944214, + "learning_rate": 4.0561413931382744e-06, + "loss": 0.0541, "step": 40020 }, { - "epoch": 2.9726719144512104, - "grad_norm": 1.1613928079605103, - "learning_rate": 1.2163968513292738e-05, - "loss": 0.0737, + "epoch": 5.945343828902421, + "grad_norm": 1.6338391304016113, + "learning_rate": 4.05465617109758e-06, + "loss": 0.0765, "step": 40030 }, { - "epoch": 2.973414525471558, - "grad_norm": 1.54849112033844, - "learning_rate": 1.2159512847170652e-05, - "loss": 0.1064, + "epoch": 5.946829050943116, + "grad_norm": 0.4018402695655823, + "learning_rate": 4.053170949056884e-06, + "loss": 0.0599, "step": 40040 }, { - "epoch": 2.9741571364919057, - "grad_norm": 0.6201350688934326, - "learning_rate": 1.2155057181048567e-05, - "loss": 0.0259, + "epoch": 5.948314272983811, + "grad_norm": 0.8363461494445801, + "learning_rate": 4.0516857270161894e-06, + "loss": 0.0542, "step": 40050 }, { - "epoch": 2.974899747512253, - "grad_norm": 1.4951953887939453, - "learning_rate": 1.2150601514926483e-05, - "loss": 0.0576, + "epoch": 5.949799495024506, + "grad_norm": 1.554994821548462, + "learning_rate": 4.050200504975495e-06, + "loss": 0.0495, "step": 40060 }, { - "epoch": 2.9756423585326006, - "grad_norm": 2.557687520980835, - "learning_rate": 1.2146145848804397e-05, - "loss": 0.0804, + "epoch": 5.951284717065201, + "grad_norm": 0.9533831477165222, + "learning_rate": 4.048715282934799e-06, + "loss": 0.0451, "step": 40070 }, { - "epoch": 2.976384969552948, - "grad_norm": 1.9214116334915161, - "learning_rate": 1.2141690182682312e-05, - "loss": 0.0941, + "epoch": 5.952769939105896, + "grad_norm": 0.3587993085384369, + "learning_rate": 4.047230060894104e-06, + "loss": 0.0463, "step": 40080 }, { - "epoch": 2.9771275805732955, - "grad_norm": 1.7784258127212524, - "learning_rate": 1.2137234516560225e-05, - "loss": 0.0808, + "epoch": 5.954255161146591, + "grad_norm": 1.0116937160491943, + "learning_rate": 4.045744838853409e-06, + "loss": 0.0614, "step": 40090 }, { - "epoch": 2.9778701915936434, - "grad_norm": 2.0200514793395996, - "learning_rate": 1.2132778850438142e-05, - "loss": 0.0691, + "epoch": 5.955740383187287, + "grad_norm": 0.9308059215545654, + "learning_rate": 4.044259616812714e-06, + "loss": 0.0542, "step": 40100 }, { - "epoch": 2.978612802613991, - "grad_norm": 2.5777747631073, - "learning_rate": 1.2128323184316057e-05, - "loss": 0.079, + "epoch": 5.957225605227982, + "grad_norm": 1.0829479694366455, + "learning_rate": 4.042774394772019e-06, + "loss": 0.0552, "step": 40110 }, { - "epoch": 2.9793554136343383, - "grad_norm": 0.5303300619125366, - "learning_rate": 1.212386751819397e-05, - "loss": 0.0507, + "epoch": 5.958710827268677, + "grad_norm": 0.3777959644794464, + "learning_rate": 4.041289172731324e-06, + "loss": 0.0669, "step": 40120 }, { - "epoch": 2.9800980246546858, - "grad_norm": 1.3025041818618774, - "learning_rate": 1.2119411852071885e-05, - "loss": 0.0579, + "epoch": 5.9601960493093715, + "grad_norm": 1.892651915550232, + "learning_rate": 4.039803950690628e-06, + "loss": 0.0546, "step": 40130 }, { - "epoch": 2.980840635675033, - "grad_norm": 1.2491486072540283, - "learning_rate": 1.21149561859498e-05, - "loss": 0.1129, + "epoch": 5.961681271350066, + "grad_norm": 0.3805493414402008, + "learning_rate": 4.038318728649934e-06, + "loss": 0.0494, "step": 40140 }, { - "epoch": 2.981583246695381, - "grad_norm": 1.2781689167022705, - "learning_rate": 1.2110500519827715e-05, - "loss": 0.0695, + "epoch": 5.963166493390762, + "grad_norm": 1.2674118280410767, + "learning_rate": 4.036833506609239e-06, + "loss": 0.0408, "step": 40150 }, { - "epoch": 2.9823258577157286, - "grad_norm": 0.6310214400291443, - "learning_rate": 1.2106044853705628e-05, - "loss": 0.0625, + "epoch": 5.964651715431457, + "grad_norm": 1.2300618886947632, + "learning_rate": 4.035348284568543e-06, + "loss": 0.0579, "step": 40160 }, { - "epoch": 2.983068468736076, - "grad_norm": 0.8008638024330139, - "learning_rate": 1.2101589187583543e-05, - "loss": 0.0615, + "epoch": 5.966136937472152, + "grad_norm": 1.122583031654358, + "learning_rate": 4.033863062527848e-06, + "loss": 0.0691, "step": 40170 }, { - "epoch": 2.9838110797564235, - "grad_norm": 1.496964454650879, - "learning_rate": 1.209713352146146e-05, - "loss": 0.0569, + "epoch": 5.967622159512847, + "grad_norm": 0.8715793490409851, + "learning_rate": 4.032377840487153e-06, + "loss": 0.0592, "step": 40180 }, { - "epoch": 2.984553690776771, - "grad_norm": 1.446394681930542, - "learning_rate": 1.2092677855339373e-05, - "loss": 0.0606, + "epoch": 5.969107381553542, + "grad_norm": 0.9388965368270874, + "learning_rate": 4.030892618446458e-06, + "loss": 0.0593, "step": 40190 }, { - "epoch": 2.985296301797119, - "grad_norm": 2.3738341331481934, - "learning_rate": 1.2088222189217288e-05, - "loss": 0.0918, + "epoch": 5.970592603594238, + "grad_norm": 0.6892751455307007, + "learning_rate": 4.029407396405763e-06, + "loss": 0.0458, "step": 40200 }, { - "epoch": 2.9860389128174663, - "grad_norm": 2.0323574542999268, - "learning_rate": 1.2083766523095203e-05, - "loss": 0.0548, + "epoch": 5.9720778256349325, + "grad_norm": 0.6505733132362366, + "learning_rate": 4.027922174365068e-06, + "loss": 0.0557, "step": 40210 }, { - "epoch": 2.9867815238378137, - "grad_norm": 1.84878408908844, - "learning_rate": 1.2079310856973118e-05, - "loss": 0.0715, + "epoch": 5.973563047675627, + "grad_norm": 0.536372184753418, + "learning_rate": 4.026436952324373e-06, + "loss": 0.0512, "step": 40220 }, { - "epoch": 2.987524134858161, - "grad_norm": 3.0860447883605957, - "learning_rate": 1.2074855190851033e-05, - "loss": 0.0663, + "epoch": 5.975048269716322, + "grad_norm": 1.115814208984375, + "learning_rate": 4.024951730283678e-06, + "loss": 0.0546, "step": 40230 }, { - "epoch": 2.9882667458785086, - "grad_norm": 2.3522326946258545, - "learning_rate": 1.2070399524728946e-05, - "loss": 0.0642, + "epoch": 5.976533491757017, + "grad_norm": 1.299026608467102, + "learning_rate": 4.023466508242982e-06, + "loss": 0.0513, "step": 40240 }, { - "epoch": 2.9890093568988565, - "grad_norm": 3.3070790767669678, - "learning_rate": 1.2065943858606863e-05, - "loss": 0.0727, + "epoch": 5.978018713797713, + "grad_norm": 0.5905170440673828, + "learning_rate": 4.0219812862022874e-06, + "loss": 0.0584, "step": 40250 }, { - "epoch": 2.989751967919204, - "grad_norm": 1.6434651613235474, - "learning_rate": 1.2061488192484776e-05, - "loss": 0.0662, + "epoch": 5.979503935838408, + "grad_norm": 0.8045508861541748, + "learning_rate": 4.020496064161593e-06, + "loss": 0.0638, "step": 40260 }, { - "epoch": 2.9904945789395514, - "grad_norm": 1.9186336994171143, - "learning_rate": 1.2057032526362691e-05, - "loss": 0.0882, + "epoch": 5.980989157879103, + "grad_norm": 1.1870166063308716, + "learning_rate": 4.019010842120897e-06, + "loss": 0.0553, "step": 40270 }, { - "epoch": 2.991237189959899, - "grad_norm": 2.3194291591644287, - "learning_rate": 1.2052576860240605e-05, - "loss": 0.0749, + "epoch": 5.982474379919798, + "grad_norm": 0.40031322836875916, + "learning_rate": 4.017525620080202e-06, + "loss": 0.0481, "step": 40280 }, { - "epoch": 2.9919798009802463, - "grad_norm": 1.5101096630096436, - "learning_rate": 1.2048121194118521e-05, - "loss": 0.0563, + "epoch": 5.9839596019604935, + "grad_norm": 0.9006701707839966, + "learning_rate": 4.016040398039508e-06, + "loss": 0.0577, "step": 40290 }, { - "epoch": 2.992722412000594, - "grad_norm": 0.4890212118625641, - "learning_rate": 1.2043665527996436e-05, - "loss": 0.0693, + "epoch": 5.985444824001188, + "grad_norm": 0.5803741812705994, + "learning_rate": 4.014555175998812e-06, + "loss": 0.0607, "step": 40300 }, { - "epoch": 2.9934650230209416, - "grad_norm": 2.9807426929473877, - "learning_rate": 1.203920986187435e-05, - "loss": 0.0661, + "epoch": 5.986930046041883, + "grad_norm": 0.7242305874824524, + "learning_rate": 4.0130699539581166e-06, + "loss": 0.0644, "step": 40310 }, { - "epoch": 2.994207634041289, - "grad_norm": 2.466383934020996, - "learning_rate": 1.2034754195752266e-05, - "loss": 0.0578, + "epoch": 5.988415268082578, + "grad_norm": 1.0055646896362305, + "learning_rate": 4.011584731917422e-06, + "loss": 0.0649, "step": 40320 }, { - "epoch": 2.994950245061637, - "grad_norm": 2.2628114223480225, - "learning_rate": 1.203029852963018e-05, - "loss": 0.0521, + "epoch": 5.989900490123273, + "grad_norm": 0.3593951463699341, + "learning_rate": 4.010099509876727e-06, + "loss": 0.0382, "step": 40330 }, { - "epoch": 2.995692856081984, - "grad_norm": 0.5551472306251526, - "learning_rate": 1.2025842863508095e-05, - "loss": 0.0203, + "epoch": 5.991385712163969, + "grad_norm": 0.23760199546813965, + "learning_rate": 4.0086142878360316e-06, + "loss": 0.0543, "step": 40340 }, { - "epoch": 2.996435467102332, - "grad_norm": 0.6742831468582153, - "learning_rate": 1.202138719738601e-05, - "loss": 0.0729, + "epoch": 5.992870934204664, + "grad_norm": 0.9924305081367493, + "learning_rate": 4.007129065795337e-06, + "loss": 0.0602, "step": 40350 }, { - "epoch": 2.9971780781226793, - "grad_norm": 2.0665626525878906, - "learning_rate": 1.2016931531263925e-05, - "loss": 0.0778, + "epoch": 5.994356156245359, + "grad_norm": 0.8447095155715942, + "learning_rate": 4.005643843754642e-06, + "loss": 0.0511, "step": 40360 }, { - "epoch": 2.997920689143027, - "grad_norm": 1.7712310552597046, - "learning_rate": 1.201247586514184e-05, - "loss": 0.0635, + "epoch": 5.995841378286054, + "grad_norm": 1.5200226306915283, + "learning_rate": 4.0041586217139466e-06, + "loss": 0.0543, "step": 40370 }, { - "epoch": 2.9986633001633747, - "grad_norm": 0.27435049414634705, - "learning_rate": 1.2008020199019753e-05, - "loss": 0.051, + "epoch": 5.9973266003267485, + "grad_norm": 0.6162705421447754, + "learning_rate": 4.002673399673251e-06, + "loss": 0.0462, "step": 40380 }, { - "epoch": 2.999405911183722, - "grad_norm": 2.0611684322357178, - "learning_rate": 1.2003564532897668e-05, - "loss": 0.0496, + "epoch": 5.998811822367444, + "grad_norm": 0.6710197329521179, + "learning_rate": 4.001188177632556e-06, + "loss": 0.0659, "step": 40390 }, { - "epoch": 3.0, - "eval_f1": 0.0, - "eval_loss": 0.055565182119607925, - "eval_runtime": 795.9474, - "eval_samples_per_second": 47.766, - "eval_steps_per_second": 2.986, + "epoch": 6.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.05773457512259483, + "eval_runtime": 212.5572, + "eval_samples_per_second": 178.865, + "eval_steps_per_second": 5.594, "step": 40398 }, { - "epoch": 3.0001485222040696, - "grad_norm": 2.1062331199645996, - "learning_rate": 1.1999108866775585e-05, - "loss": 0.0856, + "epoch": 6.000297044408139, + "grad_norm": 0.789023756980896, + "learning_rate": 3.9997029555918616e-06, + "loss": 0.0558, "step": 40400 }, { - "epoch": 3.000891133224417, - "grad_norm": 0.7246180772781372, - "learning_rate": 1.1994653200653498e-05, - "loss": 0.0521, + "epoch": 6.001782266448834, + "grad_norm": 0.887060821056366, + "learning_rate": 3.998217733551166e-06, + "loss": 0.0514, "step": 40410 }, { - "epoch": 3.0016337442447645, - "grad_norm": 0.7559748888015747, - "learning_rate": 1.1990197534531413e-05, - "loss": 0.0641, + "epoch": 6.003267488489529, + "grad_norm": 0.6187435388565063, + "learning_rate": 3.996732511510471e-06, + "loss": 0.0489, "step": 40420 }, { - "epoch": 3.002376355265112, - "grad_norm": 1.5347598791122437, - "learning_rate": 1.1985741868409326e-05, - "loss": 0.0732, + "epoch": 6.004752710530224, + "grad_norm": 0.9915494918823242, + "learning_rate": 3.995247289469776e-06, + "loss": 0.0643, "step": 40430 }, { - "epoch": 3.00311896628546, - "grad_norm": 0.6457234025001526, - "learning_rate": 1.1981286202287243e-05, - "loss": 0.058, + "epoch": 6.00623793257092, + "grad_norm": 0.8194555640220642, + "learning_rate": 3.993762067429081e-06, + "loss": 0.0729, "step": 40440 }, { - "epoch": 3.0038615773058073, - "grad_norm": 0.716690719127655, - "learning_rate": 1.1976830536165156e-05, - "loss": 0.0528, + "epoch": 6.0077231546116145, + "grad_norm": 0.6049264669418335, + "learning_rate": 3.992276845388386e-06, + "loss": 0.0503, "step": 40450 }, { - "epoch": 3.0046041883261547, - "grad_norm": 3.154327630996704, - "learning_rate": 1.1972374870043071e-05, - "loss": 0.09, + "epoch": 6.0092083766523094, + "grad_norm": 0.9943747520446777, + "learning_rate": 3.990791623347691e-06, + "loss": 0.0452, "step": 40460 }, { - "epoch": 3.005346799346502, - "grad_norm": 0.7682334780693054, - "learning_rate": 1.1967919203920988e-05, - "loss": 0.0764, + "epoch": 6.010693598693004, + "grad_norm": 0.8540377020835876, + "learning_rate": 3.989306401306996e-06, + "loss": 0.0512, "step": 40470 }, { - "epoch": 3.0060894103668496, - "grad_norm": 0.4691618084907532, - "learning_rate": 1.1963463537798901e-05, - "loss": 0.0437, + "epoch": 6.012178820733699, + "grad_norm": 0.9365292191505432, + "learning_rate": 3.9878211792663e-06, + "loss": 0.0633, "step": 40480 }, { - "epoch": 3.0068320213871975, - "grad_norm": 1.4054096937179565, - "learning_rate": 1.1959007871676816e-05, - "loss": 0.065, + "epoch": 6.013664042774395, + "grad_norm": 0.8296148180961609, + "learning_rate": 3.986335957225606e-06, + "loss": 0.0633, "step": 40490 }, { - "epoch": 3.007574632407545, - "grad_norm": 2.800178050994873, - "learning_rate": 1.195455220555473e-05, - "loss": 0.1019, + "epoch": 6.01514926481509, + "grad_norm": 0.6906760334968567, + "learning_rate": 3.98485073518491e-06, + "loss": 0.0568, "step": 40500 }, { - "epoch": 3.0083172434278924, - "grad_norm": 1.4787908792495728, - "learning_rate": 1.1950096539432646e-05, - "loss": 0.0959, + "epoch": 6.016634486855785, + "grad_norm": 0.9065415263175964, + "learning_rate": 3.983365513144215e-06, + "loss": 0.0665, "step": 40510 }, { - "epoch": 3.00905985444824, - "grad_norm": 0.8632726073265076, - "learning_rate": 1.1945640873310561e-05, - "loss": 0.062, + "epoch": 6.01811970889648, + "grad_norm": 0.38411182165145874, + "learning_rate": 3.981880291103521e-06, + "loss": 0.056, "step": 40520 }, { - "epoch": 3.0098024654685878, - "grad_norm": 0.41822558641433716, - "learning_rate": 1.1941185207188475e-05, - "loss": 0.0676, + "epoch": 6.0196049309371755, + "grad_norm": 0.6814888119697571, + "learning_rate": 3.980395069062825e-06, + "loss": 0.0489, "step": 40530 }, { - "epoch": 3.010545076488935, - "grad_norm": 1.6045604944229126, - "learning_rate": 1.193672954106639e-05, - "loss": 0.0548, + "epoch": 6.02109015297787, + "grad_norm": 1.3805969953536987, + "learning_rate": 3.9789098470221295e-06, + "loss": 0.0561, "step": 40540 }, { - "epoch": 3.0112876875092827, - "grad_norm": 4.851860523223877, - "learning_rate": 1.1932273874944304e-05, - "loss": 0.0509, + "epoch": 6.022575375018565, + "grad_norm": 1.1943320035934448, + "learning_rate": 3.977424624981435e-06, + "loss": 0.0547, "step": 40550 }, { - "epoch": 3.01203029852963, - "grad_norm": 1.4133330583572388, - "learning_rate": 1.192781820882222e-05, - "loss": 0.0475, + "epoch": 6.02406059705926, + "grad_norm": 0.49964407086372375, + "learning_rate": 3.97593940294074e-06, + "loss": 0.0422, "step": 40560 }, { - "epoch": 3.0127729095499776, - "grad_norm": 1.6727176904678345, - "learning_rate": 1.1923362542700133e-05, - "loss": 0.0687, + "epoch": 6.025545819099955, + "grad_norm": 0.6587172150611877, + "learning_rate": 3.9744541809000445e-06, + "loss": 0.0711, "step": 40570 }, { - "epoch": 3.0135155205703255, - "grad_norm": 1.9212983846664429, - "learning_rate": 1.1918906876578048e-05, - "loss": 0.0982, + "epoch": 6.027031041140651, + "grad_norm": 1.0862164497375488, + "learning_rate": 3.97296895885935e-06, + "loss": 0.0494, "step": 40580 }, { - "epoch": 3.014258131590673, - "grad_norm": 2.5236656665802, - "learning_rate": 1.1914451210455964e-05, - "loss": 0.0831, + "epoch": 6.028516263181346, + "grad_norm": 1.1919859647750854, + "learning_rate": 3.971483736818655e-06, + "loss": 0.0492, "step": 40590 }, { - "epoch": 3.0150007426110204, - "grad_norm": 0.8461244106292725, - "learning_rate": 1.1909995544333878e-05, - "loss": 0.0635, + "epoch": 6.030001485222041, + "grad_norm": 1.0630143880844116, + "learning_rate": 3.9699985147779595e-06, + "loss": 0.0666, "step": 40600 }, { - "epoch": 3.015743353631368, - "grad_norm": 1.5794192552566528, - "learning_rate": 1.1905539878211793e-05, - "loss": 0.0718, + "epoch": 6.031486707262736, + "grad_norm": 0.7935609221458435, + "learning_rate": 3.968513292737264e-06, + "loss": 0.0298, "step": 40610 }, { - "epoch": 3.0164859646517153, - "grad_norm": 0.7722788453102112, - "learning_rate": 1.1901084212089708e-05, - "loss": 0.0726, + "epoch": 6.0329719293034305, + "grad_norm": 0.8281997442245483, + "learning_rate": 3.96702807069657e-06, + "loss": 0.0451, "step": 40620 }, { - "epoch": 3.017228575672063, - "grad_norm": 1.9075877666473389, - "learning_rate": 1.1896628545967623e-05, - "loss": 0.0537, + "epoch": 6.034457151344126, + "grad_norm": 0.31102851033210754, + "learning_rate": 3.9655428486558745e-06, + "loss": 0.0701, "step": 40630 }, { - "epoch": 3.0179711866924106, - "grad_norm": 1.057822585105896, - "learning_rate": 1.1892172879845538e-05, - "loss": 0.0839, + "epoch": 6.035942373384821, + "grad_norm": 0.6675470471382141, + "learning_rate": 3.964057626615179e-06, + "loss": 0.0577, "step": 40640 }, { - "epoch": 3.018713797712758, - "grad_norm": 2.4780874252319336, - "learning_rate": 1.1887717213723451e-05, - "loss": 0.0492, + "epoch": 6.037427595425516, + "grad_norm": 1.168430209159851, + "learning_rate": 3.962572404574484e-06, + "loss": 0.0434, "step": 40650 }, { - "epoch": 3.0194564087331055, - "grad_norm": 1.614044189453125, - "learning_rate": 1.1883261547601368e-05, - "loss": 0.0488, + "epoch": 6.038912817466211, + "grad_norm": 1.6854634284973145, + "learning_rate": 3.9610871825337895e-06, + "loss": 0.0683, "step": 40660 }, { - "epoch": 3.020199019753453, - "grad_norm": 0.8229920268058777, - "learning_rate": 1.1878805881479281e-05, - "loss": 0.0803, + "epoch": 6.040398039506906, + "grad_norm": 0.688122034072876, + "learning_rate": 3.959601960493094e-06, + "loss": 0.0568, "step": 40670 }, { - "epoch": 3.020941630773801, - "grad_norm": 0.7119345664978027, - "learning_rate": 1.1874350215357196e-05, - "loss": 0.048, + "epoch": 6.041883261547602, + "grad_norm": 0.9928855895996094, + "learning_rate": 3.958116738452399e-06, + "loss": 0.0485, "step": 40680 }, { - "epoch": 3.0216842417941483, - "grad_norm": 1.8598228693008423, - "learning_rate": 1.1869894549235111e-05, - "loss": 0.0658, + "epoch": 6.043368483588297, + "grad_norm": 0.8839887380599976, + "learning_rate": 3.956631516411704e-06, + "loss": 0.0527, "step": 40690 }, { - "epoch": 3.0224268528144957, - "grad_norm": 0.4016118347644806, - "learning_rate": 1.1865438883113026e-05, - "loss": 0.0351, + "epoch": 6.0448537056289915, + "grad_norm": 2.176649570465088, + "learning_rate": 3.955146294371009e-06, + "loss": 0.0759, "step": 40700 }, { - "epoch": 3.023169463834843, - "grad_norm": 1.3350958824157715, - "learning_rate": 1.1860983216990941e-05, - "loss": 0.0553, + "epoch": 6.046338927669686, + "grad_norm": 0.7272756099700928, + "learning_rate": 3.953661072330313e-06, + "loss": 0.0462, "step": 40710 }, { - "epoch": 3.0239120748551906, - "grad_norm": 0.8724972605705261, - "learning_rate": 1.1856527550868854e-05, - "loss": 0.0677, + "epoch": 6.047824149710381, + "grad_norm": 0.5668994784355164, + "learning_rate": 3.952175850289619e-06, + "loss": 0.0561, "step": 40720 }, { - "epoch": 3.0246546858755385, - "grad_norm": 0.5378849506378174, - "learning_rate": 1.1852071884746771e-05, - "loss": 0.0623, + "epoch": 6.049309371751077, + "grad_norm": 0.7731595635414124, + "learning_rate": 3.950690628248924e-06, + "loss": 0.074, "step": 40730 }, { - "epoch": 3.025397296895886, - "grad_norm": 0.3792591094970703, - "learning_rate": 1.1847616218624684e-05, - "loss": 0.0654, + "epoch": 6.050794593791772, + "grad_norm": 0.7913956642150879, + "learning_rate": 3.949205406208228e-06, + "loss": 0.0644, "step": 40740 }, { - "epoch": 3.0261399079162334, - "grad_norm": 0.3281194567680359, - "learning_rate": 1.18431605525026e-05, - "loss": 0.0862, + "epoch": 6.052279815832467, + "grad_norm": 0.48246562480926514, + "learning_rate": 3.947720184167534e-06, + "loss": 0.0449, "step": 40750 }, { - "epoch": 3.026882518936581, - "grad_norm": 1.0597580671310425, - "learning_rate": 1.1838704886380514e-05, - "loss": 0.0651, + "epoch": 6.053765037873162, + "grad_norm": 1.6649562120437622, + "learning_rate": 3.946234962126838e-06, + "loss": 0.0695, "step": 40760 }, { - "epoch": 3.0276251299569283, - "grad_norm": 0.7202780842781067, - "learning_rate": 1.183424922025843e-05, - "loss": 0.0742, + "epoch": 6.055250259913857, + "grad_norm": 0.8541234135627747, + "learning_rate": 3.944749740086143e-06, + "loss": 0.0526, "step": 40770 }, { - "epoch": 3.0283677409772762, - "grad_norm": 0.5692980885505676, - "learning_rate": 1.1829793554136344e-05, - "loss": 0.0685, + "epoch": 6.0567354819545525, + "grad_norm": 1.0169918537139893, + "learning_rate": 3.943264518045448e-06, + "loss": 0.0619, "step": 40780 }, { - "epoch": 3.0291103519976237, - "grad_norm": 0.774348258972168, - "learning_rate": 1.1825337888014258e-05, - "loss": 0.0549, + "epoch": 6.058220703995247, + "grad_norm": 1.1719691753387451, + "learning_rate": 3.941779296004753e-06, + "loss": 0.0566, "step": 40790 }, { - "epoch": 3.029852963017971, - "grad_norm": 1.022932529449463, - "learning_rate": 1.1820882221892173e-05, - "loss": 0.0687, + "epoch": 6.059705926035942, + "grad_norm": 0.8269428014755249, + "learning_rate": 3.9402940739640575e-06, + "loss": 0.0501, "step": 40800 }, { - "epoch": 3.0305955740383186, - "grad_norm": 0.4947283864021301, - "learning_rate": 1.181642655577009e-05, - "loss": 0.056, + "epoch": 6.061191148076637, + "grad_norm": 1.1896958351135254, + "learning_rate": 3.938808851923363e-06, + "loss": 0.0516, "step": 40810 }, { - "epoch": 3.0313381850586665, - "grad_norm": 1.4515385627746582, - "learning_rate": 1.1811970889648003e-05, - "loss": 0.0671, + "epoch": 6.062676370117333, + "grad_norm": 1.3691949844360352, + "learning_rate": 3.937323629882668e-06, + "loss": 0.0591, "step": 40820 }, { - "epoch": 3.032080796079014, - "grad_norm": 2.044039487838745, - "learning_rate": 1.1807515223525918e-05, - "loss": 0.0713, + "epoch": 6.064161592158028, + "grad_norm": 0.9588063359260559, + "learning_rate": 3.9358384078419725e-06, + "loss": 0.0489, "step": 40830 }, { - "epoch": 3.0328234070993614, - "grad_norm": 0.6720436215400696, - "learning_rate": 1.1803059557403831e-05, - "loss": 0.0685, + "epoch": 6.065646814198723, + "grad_norm": 1.0517445802688599, + "learning_rate": 3.934353185801277e-06, + "loss": 0.0603, "step": 40840 }, { - "epoch": 3.033566018119709, - "grad_norm": 1.2701702117919922, - "learning_rate": 1.1798603891281748e-05, - "loss": 0.0871, + "epoch": 6.067132036239418, + "grad_norm": 1.123839020729065, + "learning_rate": 3.932867963760583e-06, + "loss": 0.0402, "step": 40850 }, { - "epoch": 3.0343086291400563, - "grad_norm": 0.6899190545082092, - "learning_rate": 1.179414822515966e-05, - "loss": 0.0761, + "epoch": 6.068617258280113, + "grad_norm": 1.0038114786148071, + "learning_rate": 3.9313827417198875e-06, + "loss": 0.0615, "step": 40860 }, { - "epoch": 3.035051240160404, - "grad_norm": 2.306637763977051, - "learning_rate": 1.1789692559037576e-05, - "loss": 0.0697, + "epoch": 6.070102480320808, + "grad_norm": 0.554094672203064, + "learning_rate": 3.929897519679192e-06, + "loss": 0.06, "step": 40870 }, { - "epoch": 3.0357938511807516, - "grad_norm": 0.6029354333877563, - "learning_rate": 1.1785236892915493e-05, - "loss": 0.0677, + "epoch": 6.071587702361503, + "grad_norm": 1.453340768814087, + "learning_rate": 3.928412297638497e-06, + "loss": 0.0546, "step": 40880 }, { - "epoch": 3.036536462201099, - "grad_norm": 2.0085299015045166, - "learning_rate": 1.1780781226793406e-05, - "loss": 0.0726, + "epoch": 6.073072924402198, + "grad_norm": 1.0778160095214844, + "learning_rate": 3.9269270755978025e-06, + "loss": 0.0594, "step": 40890 }, { - "epoch": 3.0372790732214465, - "grad_norm": 1.0071104764938354, - "learning_rate": 1.177632556067132e-05, - "loss": 0.0556, + "epoch": 6.074558146442893, + "grad_norm": 1.0551393032073975, + "learning_rate": 3.925441853557107e-06, + "loss": 0.052, "step": 40900 }, { - "epoch": 3.038021684241794, - "grad_norm": 1.3485918045043945, - "learning_rate": 1.1771869894549234e-05, - "loss": 0.0403, + "epoch": 6.076043368483588, + "grad_norm": 1.485189437866211, + "learning_rate": 3.923956631516412e-06, + "loss": 0.0466, "step": 40910 }, { - "epoch": 3.038764295262142, - "grad_norm": 0.6336653232574463, - "learning_rate": 1.176741422842715e-05, - "loss": 0.0556, + "epoch": 6.077528590524284, + "grad_norm": 0.8034722208976746, + "learning_rate": 3.9224714094757175e-06, + "loss": 0.0542, "step": 40920 }, { - "epoch": 3.0395069062824893, - "grad_norm": 0.9142085909843445, - "learning_rate": 1.1762958562305066e-05, - "loss": 0.0506, + "epoch": 6.079013812564979, + "grad_norm": 0.6016208529472351, + "learning_rate": 3.920986187435022e-06, + "loss": 0.0586, "step": 40930 }, { - "epoch": 3.0402495173028368, - "grad_norm": 2.9091007709503174, - "learning_rate": 1.1758502896182979e-05, - "loss": 0.075, + "epoch": 6.0804990346056735, + "grad_norm": 1.325735092163086, + "learning_rate": 3.919500965394326e-06, + "loss": 0.0545, "step": 40940 }, { - "epoch": 3.040992128323184, - "grad_norm": 1.0477584600448608, - "learning_rate": 1.1754047230060894e-05, - "loss": 0.0697, + "epoch": 6.081984256646368, + "grad_norm": 1.393202304840088, + "learning_rate": 3.918015743353632e-06, + "loss": 0.0534, "step": 40950 }, { - "epoch": 3.0417347393435317, - "grad_norm": 0.5181246995925903, - "learning_rate": 1.1749591563938809e-05, - "loss": 0.0808, + "epoch": 6.083469478687063, + "grad_norm": 0.6158031225204468, + "learning_rate": 3.916530521312937e-06, + "loss": 0.0582, "step": 40960 }, { - "epoch": 3.0424773503638796, - "grad_norm": 2.0747780799865723, - "learning_rate": 1.1745135897816724e-05, - "loss": 0.0793, + "epoch": 6.084954700727759, + "grad_norm": 0.6937658190727234, + "learning_rate": 3.915045299272241e-06, + "loss": 0.0434, "step": 40970 }, { - "epoch": 3.043219961384227, - "grad_norm": 1.4035779237747192, - "learning_rate": 1.1740680231694637e-05, - "loss": 0.0503, + "epoch": 6.086439922768454, + "grad_norm": 0.5090669989585876, + "learning_rate": 3.913560077231547e-06, + "loss": 0.0585, "step": 40980 }, { - "epoch": 3.0439625724045745, - "grad_norm": 1.336506962776184, - "learning_rate": 1.1736224565572554e-05, - "loss": 0.027, + "epoch": 6.087925144809149, + "grad_norm": 0.6960013508796692, + "learning_rate": 3.912074855190852e-06, + "loss": 0.0682, "step": 40990 }, { - "epoch": 3.044705183424922, - "grad_norm": 1.5096478462219238, - "learning_rate": 1.1731768899450469e-05, - "loss": 0.0526, + "epoch": 6.089410366849844, + "grad_norm": 1.6729462146759033, + "learning_rate": 3.910589633150156e-06, + "loss": 0.0846, "step": 41000 }, { - "epoch": 3.0454477944452694, - "grad_norm": 0.9661771655082703, - "learning_rate": 1.1727313233328382e-05, - "loss": 0.0593, + "epoch": 6.090895588890539, + "grad_norm": 0.41924574971199036, + "learning_rate": 3.909104411109461e-06, + "loss": 0.0695, "step": 41010 }, { - "epoch": 3.0461904054656173, - "grad_norm": 2.019800901412964, - "learning_rate": 1.1722857567206297e-05, - "loss": 0.0584, + "epoch": 6.0923808109312345, + "grad_norm": 0.9897835850715637, + "learning_rate": 3.907619189068766e-06, + "loss": 0.0571, "step": 41020 }, { - "epoch": 3.0469330164859647, - "grad_norm": 1.9931749105453491, - "learning_rate": 1.1718401901084212e-05, - "loss": 0.0661, + "epoch": 6.093866032971929, + "grad_norm": 0.7119272947311401, + "learning_rate": 3.906133967028071e-06, + "loss": 0.0722, "step": 41030 }, { - "epoch": 3.047675627506312, - "grad_norm": 1.6220228672027588, - "learning_rate": 1.1713946234962127e-05, - "loss": 0.0753, + "epoch": 6.095351255012624, + "grad_norm": 0.5422357320785522, + "learning_rate": 3.904648744987376e-06, + "loss": 0.0471, "step": 41040 }, { - "epoch": 3.0484182385266596, - "grad_norm": 0.8533129692077637, - "learning_rate": 1.1709490568840042e-05, - "loss": 0.0538, + "epoch": 6.096836477053319, + "grad_norm": 0.6966010928153992, + "learning_rate": 3.903163522946681e-06, + "loss": 0.0579, "step": 41050 }, { - "epoch": 3.0491608495470075, - "grad_norm": 2.1048035621643066, - "learning_rate": 1.1705034902717956e-05, - "loss": 0.0699, + "epoch": 6.098321699094015, + "grad_norm": 1.064351201057434, + "learning_rate": 3.9016783009059855e-06, + "loss": 0.0526, "step": 41060 }, { - "epoch": 3.049903460567355, - "grad_norm": 1.1397531032562256, - "learning_rate": 1.1700579236595872e-05, - "loss": 0.0385, + "epoch": 6.09980692113471, + "grad_norm": 0.25419992208480835, + "learning_rate": 3.900193078865291e-06, + "loss": 0.0447, "step": 41070 }, { - "epoch": 3.0506460715877024, - "grad_norm": 1.218272089958191, - "learning_rate": 1.1696123570473786e-05, - "loss": 0.0511, + "epoch": 6.101292143175405, + "grad_norm": 1.352564811706543, + "learning_rate": 3.898707856824595e-06, + "loss": 0.0686, "step": 41080 }, { - "epoch": 3.05138868260805, - "grad_norm": 1.6609820127487183, - "learning_rate": 1.16916679043517e-05, - "loss": 0.0693, + "epoch": 6.1027773652161, + "grad_norm": 0.6406208872795105, + "learning_rate": 3.8972226347839005e-06, + "loss": 0.05, "step": 41090 }, { - "epoch": 3.0521312936283973, - "grad_norm": 0.7362266778945923, - "learning_rate": 1.1687212238229616e-05, - "loss": 0.0834, + "epoch": 6.104262587256795, + "grad_norm": 0.680209755897522, + "learning_rate": 3.895737412743205e-06, + "loss": 0.0462, "step": 41100 }, { - "epoch": 3.052873904648745, - "grad_norm": 1.8617407083511353, - "learning_rate": 1.168275657210753e-05, - "loss": 0.0672, + "epoch": 6.10574780929749, + "grad_norm": 0.8571810126304626, + "learning_rate": 3.89425219070251e-06, + "loss": 0.0793, "step": 41110 }, { - "epoch": 3.0536165156690926, - "grad_norm": 1.8097994327545166, - "learning_rate": 1.1678300905985446e-05, - "loss": 0.0693, + "epoch": 6.107233031338185, + "grad_norm": 0.9361696243286133, + "learning_rate": 3.8927669686618155e-06, + "loss": 0.0585, "step": 41120 }, { - "epoch": 3.05435912668944, - "grad_norm": 0.9621978402137756, - "learning_rate": 1.1673845239863359e-05, - "loss": 0.0697, + "epoch": 6.10871825337888, + "grad_norm": 1.5825921297073364, + "learning_rate": 3.89128174662112e-06, + "loss": 0.0554, "step": 41130 }, { - "epoch": 3.0551017377097875, - "grad_norm": 0.6268504858016968, - "learning_rate": 1.1669389573741276e-05, - "loss": 0.0801, + "epoch": 6.110203475419575, + "grad_norm": 0.8863343596458435, + "learning_rate": 3.889796524580425e-06, + "loss": 0.0589, "step": 41140 }, { - "epoch": 3.055844348730135, - "grad_norm": 0.5146762132644653, - "learning_rate": 1.1664933907619189e-05, - "loss": 0.0516, + "epoch": 6.11168869746027, + "grad_norm": 1.2165135145187378, + "learning_rate": 3.8883113025397305e-06, + "loss": 0.0718, "step": 41150 }, { - "epoch": 3.056586959750483, - "grad_norm": 1.8789063692092896, - "learning_rate": 1.1660478241497104e-05, - "loss": 0.0604, + "epoch": 6.113173919500966, + "grad_norm": 1.1842724084854126, + "learning_rate": 3.886826080499035e-06, + "loss": 0.0721, "step": 41160 }, { - "epoch": 3.0573295707708303, - "grad_norm": 1.16319739818573, - "learning_rate": 1.1656022575375019e-05, - "loss": 0.0819, + "epoch": 6.114659141541661, + "grad_norm": 0.7539393901824951, + "learning_rate": 3.885340858458339e-06, + "loss": 0.0548, "step": 41170 }, { - "epoch": 3.058072181791178, - "grad_norm": 1.3086446523666382, - "learning_rate": 1.1651566909252934e-05, - "loss": 0.0805, + "epoch": 6.116144363582356, + "grad_norm": 0.5141851902008057, + "learning_rate": 3.883855636417645e-06, + "loss": 0.036, "step": 41180 }, { - "epoch": 3.0588147928115252, - "grad_norm": 1.2489312887191772, - "learning_rate": 1.1647111243130849e-05, - "loss": 0.07, + "epoch": 6.1176295856230505, + "grad_norm": 1.1090915203094482, + "learning_rate": 3.88237041437695e-06, + "loss": 0.0628, "step": 41190 }, { - "epoch": 3.0595574038318727, - "grad_norm": 1.8217955827713013, - "learning_rate": 1.1642655577008762e-05, - "loss": 0.0798, + "epoch": 6.119114807663745, + "grad_norm": 0.4115223288536072, + "learning_rate": 3.880885192336254e-06, + "loss": 0.0512, "step": 41200 }, { - "epoch": 3.0603000148522206, - "grad_norm": 1.6634607315063477, - "learning_rate": 1.1638199910886677e-05, - "loss": 0.0426, + "epoch": 6.120600029704441, + "grad_norm": 0.7853782176971436, + "learning_rate": 3.87939997029556e-06, + "loss": 0.0588, "step": 41210 }, { - "epoch": 3.061042625872568, - "grad_norm": 2.512523889541626, - "learning_rate": 1.1633744244764594e-05, - "loss": 0.0639, + "epoch": 6.122085251745136, + "grad_norm": 0.8141002058982849, + "learning_rate": 3.877914748254865e-06, + "loss": 0.057, "step": 41220 }, { - "epoch": 3.0617852368929155, - "grad_norm": 1.9372293949127197, - "learning_rate": 1.1629288578642507e-05, - "loss": 0.1061, + "epoch": 6.123570473785831, + "grad_norm": 1.1658971309661865, + "learning_rate": 3.876429526214169e-06, + "loss": 0.0595, "step": 41230 }, { - "epoch": 3.062527847913263, - "grad_norm": 1.2639974355697632, - "learning_rate": 1.1624832912520422e-05, - "loss": 0.0663, + "epoch": 6.125055695826526, + "grad_norm": 0.9855948090553284, + "learning_rate": 3.874944304173474e-06, + "loss": 0.0437, "step": 41240 }, { - "epoch": 3.0632704589336104, - "grad_norm": 1.19036066532135, - "learning_rate": 1.1620377246398337e-05, - "loss": 0.0901, + "epoch": 6.126540917867221, + "grad_norm": 0.8863463401794434, + "learning_rate": 3.873459082132779e-06, + "loss": 0.0607, "step": 41250 }, { - "epoch": 3.0640130699539583, - "grad_norm": 2.261476993560791, - "learning_rate": 1.1615921580276252e-05, - "loss": 0.1083, + "epoch": 6.128026139907917, + "grad_norm": 0.5298382639884949, + "learning_rate": 3.871973860092084e-06, + "loss": 0.0523, "step": 41260 }, { - "epoch": 3.0647556809743057, - "grad_norm": 1.9970070123672485, - "learning_rate": 1.1611465914154165e-05, - "loss": 0.0546, + "epoch": 6.1295113619486115, + "grad_norm": 0.23882558941841125, + "learning_rate": 3.870488638051389e-06, + "loss": 0.052, "step": 41270 }, { - "epoch": 3.065498291994653, - "grad_norm": 1.31303870677948, - "learning_rate": 1.160701024803208e-05, - "loss": 0.0756, + "epoch": 6.130996583989306, + "grad_norm": 1.4826135635375977, + "learning_rate": 3.869003416010694e-06, + "loss": 0.0688, "step": 41280 }, { - "epoch": 3.0662409030150006, - "grad_norm": 1.9843600988388062, - "learning_rate": 1.1602554581909997e-05, - "loss": 0.0427, + "epoch": 6.132481806030001, + "grad_norm": 0.44090694189071655, + "learning_rate": 3.867518193969999e-06, + "loss": 0.0564, "step": 41290 }, { - "epoch": 3.066983514035348, - "grad_norm": 2.369338035583496, - "learning_rate": 1.159809891578791e-05, - "loss": 0.0688, + "epoch": 6.133967028070696, + "grad_norm": 1.0146280527114868, + "learning_rate": 3.866032971929304e-06, + "loss": 0.0625, "step": 41300 }, { - "epoch": 3.067726125055696, - "grad_norm": 1.3789310455322266, - "learning_rate": 1.1593643249665825e-05, - "loss": 0.0755, + "epoch": 6.135452250111392, + "grad_norm": 0.3917138874530792, + "learning_rate": 3.864547749888608e-06, + "loss": 0.0495, "step": 41310 }, { - "epoch": 3.0684687360760434, - "grad_norm": 2.209085702896118, - "learning_rate": 1.1589187583543739e-05, - "loss": 0.08, + "epoch": 6.136937472152087, + "grad_norm": 0.6822810769081116, + "learning_rate": 3.8630625278479135e-06, + "loss": 0.066, "step": 41320 }, { - "epoch": 3.069211347096391, - "grad_norm": 1.1442301273345947, - "learning_rate": 1.1584731917421655e-05, - "loss": 0.0652, + "epoch": 6.138422694192782, + "grad_norm": 0.5941704511642456, + "learning_rate": 3.861577305807219e-06, + "loss": 0.0604, "step": 41330 }, { - "epoch": 3.0699539581167383, - "grad_norm": 1.0715082883834839, - "learning_rate": 1.158027625129957e-05, - "loss": 0.0595, + "epoch": 6.139907916233477, + "grad_norm": 0.5531407594680786, + "learning_rate": 3.860092083766523e-06, + "loss": 0.073, "step": 41340 }, { - "epoch": 3.0706965691370858, - "grad_norm": 2.274426221847534, - "learning_rate": 1.1575820585177484e-05, - "loss": 0.0624, + "epoch": 6.1413931382741715, + "grad_norm": 1.3868801593780518, + "learning_rate": 3.8586068617258285e-06, + "loss": 0.0564, "step": 41350 }, { - "epoch": 3.0714391801574337, - "grad_norm": 2.2110979557037354, - "learning_rate": 1.1571364919055399e-05, - "loss": 0.0696, + "epoch": 6.142878360314867, + "grad_norm": 0.8457816243171692, + "learning_rate": 3.857121639685133e-06, + "loss": 0.04, "step": 41360 }, { - "epoch": 3.072181791177781, - "grad_norm": 0.7689408659934998, - "learning_rate": 1.1566909252933314e-05, - "loss": 0.0498, + "epoch": 6.144363582355562, + "grad_norm": 0.671506404876709, + "learning_rate": 3.855636417644438e-06, + "loss": 0.0342, "step": 41370 }, { - "epoch": 3.0729244021981286, - "grad_norm": 1.7183451652526855, - "learning_rate": 1.1562453586811229e-05, - "loss": 0.0641, + "epoch": 6.145848804396257, + "grad_norm": 0.5517304539680481, + "learning_rate": 3.8541511956037435e-06, + "loss": 0.0502, "step": 41380 }, { - "epoch": 3.073667013218476, - "grad_norm": 0.8684793710708618, - "learning_rate": 1.1557997920689144e-05, - "loss": 0.0747, + "epoch": 6.147334026436952, + "grad_norm": 0.5606401562690735, + "learning_rate": 3.852665973563048e-06, + "loss": 0.0605, "step": 41390 }, { - "epoch": 3.074409624238824, - "grad_norm": 1.1358157396316528, - "learning_rate": 1.1553542254567059e-05, - "loss": 0.0625, + "epoch": 6.148819248477648, + "grad_norm": 1.4839414358139038, + "learning_rate": 3.851180751522353e-06, + "loss": 0.0797, "step": 41400 }, { - "epoch": 3.0751522352591714, - "grad_norm": 1.4486446380615234, - "learning_rate": 1.1549086588444974e-05, - "loss": 0.0625, + "epoch": 6.150304470518343, + "grad_norm": 1.2912324666976929, + "learning_rate": 3.849695529481658e-06, + "loss": 0.0874, "step": 41410 }, { - "epoch": 3.075894846279519, - "grad_norm": 0.8137945532798767, - "learning_rate": 1.1544630922322887e-05, - "loss": 0.0589, + "epoch": 6.151789692559038, + "grad_norm": 1.0865097045898438, + "learning_rate": 3.848210307440963e-06, + "loss": 0.0436, "step": 41420 }, { - "epoch": 3.0766374572998663, - "grad_norm": 2.614501476287842, - "learning_rate": 1.1540175256200802e-05, - "loss": 0.0617, + "epoch": 6.1532749145997325, + "grad_norm": 0.8545838594436646, + "learning_rate": 3.846725085400267e-06, + "loss": 0.0463, "step": 41430 }, { - "epoch": 3.0773800683202137, - "grad_norm": 1.8231887817382812, - "learning_rate": 1.1535719590078717e-05, - "loss": 0.0626, + "epoch": 6.154760136640427, + "grad_norm": 0.6500738859176636, + "learning_rate": 3.845239863359573e-06, + "loss": 0.0564, "step": 41440 }, { - "epoch": 3.0781226793405616, - "grad_norm": 1.6901496648788452, - "learning_rate": 1.1531263923956632e-05, - "loss": 0.0533, + "epoch": 6.156245358681123, + "grad_norm": 1.0389630794525146, + "learning_rate": 3.843754641318878e-06, + "loss": 0.0582, "step": 41450 }, { - "epoch": 3.078865290360909, - "grad_norm": 2.780428171157837, - "learning_rate": 1.1526808257834547e-05, - "loss": 0.0599, + "epoch": 6.157730580721818, + "grad_norm": 1.0748189687728882, + "learning_rate": 3.842269419278182e-06, + "loss": 0.0662, "step": 41460 }, { - "epoch": 3.0796079013812565, - "grad_norm": 0.7014702558517456, - "learning_rate": 1.152235259171246e-05, - "loss": 0.0325, + "epoch": 6.159215802762513, + "grad_norm": 1.0013229846954346, + "learning_rate": 3.840784197237487e-06, + "loss": 0.0622, "step": 41470 }, { - "epoch": 3.080350512401604, - "grad_norm": 1.5613539218902588, - "learning_rate": 1.1517896925590377e-05, - "loss": 0.0723, + "epoch": 6.160701024803208, + "grad_norm": 1.26486074924469, + "learning_rate": 3.839298975196792e-06, + "loss": 0.0456, "step": 41480 }, { - "epoch": 3.0810931234219514, - "grad_norm": 0.42727401852607727, - "learning_rate": 1.151344125946829e-05, - "loss": 0.0387, + "epoch": 6.162186246843903, + "grad_norm": 0.8088564276695251, + "learning_rate": 3.837813753156097e-06, + "loss": 0.0574, "step": 41490 }, { - "epoch": 3.0818357344422993, - "grad_norm": 0.7476786971092224, - "learning_rate": 1.1508985593346205e-05, - "loss": 0.0964, + "epoch": 6.163671468884599, + "grad_norm": 0.7139611840248108, + "learning_rate": 3.836328531115402e-06, + "loss": 0.074, "step": 41500 }, { - "epoch": 3.0825783454626468, - "grad_norm": 0.780316948890686, - "learning_rate": 1.1504529927224122e-05, - "loss": 0.0734, + "epoch": 6.1651566909252935, + "grad_norm": 0.7177110910415649, + "learning_rate": 3.834843309074707e-06, + "loss": 0.0488, "step": 41510 }, { - "epoch": 3.083320956482994, - "grad_norm": 1.4033887386322021, - "learning_rate": 1.1500074261102035e-05, - "loss": 0.068, + "epoch": 6.166641912965988, + "grad_norm": 1.611185908317566, + "learning_rate": 3.833358087034012e-06, + "loss": 0.0572, "step": 41520 }, { - "epoch": 3.0840635675033417, - "grad_norm": 2.3569159507751465, - "learning_rate": 1.149561859497995e-05, - "loss": 0.0773, + "epoch": 6.168127135006683, + "grad_norm": 0.7332130074501038, + "learning_rate": 3.831872864993317e-06, + "loss": 0.0672, "step": 41530 }, { - "epoch": 3.084806178523689, - "grad_norm": 2.889099359512329, - "learning_rate": 1.1491162928857864e-05, - "loss": 0.0708, + "epoch": 6.169612357047378, + "grad_norm": 1.156199336051941, + "learning_rate": 3.830387642952621e-06, + "loss": 0.0387, "step": 41540 }, { - "epoch": 3.085548789544037, - "grad_norm": 0.9511964321136475, - "learning_rate": 1.148670726273578e-05, - "loss": 0.0688, + "epoch": 6.171097579088074, + "grad_norm": 0.6302946209907532, + "learning_rate": 3.828902420911927e-06, + "loss": 0.0488, "step": 41550 }, { - "epoch": 3.0862914005643844, - "grad_norm": 0.7817783951759338, - "learning_rate": 1.1482251596613693e-05, - "loss": 0.0624, + "epoch": 6.172582801128769, + "grad_norm": 0.9586188793182373, + "learning_rate": 3.827417198871232e-06, + "loss": 0.0664, "step": 41560 }, { - "epoch": 3.087034011584732, - "grad_norm": 1.0421441793441772, - "learning_rate": 1.1477795930491608e-05, - "loss": 0.0506, + "epoch": 6.174068023169464, + "grad_norm": 0.8568915128707886, + "learning_rate": 3.825931976830536e-06, + "loss": 0.0438, "step": 41570 }, { - "epoch": 3.0877766226050793, - "grad_norm": 1.8338831663131714, - "learning_rate": 1.1473340264369523e-05, - "loss": 0.0341, + "epoch": 6.175553245210159, + "grad_norm": 0.9365776181221008, + "learning_rate": 3.8244467547898414e-06, + "loss": 0.051, "step": 41580 }, { - "epoch": 3.088519233625427, - "grad_norm": 0.8225168585777283, - "learning_rate": 1.1468884598247438e-05, - "loss": 0.0541, + "epoch": 6.177038467250854, + "grad_norm": 0.6824678778648376, + "learning_rate": 3.822961532749147e-06, + "loss": 0.0568, "step": 41590 }, { - "epoch": 3.0892618446457747, - "grad_norm": 0.7224980592727661, - "learning_rate": 1.1464428932125353e-05, - "loss": 0.0588, + "epoch": 6.178523689291549, + "grad_norm": 0.9566531777381897, + "learning_rate": 3.821476310708451e-06, + "loss": 0.0667, "step": 41600 }, { - "epoch": 3.090004455666122, - "grad_norm": 1.8559728860855103, - "learning_rate": 1.1459973266003267e-05, - "loss": 0.0867, + "epoch": 6.180008911332244, + "grad_norm": 1.0182305574417114, + "learning_rate": 3.8199910886677564e-06, + "loss": 0.0707, "step": 41610 }, { - "epoch": 3.0907470666864696, - "grad_norm": 2.965693712234497, - "learning_rate": 1.1455517599881182e-05, - "loss": 0.0735, + "epoch": 6.181494133372939, + "grad_norm": 1.1408592462539673, + "learning_rate": 3.818505866627061e-06, + "loss": 0.074, "step": 41620 }, { - "epoch": 3.091489677706817, - "grad_norm": 1.150742530822754, - "learning_rate": 1.1451061933759098e-05, - "loss": 0.0847, + "epoch": 6.182979355413634, + "grad_norm": 1.2050893306732178, + "learning_rate": 3.817020644586366e-06, + "loss": 0.0684, "step": 41630 }, { - "epoch": 3.092232288727165, - "grad_norm": 2.492800712585449, - "learning_rate": 1.1446606267637012e-05, - "loss": 0.0813, + "epoch": 6.18446457745433, + "grad_norm": 0.6228426098823547, + "learning_rate": 3.815535422545671e-06, + "loss": 0.0509, "step": 41640 }, { - "epoch": 3.0929748997475124, - "grad_norm": 1.1938602924346924, - "learning_rate": 1.1442150601514927e-05, - "loss": 0.0843, + "epoch": 6.185949799495025, + "grad_norm": 0.21518337726593018, + "learning_rate": 3.8140502005049754e-06, + "loss": 0.0554, "step": 41650 }, { - "epoch": 3.09371751076786, - "grad_norm": 1.2840536832809448, - "learning_rate": 1.1437694935392842e-05, - "loss": 0.0537, + "epoch": 6.18743502153572, + "grad_norm": 0.8894339799880981, + "learning_rate": 3.812564978464281e-06, + "loss": 0.0525, "step": 41660 }, { - "epoch": 3.0944601217882073, - "grad_norm": 0.45311644673347473, - "learning_rate": 1.1433239269270757e-05, - "loss": 0.0419, + "epoch": 6.188920243576415, + "grad_norm": 0.8132939338684082, + "learning_rate": 3.8110797564235856e-06, + "loss": 0.0754, "step": 41670 }, { - "epoch": 3.0952027328085547, - "grad_norm": 1.7243297100067139, - "learning_rate": 1.142878360314867e-05, - "loss": 0.0568, + "epoch": 6.1904054656171095, + "grad_norm": 1.0716118812561035, + "learning_rate": 3.8095945343828904e-06, + "loss": 0.0672, "step": 41680 }, { - "epoch": 3.0959453438289026, - "grad_norm": 1.793365240097046, - "learning_rate": 1.1424327937026585e-05, - "loss": 0.0749, + "epoch": 6.191890687657805, + "grad_norm": 1.131106972694397, + "learning_rate": 3.8081093123421953e-06, + "loss": 0.0582, "step": 41690 }, { - "epoch": 3.09668795484925, - "grad_norm": 3.0934557914733887, - "learning_rate": 1.1419872270904502e-05, - "loss": 0.0775, + "epoch": 6.1933759096985, + "grad_norm": 1.8588736057281494, + "learning_rate": 3.8066240903015006e-06, + "loss": 0.0522, "step": 41700 }, { - "epoch": 3.0974305658695975, - "grad_norm": 0.9718724489212036, - "learning_rate": 1.1415416604782415e-05, - "loss": 0.0614, + "epoch": 6.194861131739195, + "grad_norm": 0.807196319103241, + "learning_rate": 3.8051388682608054e-06, + "loss": 0.0569, "step": 41710 }, { - "epoch": 3.098173176889945, - "grad_norm": 1.1722973585128784, - "learning_rate": 1.141096093866033e-05, - "loss": 0.0926, + "epoch": 6.19634635377989, + "grad_norm": 0.8074870109558105, + "learning_rate": 3.8036536462201103e-06, + "loss": 0.0626, "step": 41720 }, { - "epoch": 3.0989157879102924, - "grad_norm": 2.2873425483703613, - "learning_rate": 1.1406505272538243e-05, - "loss": 0.0894, + "epoch": 6.197831575820585, + "grad_norm": 0.7318360805511475, + "learning_rate": 3.8021684241794147e-06, + "loss": 0.047, "step": 41730 }, { - "epoch": 3.0996583989306403, - "grad_norm": 0.9070118069648743, - "learning_rate": 1.140204960641616e-05, - "loss": 0.0558, + "epoch": 6.199316797861281, + "grad_norm": 0.2228417545557022, + "learning_rate": 3.8006832021387204e-06, + "loss": 0.0573, "step": 41740 }, { - "epoch": 3.1004010099509878, - "grad_norm": 2.142220973968506, - "learning_rate": 1.1397593940294075e-05, - "loss": 0.0754, + "epoch": 6.2008020199019755, + "grad_norm": 0.7176522016525269, + "learning_rate": 3.799197980098025e-06, + "loss": 0.0484, "step": 41750 }, { - "epoch": 3.1011436209713352, - "grad_norm": 0.7095875144004822, - "learning_rate": 1.1393138274171988e-05, - "loss": 0.0474, + "epoch": 6.2022872419426704, + "grad_norm": 1.6052658557891846, + "learning_rate": 3.7977127580573297e-06, + "loss": 0.0629, "step": 41760 }, { - "epoch": 3.1018862319916827, - "grad_norm": 0.9688817262649536, - "learning_rate": 1.1388682608049905e-05, - "loss": 0.0508, + "epoch": 6.203772463983365, + "grad_norm": 0.9158141016960144, + "learning_rate": 3.796227536016635e-06, + "loss": 0.0677, "step": 41770 }, { - "epoch": 3.10262884301203, - "grad_norm": 1.3128563165664673, - "learning_rate": 1.1384226941927818e-05, - "loss": 0.0785, + "epoch": 6.20525768602406, + "grad_norm": 0.7459770441055298, + "learning_rate": 3.79474231397594e-06, + "loss": 0.0731, "step": 41780 }, { - "epoch": 3.103371454032378, - "grad_norm": 1.9782556295394897, - "learning_rate": 1.1379771275805733e-05, - "loss": 0.0642, + "epoch": 6.206742908064756, + "grad_norm": 1.1536628007888794, + "learning_rate": 3.7932570919352447e-06, + "loss": 0.0617, "step": 41790 }, { - "epoch": 3.1041140650527255, - "grad_norm": 2.7178022861480713, - "learning_rate": 1.1375315609683648e-05, - "loss": 0.0747, + "epoch": 6.208228130105451, + "grad_norm": 0.9005663394927979, + "learning_rate": 3.7917718698945496e-06, + "loss": 0.0525, "step": 41800 }, { - "epoch": 3.104856676073073, - "grad_norm": 0.8021518588066101, - "learning_rate": 1.1370859943561563e-05, - "loss": 0.0784, + "epoch": 6.209713352146146, + "grad_norm": 0.64149409532547, + "learning_rate": 3.790286647853855e-06, + "loss": 0.0592, "step": 41810 }, { - "epoch": 3.1055992870934204, - "grad_norm": 1.1395660638809204, - "learning_rate": 1.1366404277439478e-05, - "loss": 0.0693, + "epoch": 6.211198574186841, + "grad_norm": 0.7573428153991699, + "learning_rate": 3.7888014258131593e-06, + "loss": 0.0608, "step": 41820 }, { - "epoch": 3.106341898113768, - "grad_norm": 0.3719038963317871, - "learning_rate": 1.1361948611317392e-05, - "loss": 0.0576, + "epoch": 6.212683796227536, + "grad_norm": 0.9015231728553772, + "learning_rate": 3.787316203772464e-06, + "loss": 0.0783, "step": 41830 }, { - "epoch": 3.1070845091341157, - "grad_norm": 3.742137908935547, - "learning_rate": 1.1357492945195307e-05, - "loss": 0.0891, + "epoch": 6.214169018268231, + "grad_norm": 0.5844115018844604, + "learning_rate": 3.785830981731769e-06, + "loss": 0.0541, "step": 41840 }, { - "epoch": 3.107827120154463, - "grad_norm": 1.4700413942337036, - "learning_rate": 1.1353037279073222e-05, - "loss": 0.0648, + "epoch": 6.215654240308926, + "grad_norm": 0.7862566709518433, + "learning_rate": 3.7843457596910743e-06, + "loss": 0.0405, "step": 41850 }, { - "epoch": 3.1085697311748106, - "grad_norm": 1.799091100692749, - "learning_rate": 1.1348581612951137e-05, - "loss": 0.0811, + "epoch": 6.217139462349621, + "grad_norm": 0.7765684723854065, + "learning_rate": 3.782860537650379e-06, + "loss": 0.0626, "step": 41860 }, { - "epoch": 3.109312342195158, - "grad_norm": 0.4236965477466583, - "learning_rate": 1.1344125946829052e-05, - "loss": 0.0569, + "epoch": 6.218624684390316, + "grad_norm": 1.4600820541381836, + "learning_rate": 3.781375315609684e-06, + "loss": 0.0644, "step": 41870 }, { - "epoch": 3.1100549532155055, - "grad_norm": 0.9053602814674377, - "learning_rate": 1.1339670280706965e-05, - "loss": 0.0476, + "epoch": 6.220109906431011, + "grad_norm": 0.4247475564479828, + "learning_rate": 3.7798900935689884e-06, + "loss": 0.0493, "step": 41880 }, { - "epoch": 3.1107975642358534, - "grad_norm": 1.6792991161346436, - "learning_rate": 1.1335214614584882e-05, - "loss": 0.0675, + "epoch": 6.221595128471707, + "grad_norm": 1.0825570821762085, + "learning_rate": 3.778404871528294e-06, + "loss": 0.0753, "step": 41890 }, { - "epoch": 3.111540175256201, - "grad_norm": 0.48913243412971497, - "learning_rate": 1.1330758948462795e-05, - "loss": 0.0632, + "epoch": 6.223080350512402, + "grad_norm": 0.660193681716919, + "learning_rate": 3.7769196494875986e-06, + "loss": 0.0658, "step": 41900 }, { - "epoch": 3.1122827862765483, - "grad_norm": 2.1380555629730225, - "learning_rate": 1.132630328234071e-05, - "loss": 0.0699, + "epoch": 6.224565572553097, + "grad_norm": 0.6365493535995483, + "learning_rate": 3.7754344274469034e-06, + "loss": 0.0538, "step": 41910 }, { - "epoch": 3.1130253972968958, - "grad_norm": 3.3821558952331543, - "learning_rate": 1.1321847616218627e-05, - "loss": 0.0529, + "epoch": 6.2260507945937915, + "grad_norm": 0.7567954659461975, + "learning_rate": 3.7739492054062087e-06, + "loss": 0.0612, "step": 41920 }, { - "epoch": 3.113768008317243, - "grad_norm": 1.7938247919082642, - "learning_rate": 1.131739195009654e-05, - "loss": 0.0707, + "epoch": 6.227536016634486, + "grad_norm": 0.9321280717849731, + "learning_rate": 3.7724639833655136e-06, + "loss": 0.0716, "step": 41930 }, { - "epoch": 3.114510619337591, - "grad_norm": 1.2361664772033691, - "learning_rate": 1.1312936283974455e-05, - "loss": 0.0633, + "epoch": 6.229021238675182, + "grad_norm": 1.1202092170715332, + "learning_rate": 3.7709787613248184e-06, + "loss": 0.0496, "step": 41940 }, { - "epoch": 3.1152532303579386, - "grad_norm": 1.2739269733428955, - "learning_rate": 1.1308480617852368e-05, - "loss": 0.0466, + "epoch": 6.230506460715877, + "grad_norm": 0.5768749713897705, + "learning_rate": 3.7694935392841233e-06, + "loss": 0.0493, "step": 41950 }, { - "epoch": 3.115995841378286, - "grad_norm": 2.4915049076080322, - "learning_rate": 1.1304024951730285e-05, - "loss": 0.0761, + "epoch": 6.231991682756572, + "grad_norm": 0.6443141102790833, + "learning_rate": 3.7680083172434285e-06, + "loss": 0.0634, "step": 41960 }, { - "epoch": 3.1167384523986335, - "grad_norm": 1.7226744890213013, - "learning_rate": 1.1299569285608198e-05, - "loss": 0.0604, + "epoch": 6.233476904797267, + "grad_norm": 0.8601978421211243, + "learning_rate": 3.766523095202733e-06, + "loss": 0.062, "step": 41970 }, { - "epoch": 3.1174810634189813, - "grad_norm": 1.8600285053253174, - "learning_rate": 1.1295113619486113e-05, - "loss": 0.0596, + "epoch": 6.234962126837963, + "grad_norm": 0.9269450902938843, + "learning_rate": 3.765037873162038e-06, + "loss": 0.0753, "step": 41980 }, { - "epoch": 3.118223674439329, - "grad_norm": 2.710045099258423, - "learning_rate": 1.1290657953364028e-05, - "loss": 0.0905, + "epoch": 6.236447348878658, + "grad_norm": 0.7924551367759705, + "learning_rate": 3.7635526511213427e-06, + "loss": 0.0643, "step": 41990 }, { - "epoch": 3.1189662854596762, - "grad_norm": 0.9297268390655518, - "learning_rate": 1.1286202287241943e-05, - "loss": 0.0513, + "epoch": 6.2379325709193525, + "grad_norm": 0.8593719005584717, + "learning_rate": 3.762067429080648e-06, + "loss": 0.0726, "step": 42000 }, { - "epoch": 3.1197088964800237, - "grad_norm": 2.148306131362915, - "learning_rate": 1.1281746621119858e-05, - "loss": 0.0729, + "epoch": 6.239417792960047, + "grad_norm": 0.5367026329040527, + "learning_rate": 3.760582207039953e-06, + "loss": 0.0598, "step": 42010 }, { - "epoch": 3.120451507500371, - "grad_norm": 1.510986328125, - "learning_rate": 1.1277290954997771e-05, - "loss": 0.082, + "epoch": 6.240903015000742, + "grad_norm": 0.9355032444000244, + "learning_rate": 3.7590969849992577e-06, + "loss": 0.0626, "step": 42020 }, { - "epoch": 3.121194118520719, - "grad_norm": 2.6012394428253174, - "learning_rate": 1.1272835288875686e-05, - "loss": 0.0769, + "epoch": 6.242388237041438, + "grad_norm": 1.0359206199645996, + "learning_rate": 3.7576117629585625e-06, + "loss": 0.0491, "step": 42030 }, { - "epoch": 3.1219367295410665, - "grad_norm": 2.248591184616089, - "learning_rate": 1.1268379622753603e-05, - "loss": 0.0889, + "epoch": 6.243873459082133, + "grad_norm": 0.44826602935791016, + "learning_rate": 3.756126540917868e-06, + "loss": 0.0636, "step": 42040 }, { - "epoch": 3.122679340561414, - "grad_norm": 1.4806914329528809, - "learning_rate": 1.1263923956631516e-05, - "loss": 0.0694, + "epoch": 6.245358681122828, + "grad_norm": 0.7758882641792297, + "learning_rate": 3.7546413188771723e-06, + "loss": 0.0519, "step": 42050 }, { - "epoch": 3.1234219515817614, - "grad_norm": 2.7519147396087646, - "learning_rate": 1.1259468290509431e-05, - "loss": 0.0503, + "epoch": 6.246843903163523, + "grad_norm": 1.433592677116394, + "learning_rate": 3.753156096836477e-06, + "loss": 0.0608, "step": 42060 }, { - "epoch": 3.124164562602109, - "grad_norm": 2.996624708175659, - "learning_rate": 1.1255012624387346e-05, - "loss": 0.0769, + "epoch": 6.248329125204218, + "grad_norm": 0.5968450307846069, + "learning_rate": 3.7516708747957824e-06, + "loss": 0.054, "step": 42070 }, { - "epoch": 3.1249071736224567, - "grad_norm": 0.765396237373352, - "learning_rate": 1.1250556958265261e-05, - "loss": 0.0529, + "epoch": 6.2498143472449135, + "grad_norm": 0.689393162727356, + "learning_rate": 3.7501856527550873e-06, + "loss": 0.0664, "step": 42080 }, { - "epoch": 3.125649784642804, - "grad_norm": 0.4686828553676605, - "learning_rate": 1.1246101292143176e-05, - "loss": 0.0445, + "epoch": 6.251299569285608, + "grad_norm": 0.7492974400520325, + "learning_rate": 3.748700430714392e-06, + "loss": 0.0659, "step": 42090 }, { - "epoch": 3.1263923956631516, - "grad_norm": 0.8828471899032593, - "learning_rate": 1.124164562602109e-05, - "loss": 0.0721, + "epoch": 6.252784791326303, + "grad_norm": 0.3340299129486084, + "learning_rate": 3.747215208673697e-06, + "loss": 0.0448, "step": 42100 }, { - "epoch": 3.127135006683499, - "grad_norm": 3.2565178871154785, - "learning_rate": 1.1237189959899006e-05, - "loss": 0.0749, + "epoch": 6.254270013366998, + "grad_norm": 1.1328703165054321, + "learning_rate": 3.7457299866330022e-06, + "loss": 0.0377, "step": 42110 }, { - "epoch": 3.1278776177038465, - "grad_norm": 1.6550853252410889, - "learning_rate": 1.123273429377692e-05, - "loss": 0.0714, + "epoch": 6.255755235407693, + "grad_norm": 0.6826351881027222, + "learning_rate": 3.744244764592307e-06, + "loss": 0.0641, "step": 42120 }, { - "epoch": 3.1286202287241944, - "grad_norm": 0.7988404631614685, - "learning_rate": 1.1228278627654835e-05, - "loss": 0.0889, + "epoch": 6.257240457448389, + "grad_norm": 0.5043858885765076, + "learning_rate": 3.7427595425516115e-06, + "loss": 0.0525, "step": 42130 }, { - "epoch": 3.129362839744542, - "grad_norm": 0.8032083511352539, - "learning_rate": 1.1223822961532748e-05, - "loss": 0.0761, + "epoch": 6.258725679489084, + "grad_norm": 0.7694330811500549, + "learning_rate": 3.7412743205109164e-06, + "loss": 0.0574, "step": 42140 }, { - "epoch": 3.1301054507648893, - "grad_norm": 1.479958176612854, - "learning_rate": 1.1219367295410665e-05, - "loss": 0.0505, + "epoch": 6.260210901529779, + "grad_norm": 0.6579335331916809, + "learning_rate": 3.7397890984702217e-06, + "loss": 0.0419, "step": 42150 }, { - "epoch": 3.130848061785237, - "grad_norm": 1.5908888578414917, - "learning_rate": 1.121491162928858e-05, - "loss": 0.0723, + "epoch": 6.261696123570474, + "grad_norm": 0.3473973572254181, + "learning_rate": 3.7383038764295265e-06, + "loss": 0.0327, "step": 42160 }, { - "epoch": 3.1315906728055842, - "grad_norm": 0.9829261302947998, - "learning_rate": 1.1210455963166493e-05, - "loss": 0.0565, + "epoch": 6.2631813456111685, + "grad_norm": 0.5329101085662842, + "learning_rate": 3.7368186543888314e-06, + "loss": 0.0743, "step": 42170 }, { - "epoch": 3.132333283825932, - "grad_norm": 0.9932742118835449, - "learning_rate": 1.120600029704441e-05, - "loss": 0.0773, + "epoch": 6.264666567651864, + "grad_norm": 1.2124110460281372, + "learning_rate": 3.7353334323481367e-06, + "loss": 0.0701, "step": 42180 }, { - "epoch": 3.1330758948462796, - "grad_norm": 1.5291361808776855, - "learning_rate": 1.1201544630922323e-05, - "loss": 0.0945, + "epoch": 6.266151789692559, + "grad_norm": 1.588112711906433, + "learning_rate": 3.7338482103074415e-06, + "loss": 0.0664, "step": 42190 }, { - "epoch": 3.133818505866627, - "grad_norm": 0.7759218215942383, - "learning_rate": 1.1197088964800238e-05, - "loss": 0.0567, + "epoch": 6.267637011733254, + "grad_norm": 0.6495673656463623, + "learning_rate": 3.732362988266746e-06, + "loss": 0.0528, "step": 42200 }, { - "epoch": 3.1345611168869745, - "grad_norm": 1.5172669887542725, - "learning_rate": 1.1192633298678153e-05, - "loss": 0.041, + "epoch": 6.269122233773949, + "grad_norm": 0.4462796747684479, + "learning_rate": 3.730877766226051e-06, + "loss": 0.0413, "step": 42210 }, { - "epoch": 3.1353037279073224, - "grad_norm": 2.5315630435943604, - "learning_rate": 1.1188177632556068e-05, - "loss": 0.0596, + "epoch": 6.270607455814645, + "grad_norm": 0.7272733449935913, + "learning_rate": 3.729392544185356e-06, + "loss": 0.0767, "step": 42220 }, { - "epoch": 3.13604633892767, - "grad_norm": 1.723301649093628, - "learning_rate": 1.1183721966433983e-05, - "loss": 0.0474, + "epoch": 6.27209267785534, + "grad_norm": 1.4103509187698364, + "learning_rate": 3.727907322144661e-06, + "loss": 0.0541, "step": 42230 }, { - "epoch": 3.1367889499480173, - "grad_norm": 1.322437047958374, - "learning_rate": 1.1179266300311896e-05, - "loss": 0.0513, + "epoch": 6.2735778998960345, + "grad_norm": 0.6002963781356812, + "learning_rate": 3.726422100103966e-06, + "loss": 0.0602, "step": 42240 }, { - "epoch": 3.1375315609683647, - "grad_norm": 1.4333685636520386, - "learning_rate": 1.1174810634189811e-05, - "loss": 0.0602, + "epoch": 6.275063121936729, + "grad_norm": 0.6196624636650085, + "learning_rate": 3.7249368780632707e-06, + "loss": 0.0539, "step": 42250 }, { - "epoch": 3.138274171988712, - "grad_norm": 1.3001521825790405, - "learning_rate": 1.1170354968067726e-05, - "loss": 0.0504, + "epoch": 6.276548343977424, + "grad_norm": 0.47498103976249695, + "learning_rate": 3.723451656022576e-06, + "loss": 0.0648, "step": 42260 }, { - "epoch": 3.13901678300906, - "grad_norm": 0.24273203313350677, - "learning_rate": 1.1165899301945641e-05, - "loss": 0.033, + "epoch": 6.27803356601812, + "grad_norm": 0.428314208984375, + "learning_rate": 3.721966433981881e-06, + "loss": 0.0481, "step": 42270 }, { - "epoch": 3.1397593940294075, - "grad_norm": 2.059208393096924, - "learning_rate": 1.1161443635823556e-05, - "loss": 0.0636, + "epoch": 6.279518788058815, + "grad_norm": 0.5692898631095886, + "learning_rate": 3.7204812119411852e-06, + "loss": 0.0421, "step": 42280 }, { - "epoch": 3.140502005049755, - "grad_norm": 1.3567062616348267, - "learning_rate": 1.115698796970147e-05, - "loss": 0.0693, + "epoch": 6.28100401009951, + "grad_norm": 2.004471778869629, + "learning_rate": 3.71899598990049e-06, + "loss": 0.0619, "step": 42290 }, { - "epoch": 3.1412446160701024, - "grad_norm": 0.29561829566955566, - "learning_rate": 1.1152532303579386e-05, - "loss": 0.0617, + "epoch": 6.282489232140205, + "grad_norm": 0.504784882068634, + "learning_rate": 3.7175107678597954e-06, + "loss": 0.0467, "step": 42300 }, { - "epoch": 3.14198722709045, - "grad_norm": 1.4350517988204956, - "learning_rate": 1.11480766374573e-05, - "loss": 0.0671, + "epoch": 6.2839744541809, + "grad_norm": 0.5373015403747559, + "learning_rate": 3.7160255458191002e-06, + "loss": 0.0622, "step": 42310 }, { - "epoch": 3.1427298381107978, - "grad_norm": 1.5640891790390015, - "learning_rate": 1.1143620971335214e-05, - "loss": 0.069, + "epoch": 6.2854596762215955, + "grad_norm": 0.6683239340782166, + "learning_rate": 3.714540323778405e-06, + "loss": 0.0548, "step": 42320 }, { - "epoch": 3.143472449131145, - "grad_norm": 1.763852596282959, - "learning_rate": 1.1139165305213131e-05, - "loss": 0.0703, + "epoch": 6.28694489826229, + "grad_norm": 0.7173686623573303, + "learning_rate": 3.7130551017377104e-06, + "loss": 0.0595, "step": 42330 }, { - "epoch": 3.1442150601514927, - "grad_norm": 2.7091476917266846, - "learning_rate": 1.1134709639091044e-05, - "loss": 0.0564, + "epoch": 6.288430120302985, + "grad_norm": 0.7080352902412415, + "learning_rate": 3.7115698796970152e-06, + "loss": 0.0579, "step": 42340 }, { - "epoch": 3.14495767117184, - "grad_norm": 1.8148163557052612, - "learning_rate": 1.113025397296896e-05, - "loss": 0.066, + "epoch": 6.28991534234368, + "grad_norm": 0.9369353652000427, + "learning_rate": 3.7100846576563197e-06, + "loss": 0.0715, "step": 42350 }, { - "epoch": 3.1457002821921876, - "grad_norm": 1.7284859418869019, - "learning_rate": 1.1125798306846873e-05, - "loss": 0.0552, + "epoch": 6.291400564384375, + "grad_norm": 0.34376099705696106, + "learning_rate": 3.7085994356156245e-06, + "loss": 0.0415, "step": 42360 }, { - "epoch": 3.1464428932125355, - "grad_norm": 0.8898233771324158, - "learning_rate": 1.112134264072479e-05, - "loss": 0.0448, + "epoch": 6.292885786425071, + "grad_norm": 0.711609423160553, + "learning_rate": 3.70711421357493e-06, + "loss": 0.0469, "step": 42370 }, { - "epoch": 3.147185504232883, - "grad_norm": 0.5898759365081787, - "learning_rate": 1.1116886974602703e-05, - "loss": 0.0547, + "epoch": 6.294371008465766, + "grad_norm": 1.4193964004516602, + "learning_rate": 3.7056289915342347e-06, + "loss": 0.065, "step": 42380 }, { - "epoch": 3.1479281152532304, - "grad_norm": 0.21336278319358826, - "learning_rate": 1.1112431308480618e-05, - "loss": 0.0413, + "epoch": 6.295856230506461, + "grad_norm": 0.6497589349746704, + "learning_rate": 3.7041437694935395e-06, + "loss": 0.053, "step": 42390 }, { - "epoch": 3.148670726273578, - "grad_norm": 1.7114509344100952, - "learning_rate": 1.1107975642358533e-05, - "loss": 0.099, + "epoch": 6.297341452547156, + "grad_norm": 1.2740793228149414, + "learning_rate": 3.7026585474528444e-06, + "loss": 0.0514, "step": 42400 }, { - "epoch": 3.1494133372939253, - "grad_norm": 0.577693521976471, - "learning_rate": 1.1103519976236448e-05, - "loss": 0.0631, + "epoch": 6.2988266745878505, + "grad_norm": 0.4431326985359192, + "learning_rate": 3.7011733254121496e-06, + "loss": 0.0641, "step": 42410 }, { - "epoch": 3.150155948314273, - "grad_norm": 0.7681446075439453, - "learning_rate": 1.1099064310114363e-05, - "loss": 0.0605, + "epoch": 6.300311896628546, + "grad_norm": 0.61473149061203, + "learning_rate": 3.6996881033714545e-06, + "loss": 0.0434, "step": 42420 }, { - "epoch": 3.1508985593346206, - "grad_norm": 0.7933735251426697, - "learning_rate": 1.1094608643992276e-05, - "loss": 0.0827, + "epoch": 6.301797118669241, + "grad_norm": 0.9502345323562622, + "learning_rate": 3.698202881330759e-06, + "loss": 0.0632, "step": 42430 }, { - "epoch": 3.151641170354968, - "grad_norm": 0.5828210115432739, - "learning_rate": 1.1090152977870193e-05, - "loss": 0.0485, + "epoch": 6.303282340709936, + "grad_norm": 0.5800604820251465, + "learning_rate": 3.6967176592900646e-06, + "loss": 0.0564, "step": 42440 }, { - "epoch": 3.1523837813753155, - "grad_norm": 0.2343250811100006, - "learning_rate": 1.1085697311748108e-05, - "loss": 0.0362, + "epoch": 6.304767562750631, + "grad_norm": 0.6123915910720825, + "learning_rate": 3.695232437249369e-06, + "loss": 0.0532, "step": 42450 }, { - "epoch": 3.153126392395663, - "grad_norm": 2.2434585094451904, - "learning_rate": 1.1081241645626021e-05, - "loss": 0.0569, + "epoch": 6.306252784791326, + "grad_norm": 1.1906459331512451, + "learning_rate": 3.693747215208674e-06, + "loss": 0.062, "step": 42460 }, { - "epoch": 3.153869003416011, - "grad_norm": 1.9830187559127808, - "learning_rate": 1.1076785979503936e-05, - "loss": 0.0823, + "epoch": 6.307738006832022, + "grad_norm": 0.4894062876701355, + "learning_rate": 3.692261993167979e-06, + "loss": 0.0729, "step": 42470 }, { - "epoch": 3.1546116144363583, - "grad_norm": 1.5530307292938232, - "learning_rate": 1.1072330313381851e-05, - "loss": 0.0859, + "epoch": 6.309223228872717, + "grad_norm": 0.6452047824859619, + "learning_rate": 3.690776771127284e-06, + "loss": 0.0458, "step": 42480 }, { - "epoch": 3.1553542254567057, - "grad_norm": 1.3739688396453857, - "learning_rate": 1.1067874647259766e-05, - "loss": 0.0796, + "epoch": 6.3107084509134115, + "grad_norm": 0.8813139200210571, + "learning_rate": 3.689291549086589e-06, + "loss": 0.052, "step": 42490 }, { - "epoch": 3.156096836477053, - "grad_norm": 0.9068493247032166, - "learning_rate": 1.1063418981137681e-05, - "loss": 0.0461, + "epoch": 6.312193672954106, + "grad_norm": 0.7368887066841125, + "learning_rate": 3.6878063270458938e-06, + "loss": 0.0642, "step": 42500 }, { - "epoch": 3.1568394474974006, - "grad_norm": 1.6886134147644043, - "learning_rate": 1.1058963315015594e-05, - "loss": 0.093, + "epoch": 6.313678894994801, + "grad_norm": 0.7231339812278748, + "learning_rate": 3.6863211050051982e-06, + "loss": 0.0519, "step": 42510 }, { - "epoch": 3.1575820585177485, - "grad_norm": 2.079350233078003, - "learning_rate": 1.1054507648893511e-05, - "loss": 0.0642, + "epoch": 6.315164117035497, + "grad_norm": 0.9437358975410461, + "learning_rate": 3.6848358829645035e-06, + "loss": 0.0581, "step": 42520 }, { - "epoch": 3.158324669538096, - "grad_norm": 1.6049011945724487, - "learning_rate": 1.1050051982771424e-05, - "loss": 0.0613, + "epoch": 6.316649339076192, + "grad_norm": 0.8325836658477783, + "learning_rate": 3.6833506609238084e-06, + "loss": 0.0667, "step": 42530 }, { - "epoch": 3.1590672805584434, - "grad_norm": 0.36164718866348267, - "learning_rate": 1.104559631664934e-05, - "loss": 0.0669, + "epoch": 6.318134561116887, + "grad_norm": 1.0628379583358765, + "learning_rate": 3.681865438883113e-06, + "loss": 0.0598, "step": 42540 }, { - "epoch": 3.159809891578791, - "grad_norm": 1.008207082748413, - "learning_rate": 1.1041140650527253e-05, - "loss": 0.0988, + "epoch": 6.319619783157582, + "grad_norm": 1.2732975482940674, + "learning_rate": 3.680380216842418e-06, + "loss": 0.067, "step": 42550 }, { - "epoch": 3.1605525025991383, - "grad_norm": 2.440133571624756, - "learning_rate": 1.103668498440517e-05, - "loss": 0.0465, + "epoch": 6.321105005198277, + "grad_norm": 0.9922568202018738, + "learning_rate": 3.6788949948017233e-06, + "loss": 0.0597, "step": 42560 }, { - "epoch": 3.1612951136194862, - "grad_norm": 0.5878588557243347, - "learning_rate": 1.1032229318283084e-05, - "loss": 0.04, + "epoch": 6.3225902272389725, + "grad_norm": 0.9544100761413574, + "learning_rate": 3.677409772761028e-06, + "loss": 0.0458, "step": 42570 }, { - "epoch": 3.1620377246398337, - "grad_norm": 1.898363709449768, - "learning_rate": 1.1027773652160997e-05, - "loss": 0.0494, + "epoch": 6.324075449279667, + "grad_norm": 1.7845814228057861, + "learning_rate": 3.6759245507203326e-06, + "loss": 0.0558, "step": 42580 }, { - "epoch": 3.162780335660181, - "grad_norm": 1.029447078704834, - "learning_rate": 1.1023317986038914e-05, - "loss": 0.0658, + "epoch": 6.325560671320362, + "grad_norm": 0.8888192772865295, + "learning_rate": 3.6744393286796383e-06, + "loss": 0.0806, "step": 42590 }, { - "epoch": 3.1635229466805286, - "grad_norm": 1.3765453100204468, - "learning_rate": 1.1018862319916827e-05, - "loss": 0.0479, + "epoch": 6.327045893361057, + "grad_norm": 0.7353573441505432, + "learning_rate": 3.6729541066389428e-06, + "loss": 0.0476, "step": 42600 }, { - "epoch": 3.1642655577008765, - "grad_norm": 0.8187096118927002, - "learning_rate": 1.1014406653794742e-05, - "loss": 0.0677, + "epoch": 6.328531115401753, + "grad_norm": 1.0673481225967407, + "learning_rate": 3.6714688845982476e-06, + "loss": 0.0604, "step": 42610 }, { - "epoch": 3.165008168721224, - "grad_norm": 3.0219662189483643, - "learning_rate": 1.1009950987672657e-05, - "loss": 0.0544, + "epoch": 6.330016337442448, + "grad_norm": 0.9482265710830688, + "learning_rate": 3.6699836625575525e-06, + "loss": 0.051, "step": 42620 }, { - "epoch": 3.1657507797415714, - "grad_norm": 0.4135105609893799, - "learning_rate": 1.1005495321550572e-05, - "loss": 0.0379, + "epoch": 6.331501559483143, + "grad_norm": 0.5500361323356628, + "learning_rate": 3.6684984405168578e-06, + "loss": 0.0568, "step": 42630 }, { - "epoch": 3.166493390761919, - "grad_norm": 1.9586702585220337, - "learning_rate": 1.1001039655428487e-05, - "loss": 0.0644, + "epoch": 6.332986781523838, + "grad_norm": 0.8495878577232361, + "learning_rate": 3.6670132184761626e-06, + "loss": 0.0537, "step": 42640 }, { - "epoch": 3.1672360017822663, - "grad_norm": 0.3655850887298584, - "learning_rate": 1.09965839893064e-05, - "loss": 0.0825, + "epoch": 6.3344720035645325, + "grad_norm": 0.6985103487968445, + "learning_rate": 3.6655279964354675e-06, + "loss": 0.0455, "step": 42650 }, { - "epoch": 3.167978612802614, - "grad_norm": 0.6566202640533447, - "learning_rate": 1.0992128323184316e-05, - "loss": 0.0597, + "epoch": 6.335957225605228, + "grad_norm": 0.6907896995544434, + "learning_rate": 3.664042774394772e-06, + "loss": 0.0501, "step": 42660 }, { - "epoch": 3.1687212238229616, - "grad_norm": 1.0529264211654663, - "learning_rate": 1.098767265706223e-05, - "loss": 0.0569, + "epoch": 6.337442447645923, + "grad_norm": 1.1214615106582642, + "learning_rate": 3.662557552354077e-06, + "loss": 0.0546, "step": 42670 }, { - "epoch": 3.169463834843309, - "grad_norm": 1.0065066814422607, - "learning_rate": 1.0983216990940146e-05, - "loss": 0.0674, + "epoch": 6.338927669686618, + "grad_norm": 0.7439780831336975, + "learning_rate": 3.661072330313382e-06, + "loss": 0.0566, "step": 42680 }, { - "epoch": 3.1702064458636565, - "grad_norm": 3.295680046081543, - "learning_rate": 1.097876132481806e-05, - "loss": 0.0754, + "epoch": 6.340412891727313, + "grad_norm": 0.47073668241500854, + "learning_rate": 3.659587108272687e-06, + "loss": 0.0488, "step": 42690 }, { - "epoch": 3.170949056884004, - "grad_norm": 1.2959693670272827, - "learning_rate": 1.0974305658695976e-05, - "loss": 0.0668, + "epoch": 6.341898113768008, + "grad_norm": 0.8119236826896667, + "learning_rate": 3.658101886231992e-06, + "loss": 0.0616, "step": 42700 }, { - "epoch": 3.171691667904352, - "grad_norm": 1.669705867767334, - "learning_rate": 1.096984999257389e-05, - "loss": 0.0788, + "epoch": 6.343383335808704, + "grad_norm": 0.8019691109657288, + "learning_rate": 3.656616664191297e-06, + "loss": 0.0604, "step": 42710 }, { - "epoch": 3.1724342789246993, - "grad_norm": 1.6456496715545654, - "learning_rate": 1.0965394326451804e-05, - "loss": 0.0564, + "epoch": 6.344868557849399, + "grad_norm": 0.8019810914993286, + "learning_rate": 3.655131442150602e-06, + "loss": 0.0516, "step": 42720 }, { - "epoch": 3.1731768899450468, - "grad_norm": 1.2554987668991089, - "learning_rate": 1.0960938660329719e-05, - "loss": 0.0777, + "epoch": 6.3463537798900935, + "grad_norm": 0.7432687282562256, + "learning_rate": 3.6536462201099063e-06, + "loss": 0.0644, "step": 42730 }, { - "epoch": 3.173919500965394, - "grad_norm": 2.1584696769714355, - "learning_rate": 1.0956482994207636e-05, - "loss": 0.0614, + "epoch": 6.347839001930788, + "grad_norm": 0.9278331398963928, + "learning_rate": 3.652160998069212e-06, + "loss": 0.0677, "step": 42740 }, { - "epoch": 3.1746621119857417, - "grad_norm": 1.2832754850387573, - "learning_rate": 1.0952027328085549e-05, - "loss": 0.0692, + "epoch": 6.349324223971483, + "grad_norm": 0.8735617399215698, + "learning_rate": 3.6506757760285165e-06, + "loss": 0.0416, "step": 42750 }, { - "epoch": 3.1754047230060896, - "grad_norm": 0.7738613486289978, - "learning_rate": 1.0947571661963464e-05, - "loss": 0.0628, + "epoch": 6.350809446012179, + "grad_norm": 0.627546489238739, + "learning_rate": 3.6491905539878213e-06, + "loss": 0.056, "step": 42760 }, { - "epoch": 3.176147334026437, - "grad_norm": 1.1968348026275635, - "learning_rate": 1.0943115995841377e-05, - "loss": 0.0578, + "epoch": 6.352294668052874, + "grad_norm": 0.3710090219974518, + "learning_rate": 3.647705331947126e-06, + "loss": 0.0572, "step": 42770 }, { - "epoch": 3.1768899450467845, - "grad_norm": 0.8434922099113464, - "learning_rate": 1.0938660329719294e-05, - "loss": 0.0698, + "epoch": 6.353779890093569, + "grad_norm": 0.8109626770019531, + "learning_rate": 3.6462201099064315e-06, + "loss": 0.0519, "step": 42780 }, { - "epoch": 3.177632556067132, - "grad_norm": 0.869853675365448, - "learning_rate": 1.0934204663597209e-05, - "loss": 0.0595, + "epoch": 6.355265112134264, + "grad_norm": 0.7307920455932617, + "learning_rate": 3.6447348878657363e-06, + "loss": 0.0624, "step": 42790 }, { - "epoch": 3.17837516708748, - "grad_norm": 1.2317126989364624, - "learning_rate": 1.0929748997475122e-05, - "loss": 0.0731, + "epoch": 6.35675033417496, + "grad_norm": 0.569338321685791, + "learning_rate": 3.643249665825041e-06, + "loss": 0.0626, "step": 42800 }, { - "epoch": 3.1791177781078273, - "grad_norm": 2.5130200386047363, - "learning_rate": 1.0925293331353037e-05, - "loss": 0.0744, + "epoch": 6.3582355562156545, + "grad_norm": 1.0345954895019531, + "learning_rate": 3.6417644437843456e-06, + "loss": 0.0639, "step": 42810 }, { - "epoch": 3.1798603891281747, - "grad_norm": 1.538057565689087, - "learning_rate": 1.0920837665230952e-05, - "loss": 0.0544, + "epoch": 6.359720778256349, + "grad_norm": 0.6668635606765747, + "learning_rate": 3.6402792217436513e-06, + "loss": 0.0512, "step": 42820 }, { - "epoch": 3.180603000148522, - "grad_norm": 2.4751596450805664, - "learning_rate": 1.0916381999108867e-05, - "loss": 0.0568, + "epoch": 6.361206000297044, + "grad_norm": 0.7520941495895386, + "learning_rate": 3.6387939997029558e-06, + "loss": 0.063, "step": 42830 }, { - "epoch": 3.1813456111688696, - "grad_norm": 0.298840194940567, - "learning_rate": 1.091192633298678e-05, - "loss": 0.0391, + "epoch": 6.362691222337739, + "grad_norm": 0.9604344367980957, + "learning_rate": 3.6373087776622606e-06, + "loss": 0.0638, "step": 42840 }, { - "epoch": 3.1820882221892175, - "grad_norm": 1.3424323797225952, - "learning_rate": 1.0907470666864697e-05, - "loss": 0.0854, + "epoch": 6.364176444378435, + "grad_norm": 0.719851016998291, + "learning_rate": 3.635823555621566e-06, + "loss": 0.0639, "step": 42850 }, { - "epoch": 3.182830833209565, - "grad_norm": 1.4192628860473633, - "learning_rate": 1.0903015000742612e-05, - "loss": 0.0538, + "epoch": 6.36566166641913, + "grad_norm": 1.470848798751831, + "learning_rate": 3.6343383335808707e-06, + "loss": 0.066, "step": 42860 }, { - "epoch": 3.1835734442299124, - "grad_norm": 1.6128898859024048, - "learning_rate": 1.0898559334620526e-05, - "loss": 0.0526, + "epoch": 6.367146888459825, + "grad_norm": 1.5860182046890259, + "learning_rate": 3.6328531115401756e-06, + "loss": 0.0548, "step": 42870 }, { - "epoch": 3.18431605525026, - "grad_norm": 1.4316538572311401, - "learning_rate": 1.089410366849844e-05, - "loss": 0.0555, + "epoch": 6.36863211050052, + "grad_norm": 0.7763499617576599, + "learning_rate": 3.6313678894994805e-06, + "loss": 0.0682, "step": 42880 }, { - "epoch": 3.1850586662706073, - "grad_norm": 0.824476420879364, - "learning_rate": 1.0889648002376356e-05, - "loss": 0.0546, + "epoch": 6.370117332541215, + "grad_norm": 0.8643427491188049, + "learning_rate": 3.6298826674587857e-06, + "loss": 0.0444, "step": 42890 }, { - "epoch": 3.185801277290955, - "grad_norm": 1.2334526777267456, - "learning_rate": 1.088519233625427e-05, - "loss": 0.0352, + "epoch": 6.37160255458191, + "grad_norm": 1.1857274770736694, + "learning_rate": 3.62839744541809e-06, + "loss": 0.0518, "step": 42900 }, { - "epoch": 3.1865438883113026, - "grad_norm": 1.7368484735488892, - "learning_rate": 1.0880736670132186e-05, - "loss": 0.0406, + "epoch": 6.373087776622605, + "grad_norm": 0.8132492303848267, + "learning_rate": 3.626912223377395e-06, + "loss": 0.0492, "step": 42910 }, { - "epoch": 3.18728649933165, - "grad_norm": 1.9491029977798462, - "learning_rate": 1.0876281004010099e-05, - "loss": 0.0542, + "epoch": 6.3745729986633, + "grad_norm": 1.3745230436325073, + "learning_rate": 3.6254270013367e-06, + "loss": 0.057, "step": 42920 }, { - "epoch": 3.1880291103519975, - "grad_norm": 2.460498809814453, - "learning_rate": 1.0871825337888016e-05, - "loss": 0.0532, + "epoch": 6.376058220703995, + "grad_norm": 1.691438913345337, + "learning_rate": 3.623941779296005e-06, + "loss": 0.0536, "step": 42930 }, { - "epoch": 3.188771721372345, - "grad_norm": 1.1754149198532104, - "learning_rate": 1.0867369671765929e-05, - "loss": 0.1044, + "epoch": 6.37754344274469, + "grad_norm": 0.6980904936790466, + "learning_rate": 3.62245655725531e-06, + "loss": 0.0468, "step": 42940 }, { - "epoch": 3.189514332392693, - "grad_norm": 1.4648820161819458, - "learning_rate": 1.0862914005643844e-05, - "loss": 0.0671, + "epoch": 6.379028664785386, + "grad_norm": 0.4476390480995178, + "learning_rate": 3.620971335214615e-06, + "loss": 0.0732, "step": 42950 }, { - "epoch": 3.1902569434130403, - "grad_norm": 1.5888352394104004, - "learning_rate": 1.0858458339521759e-05, - "loss": 0.054, + "epoch": 6.380513886826081, + "grad_norm": 0.8817645311355591, + "learning_rate": 3.61948611317392e-06, + "loss": 0.0513, "step": 42960 }, { - "epoch": 3.190999554433388, - "grad_norm": 0.9326488971710205, - "learning_rate": 1.0854002673399674e-05, - "loss": 0.067, + "epoch": 6.381999108866776, + "grad_norm": 0.406442791223526, + "learning_rate": 3.618000891133225e-06, + "loss": 0.057, "step": 42970 }, { - "epoch": 3.1917421654537352, - "grad_norm": 0.8771650195121765, - "learning_rate": 1.0849547007277589e-05, - "loss": 0.0449, + "epoch": 6.3834843309074705, + "grad_norm": 0.8691296577453613, + "learning_rate": 3.6165156690925295e-06, + "loss": 0.0692, "step": 42980 }, { - "epoch": 3.1924847764740827, - "grad_norm": 1.608896017074585, - "learning_rate": 1.0845091341155502e-05, - "loss": 0.0408, + "epoch": 6.384969552948165, + "grad_norm": 1.1780812740325928, + "learning_rate": 3.6150304470518343e-06, + "loss": 0.0708, "step": 42990 }, { - "epoch": 3.1932273874944306, - "grad_norm": 1.1456100940704346, - "learning_rate": 1.0840635675033419e-05, - "loss": 0.0858, + "epoch": 6.386454774988861, + "grad_norm": 0.6412712931632996, + "learning_rate": 3.6135452250111396e-06, + "loss": 0.0525, "step": 43000 }, { - "epoch": 3.193969998514778, - "grad_norm": 0.9099137187004089, - "learning_rate": 1.0836180008911332e-05, - "loss": 0.075, + "epoch": 6.387939997029556, + "grad_norm": 0.9573513865470886, + "learning_rate": 3.6120600029704444e-06, + "loss": 0.0691, "step": 43010 }, { - "epoch": 3.1947126095351255, - "grad_norm": 3.812081813812256, - "learning_rate": 1.0831724342789247e-05, - "loss": 0.0499, + "epoch": 6.389425219070251, + "grad_norm": 0.7389662861824036, + "learning_rate": 3.6105747809297493e-06, + "loss": 0.0616, "step": 43020 }, { - "epoch": 3.195455220555473, - "grad_norm": 3.0689592361450195, - "learning_rate": 1.0827268676667162e-05, - "loss": 0.0763, + "epoch": 6.390910441110946, + "grad_norm": 0.9667379260063171, + "learning_rate": 3.609089558889054e-06, + "loss": 0.0584, "step": 43030 }, { - "epoch": 3.1961978315758204, - "grad_norm": 1.6192634105682373, - "learning_rate": 1.0822813010545077e-05, - "loss": 0.0643, + "epoch": 6.392395663151641, + "grad_norm": 0.5876452922821045, + "learning_rate": 3.6076043368483594e-06, + "loss": 0.0692, "step": 43040 }, { - "epoch": 3.1969404425961683, - "grad_norm": 2.0305936336517334, - "learning_rate": 1.0818357344422992e-05, - "loss": 0.0602, + "epoch": 6.3938808851923365, + "grad_norm": 1.150445580482483, + "learning_rate": 3.606119114807664e-06, + "loss": 0.0686, "step": 43050 }, { - "epoch": 3.1976830536165157, - "grad_norm": 3.326087713241577, - "learning_rate": 1.0813901678300905e-05, - "loss": 0.0737, + "epoch": 6.3953661072330314, + "grad_norm": 1.0147703886032104, + "learning_rate": 3.6046338927669687e-06, + "loss": 0.0614, "step": 43060 }, { - "epoch": 3.198425664636863, - "grad_norm": 1.6276055574417114, - "learning_rate": 1.080944601217882e-05, - "loss": 0.0827, + "epoch": 6.396851329273726, + "grad_norm": 1.0354033708572388, + "learning_rate": 3.6031486707262736e-06, + "loss": 0.0589, "step": 43070 }, { - "epoch": 3.1991682756572106, - "grad_norm": 0.9700911641120911, - "learning_rate": 1.0804990346056735e-05, - "loss": 0.0455, + "epoch": 6.398336551314421, + "grad_norm": 1.8302271366119385, + "learning_rate": 3.601663448685579e-06, + "loss": 0.054, "step": 43080 }, { - "epoch": 3.199910886677558, - "grad_norm": 0.9960930347442627, - "learning_rate": 1.080053467993465e-05, - "loss": 0.061, + "epoch": 6.399821773355116, + "grad_norm": 0.3475869596004486, + "learning_rate": 3.6001782266448837e-06, + "loss": 0.0575, "step": 43090 }, { - "epoch": 3.200653497697906, - "grad_norm": 1.0367248058319092, - "learning_rate": 1.0796079013812565e-05, - "loss": 0.0802, + "epoch": 6.401306995395812, + "grad_norm": 0.732566773891449, + "learning_rate": 3.5986930046041886e-06, + "loss": 0.0394, "step": 43100 }, { - "epoch": 3.2013961087182534, - "grad_norm": 2.8516721725463867, - "learning_rate": 1.079162334769048e-05, - "loss": 0.0529, + "epoch": 6.402792217436507, + "grad_norm": 0.48667097091674805, + "learning_rate": 3.597207782563494e-06, + "loss": 0.0594, "step": 43110 }, { - "epoch": 3.202138719738601, - "grad_norm": 0.391720712184906, - "learning_rate": 1.0787167681568395e-05, - "loss": 0.0704, + "epoch": 6.404277439477202, + "grad_norm": 0.8790454268455505, + "learning_rate": 3.5957225605227987e-06, + "loss": 0.0405, "step": 43120 }, { - "epoch": 3.2028813307589483, - "grad_norm": 2.677454710006714, - "learning_rate": 1.0782712015446309e-05, - "loss": 0.0784, + "epoch": 6.405762661517897, + "grad_norm": 0.9130986928939819, + "learning_rate": 3.594237338482103e-06, + "loss": 0.053, "step": 43130 }, { - "epoch": 3.2036239417792958, - "grad_norm": 2.6573688983917236, - "learning_rate": 1.0778256349324224e-05, - "loss": 0.0293, + "epoch": 6.4072478835585915, + "grad_norm": 1.0321241617202759, + "learning_rate": 3.592752116441408e-06, + "loss": 0.0586, "step": 43140 }, { - "epoch": 3.2043665527996437, - "grad_norm": 2.1728572845458984, - "learning_rate": 1.077380068320214e-05, - "loss": 0.0788, + "epoch": 6.408733105599287, + "grad_norm": 0.9510276913642883, + "learning_rate": 3.5912668944007133e-06, + "loss": 0.0666, "step": 43150 }, { - "epoch": 3.205109163819991, - "grad_norm": 1.871918797492981, - "learning_rate": 1.0769345017080054e-05, - "loss": 0.0722, + "epoch": 6.410218327639982, + "grad_norm": 0.6249675750732422, + "learning_rate": 3.589781672360018e-06, + "loss": 0.0569, "step": 43160 }, { - "epoch": 3.2058517748403386, - "grad_norm": 0.8562690615653992, - "learning_rate": 1.0764889350957969e-05, - "loss": 0.0808, + "epoch": 6.411703549680677, + "grad_norm": 1.0495495796203613, + "learning_rate": 3.588296450319323e-06, + "loss": 0.0596, "step": 43170 }, { - "epoch": 3.206594385860686, - "grad_norm": 0.7276735305786133, - "learning_rate": 1.0760433684835882e-05, - "loss": 0.0452, + "epoch": 6.413188771721372, + "grad_norm": 1.4297109842300415, + "learning_rate": 3.586811228278628e-06, + "loss": 0.0661, "step": 43180 }, { - "epoch": 3.207336996881034, - "grad_norm": 1.5723731517791748, - "learning_rate": 1.0755978018713799e-05, - "loss": 0.0914, + "epoch": 6.414673993762068, + "grad_norm": 1.4268157482147217, + "learning_rate": 3.585326006237933e-06, + "loss": 0.0631, "step": 43190 }, { - "epoch": 3.2080796079013814, - "grad_norm": 2.1610429286956787, - "learning_rate": 1.0751522352591714e-05, - "loss": 0.069, + "epoch": 6.416159215802763, + "grad_norm": 0.49547335505485535, + "learning_rate": 3.583840784197238e-06, + "loss": 0.0529, "step": 43200 }, { - "epoch": 3.208822218921729, - "grad_norm": 2.077439069747925, - "learning_rate": 1.0747066686469627e-05, - "loss": 0.0626, + "epoch": 6.417644437843458, + "grad_norm": 1.1406906843185425, + "learning_rate": 3.5823555621565424e-06, + "loss": 0.0807, "step": 43210 }, { - "epoch": 3.2095648299420763, - "grad_norm": 1.1080368757247925, - "learning_rate": 1.0742611020347544e-05, - "loss": 0.0603, + "epoch": 6.4191296598841525, + "grad_norm": 0.9071478843688965, + "learning_rate": 3.5808703401158477e-06, + "loss": 0.0688, "step": 43220 }, { - "epoch": 3.2103074409624237, - "grad_norm": 1.089012622833252, - "learning_rate": 1.0738155354225457e-05, - "loss": 0.046, + "epoch": 6.420614881924847, + "grad_norm": 0.9624571204185486, + "learning_rate": 3.5793851180751526e-06, + "loss": 0.0609, "step": 43230 }, { - "epoch": 3.2110500519827716, - "grad_norm": 1.0629624128341675, - "learning_rate": 1.0733699688103372e-05, - "loss": 0.065, + "epoch": 6.422100103965543, + "grad_norm": 0.5686826705932617, + "learning_rate": 3.5778998960344574e-06, + "loss": 0.0472, "step": 43240 }, { - "epoch": 3.211792663003119, - "grad_norm": 2.318930149078369, - "learning_rate": 1.0729244021981285e-05, - "loss": 0.0856, + "epoch": 6.423585326006238, + "grad_norm": 1.276450753211975, + "learning_rate": 3.5764146739937623e-06, + "loss": 0.0593, "step": 43250 }, { - "epoch": 3.2125352740234665, - "grad_norm": 0.6202425360679626, - "learning_rate": 1.0724788355859202e-05, - "loss": 0.0659, + "epoch": 6.425070548046933, + "grad_norm": 1.1901986598968506, + "learning_rate": 3.5749294519530676e-06, + "loss": 0.0573, "step": 43260 }, { - "epoch": 3.213277885043814, - "grad_norm": 1.3956732749938965, - "learning_rate": 1.0720332689737117e-05, - "loss": 0.0679, + "epoch": 6.426555770087628, + "grad_norm": 0.5635876655578613, + "learning_rate": 3.5734442299123724e-06, + "loss": 0.0494, "step": 43270 }, { - "epoch": 3.2140204960641614, - "grad_norm": 1.315119743347168, - "learning_rate": 1.071587702361503e-05, - "loss": 0.0776, + "epoch": 6.428040992128323, + "grad_norm": 1.0205134153366089, + "learning_rate": 3.571959007871677e-06, + "loss": 0.0579, "step": 43280 }, { - "epoch": 3.2147631070845093, - "grad_norm": 1.4344345331192017, - "learning_rate": 1.0711421357492945e-05, - "loss": 0.0826, + "epoch": 6.429526214169019, + "grad_norm": 0.9467157125473022, + "learning_rate": 3.5704737858309817e-06, + "loss": 0.0696, "step": 43290 }, { - "epoch": 3.2155057181048567, - "grad_norm": 1.0249013900756836, - "learning_rate": 1.070696569137086e-05, - "loss": 0.052, + "epoch": 6.4310114362097135, + "grad_norm": 0.8886051774024963, + "learning_rate": 3.568988563790287e-06, + "loss": 0.0607, "step": 43300 }, { - "epoch": 3.216248329125204, - "grad_norm": 0.4725293815135956, - "learning_rate": 1.0702510025248775e-05, - "loss": 0.0762, + "epoch": 6.432496658250408, + "grad_norm": 0.7653177976608276, + "learning_rate": 3.567503341749592e-06, + "loss": 0.0823, "step": 43310 }, { - "epoch": 3.2169909401455516, - "grad_norm": 2.4270472526550293, - "learning_rate": 1.069805435912669e-05, - "loss": 0.0802, + "epoch": 6.433981880291103, + "grad_norm": 1.416134238243103, + "learning_rate": 3.5660181197088967e-06, + "loss": 0.0665, "step": 43320 }, { - "epoch": 3.217733551165899, - "grad_norm": 1.2253289222717285, - "learning_rate": 1.0693598693004603e-05, - "loss": 0.0569, + "epoch": 6.435467102331798, + "grad_norm": 0.49492040276527405, + "learning_rate": 3.5645328976682016e-06, + "loss": 0.0612, "step": 43330 }, { - "epoch": 3.218476162186247, - "grad_norm": 0.7013288140296936, - "learning_rate": 1.068914302688252e-05, - "loss": 0.0446, + "epoch": 6.436952324372494, + "grad_norm": 0.8057112693786621, + "learning_rate": 3.563047675627507e-06, + "loss": 0.0647, "step": 43340 }, { - "epoch": 3.2192187732065944, - "grad_norm": 1.2379825115203857, - "learning_rate": 1.0684687360760433e-05, - "loss": 0.0667, + "epoch": 6.438437546413189, + "grad_norm": 0.5526387095451355, + "learning_rate": 3.5615624535868117e-06, + "loss": 0.0738, "step": 43350 }, { - "epoch": 3.219961384226942, - "grad_norm": 1.6265310049057007, - "learning_rate": 1.0680231694638348e-05, - "loss": 0.0971, + "epoch": 6.439922768453884, + "grad_norm": 1.0343058109283447, + "learning_rate": 3.560077231546116e-06, + "loss": 0.0657, "step": 43360 }, { - "epoch": 3.2207039952472893, - "grad_norm": 1.87282133102417, - "learning_rate": 1.0675776028516263e-05, - "loss": 0.0718, + "epoch": 6.441407990494579, + "grad_norm": 1.026384949684143, + "learning_rate": 3.5585920095054214e-06, + "loss": 0.0455, "step": 43370 }, { - "epoch": 3.2214466062676372, - "grad_norm": 1.285352349281311, - "learning_rate": 1.0671320362394178e-05, - "loss": 0.0982, + "epoch": 6.4428932125352745, + "grad_norm": 0.7136487364768982, + "learning_rate": 3.5571067874647263e-06, + "loss": 0.0517, "step": 43380 }, { - "epoch": 3.2221892172879847, - "grad_norm": 1.9336485862731934, - "learning_rate": 1.0666864696272093e-05, - "loss": 0.0469, + "epoch": 6.444378434575969, + "grad_norm": 1.1231248378753662, + "learning_rate": 3.555621565424031e-06, + "loss": 0.0539, "step": 43390 }, { - "epoch": 3.222931828308332, - "grad_norm": 2.0391123294830322, - "learning_rate": 1.0662409030150007e-05, - "loss": 0.0731, + "epoch": 6.445863656616664, + "grad_norm": 0.7071220278739929, + "learning_rate": 3.554136343383336e-06, + "loss": 0.0462, "step": 43400 }, { - "epoch": 3.2236744393286796, - "grad_norm": 0.8943817019462585, - "learning_rate": 1.0657953364027923e-05, - "loss": 0.0598, + "epoch": 6.447348878657359, + "grad_norm": 0.6214123368263245, + "learning_rate": 3.5526511213426413e-06, + "loss": 0.0631, "step": 43410 }, { - "epoch": 3.224417050349027, - "grad_norm": 0.8489885330200195, - "learning_rate": 1.0653497697905837e-05, - "loss": 0.0584, + "epoch": 6.448834100698054, + "grad_norm": 0.8865244388580322, + "learning_rate": 3.551165899301946e-06, + "loss": 0.0481, "step": 43420 }, { - "epoch": 3.225159661369375, - "grad_norm": 0.9959657192230225, - "learning_rate": 1.0649042031783752e-05, - "loss": 0.064, + "epoch": 6.45031932273875, + "grad_norm": 0.6450020670890808, + "learning_rate": 3.5496806772612506e-06, + "loss": 0.0357, "step": 43430 }, { - "epoch": 3.2259022723897224, - "grad_norm": 1.330881953239441, - "learning_rate": 1.0644586365661667e-05, - "loss": 0.069, + "epoch": 6.451804544779445, + "grad_norm": 0.4999340772628784, + "learning_rate": 3.5481954552205554e-06, + "loss": 0.0623, "step": 43440 }, { - "epoch": 3.22664488341007, - "grad_norm": 2.326178550720215, - "learning_rate": 1.0640130699539582e-05, - "loss": 0.0519, + "epoch": 6.45328976682014, + "grad_norm": 0.978721559047699, + "learning_rate": 3.5467102331798607e-06, + "loss": 0.0814, "step": 43450 }, { - "epoch": 3.2273874944304173, - "grad_norm": 0.8001874089241028, - "learning_rate": 1.0635675033417497e-05, - "loss": 0.059, + "epoch": 6.454774988860835, + "grad_norm": 1.1102402210235596, + "learning_rate": 3.5452250111391656e-06, + "loss": 0.0667, "step": 43460 }, { - "epoch": 3.2281301054507647, - "grad_norm": 1.8835375308990479, - "learning_rate": 1.063121936729541e-05, - "loss": 0.1137, + "epoch": 6.4562602109015295, + "grad_norm": 0.7155255675315857, + "learning_rate": 3.5437397890984704e-06, + "loss": 0.0361, "step": 43470 }, { - "epoch": 3.2288727164711126, - "grad_norm": 1.0874513387680054, - "learning_rate": 1.0626763701173325e-05, - "loss": 0.0764, + "epoch": 6.457745432942225, + "grad_norm": 0.6756823658943176, + "learning_rate": 3.5422545670577753e-06, + "loss": 0.0772, "step": 43480 }, { - "epoch": 3.22961532749146, - "grad_norm": 0.6599858403205872, - "learning_rate": 1.0622308035051242e-05, - "loss": 0.0516, + "epoch": 6.45923065498292, + "grad_norm": 0.9203720688819885, + "learning_rate": 3.5407693450170805e-06, + "loss": 0.0585, "step": 43490 }, { - "epoch": 3.2303579385118075, - "grad_norm": 0.5097527503967285, - "learning_rate": 1.0617852368929155e-05, - "loss": 0.0429, + "epoch": 6.460715877023615, + "grad_norm": 1.1555261611938477, + "learning_rate": 3.5392841229763854e-06, + "loss": 0.046, "step": 43500 }, { - "epoch": 3.231100549532155, - "grad_norm": 0.7686970829963684, - "learning_rate": 1.061339670280707e-05, - "loss": 0.0492, + "epoch": 6.46220109906431, + "grad_norm": 0.8422235250473022, + "learning_rate": 3.53779890093569e-06, + "loss": 0.0494, "step": 43510 }, { - "epoch": 3.2318431605525024, - "grad_norm": 1.8468888998031616, - "learning_rate": 1.0608941036684985e-05, - "loss": 0.0558, + "epoch": 6.463686321105005, + "grad_norm": 0.676811695098877, + "learning_rate": 3.536313678894995e-06, + "loss": 0.0549, "step": 43520 }, { - "epoch": 3.2325857715728503, - "grad_norm": 0.26282399892807007, - "learning_rate": 1.06044853705629e-05, - "loss": 0.069, + "epoch": 6.465171543145701, + "grad_norm": 2.3225271701812744, + "learning_rate": 3.5348284568543e-06, + "loss": 0.0959, "step": 43530 }, { - "epoch": 3.2333283825931978, - "grad_norm": 2.4941744804382324, - "learning_rate": 1.0600029704440813e-05, - "loss": 0.053, + "epoch": 6.4666567651863955, + "grad_norm": 0.8296643495559692, + "learning_rate": 3.533343234813605e-06, + "loss": 0.0632, "step": 43540 }, { - "epoch": 3.234070993613545, - "grad_norm": 2.5920748710632324, - "learning_rate": 1.0595574038318728e-05, - "loss": 0.0638, + "epoch": 6.46814198722709, + "grad_norm": 1.4344838857650757, + "learning_rate": 3.5318580127729097e-06, + "loss": 0.0584, "step": 43550 }, { - "epoch": 3.2348136046338927, - "grad_norm": 0.590412437915802, - "learning_rate": 1.0591118372196645e-05, - "loss": 0.0606, + "epoch": 6.469627209267785, + "grad_norm": 1.3841466903686523, + "learning_rate": 3.530372790732215e-06, + "loss": 0.0707, "step": 43560 }, { - "epoch": 3.23555621565424, - "grad_norm": 1.5013396739959717, - "learning_rate": 1.0586662706074558e-05, - "loss": 0.0688, + "epoch": 6.47111243130848, + "grad_norm": 0.9099441170692444, + "learning_rate": 3.52888756869152e-06, + "loss": 0.0615, "step": 43570 }, { - "epoch": 3.236298826674588, - "grad_norm": 1.6271650791168213, - "learning_rate": 1.0582207039952473e-05, - "loss": 0.0675, + "epoch": 6.472597653349176, + "grad_norm": 0.8922671675682068, + "learning_rate": 3.5274023466508247e-06, + "loss": 0.0591, "step": 43580 }, { - "epoch": 3.2370414376949355, - "grad_norm": 0.9938003420829773, - "learning_rate": 1.0577751373830386e-05, - "loss": 0.081, + "epoch": 6.474082875389871, + "grad_norm": 1.31600821018219, + "learning_rate": 3.525917124610129e-06, + "loss": 0.0567, "step": 43590 }, { - "epoch": 3.237784048715283, - "grad_norm": 1.7118418216705322, - "learning_rate": 1.0573295707708303e-05, - "loss": 0.0595, + "epoch": 6.475568097430566, + "grad_norm": 0.8927801847457886, + "learning_rate": 3.5244319025694344e-06, + "loss": 0.0739, "step": 43600 }, { - "epoch": 3.2385266597356304, - "grad_norm": 0.5836747884750366, - "learning_rate": 1.0568840041586218e-05, - "loss": 0.0638, + "epoch": 6.477053319471261, + "grad_norm": 0.879862368106842, + "learning_rate": 3.5229466805287393e-06, + "loss": 0.0516, "step": 43610 }, { - "epoch": 3.239269270755978, - "grad_norm": 0.8831083178520203, - "learning_rate": 1.0564384375464131e-05, - "loss": 0.0438, + "epoch": 6.478538541511956, + "grad_norm": 0.6488139629364014, + "learning_rate": 3.521461458488044e-06, + "loss": 0.048, "step": 43620 }, { - "epoch": 3.2400118817763257, - "grad_norm": 1.3930721282958984, - "learning_rate": 1.0559928709342048e-05, - "loss": 0.0442, + "epoch": 6.480023763552651, + "grad_norm": 0.35967373847961426, + "learning_rate": 3.5199762364473494e-06, + "loss": 0.0622, "step": 43630 }, { - "epoch": 3.240754492796673, - "grad_norm": 5.538564682006836, - "learning_rate": 1.0555473043219961e-05, - "loss": 0.0557, + "epoch": 6.481508985593346, + "grad_norm": 0.7182832360267639, + "learning_rate": 3.5184910144066542e-06, + "loss": 0.0615, "step": 43640 }, { - "epoch": 3.2414971038170206, - "grad_norm": 1.8036948442459106, - "learning_rate": 1.0551017377097876e-05, - "loss": 0.0664, + "epoch": 6.482994207634041, + "grad_norm": 0.6438133716583252, + "learning_rate": 3.517005792365959e-06, + "loss": 0.0607, "step": 43650 }, { - "epoch": 3.242239714837368, - "grad_norm": 0.252446711063385, - "learning_rate": 1.054656171097579e-05, - "loss": 0.0578, + "epoch": 6.484479429674736, + "grad_norm": 0.5943735241889954, + "learning_rate": 3.5155205703252635e-06, + "loss": 0.0517, "step": 43660 }, { - "epoch": 3.2429823258577155, - "grad_norm": 1.6370078325271606, - "learning_rate": 1.0542106044853706e-05, - "loss": 0.0668, + "epoch": 6.485964651715431, + "grad_norm": 0.5622908473014832, + "learning_rate": 3.5140353482845692e-06, + "loss": 0.052, "step": 43670 }, { - "epoch": 3.2437249368780634, - "grad_norm": 0.6032128930091858, - "learning_rate": 1.0537650378731621e-05, - "loss": 0.0697, + "epoch": 6.487449873756127, + "grad_norm": 0.690758466720581, + "learning_rate": 3.5125501262438737e-06, + "loss": 0.0612, "step": 43680 }, { - "epoch": 3.244467547898411, - "grad_norm": 1.6264759302139282, - "learning_rate": 1.0533194712609535e-05, - "loss": 0.0407, + "epoch": 6.488935095796822, + "grad_norm": 0.8912779092788696, + "learning_rate": 3.5110649042031785e-06, + "loss": 0.0464, "step": 43690 }, { - "epoch": 3.2452101589187583, - "grad_norm": 1.4826754331588745, - "learning_rate": 1.052873904648745e-05, - "loss": 0.0342, + "epoch": 6.490420317837517, + "grad_norm": 0.9846697449684143, + "learning_rate": 3.5095796821624834e-06, + "loss": 0.0592, "step": 43700 }, { - "epoch": 3.2459527699391058, - "grad_norm": 0.2272782027721405, - "learning_rate": 1.0524283380365365e-05, - "loss": 0.0744, + "epoch": 6.4919055398782115, + "grad_norm": 0.5257307291030884, + "learning_rate": 3.5080944601217887e-06, + "loss": 0.0703, "step": 43710 }, { - "epoch": 3.246695380959453, - "grad_norm": 2.538353204727173, - "learning_rate": 1.051982771424328e-05, - "loss": 0.063, + "epoch": 6.493390761918906, + "grad_norm": 0.32277756929397583, + "learning_rate": 3.5066092380810935e-06, + "loss": 0.0585, "step": 43720 }, { - "epoch": 3.247437991979801, - "grad_norm": 1.0268744230270386, - "learning_rate": 1.0515372048121195e-05, - "loss": 0.0528, + "epoch": 6.494875983959602, + "grad_norm": 0.7821950912475586, + "learning_rate": 3.5051240160403984e-06, + "loss": 0.0685, "step": 43730 }, { - "epoch": 3.2481806030001485, - "grad_norm": 0.812559962272644, - "learning_rate": 1.0510916381999108e-05, - "loss": 0.0557, + "epoch": 6.496361206000297, + "grad_norm": 0.563582181930542, + "learning_rate": 3.503638793999703e-06, + "loss": 0.0514, "step": 43740 }, { - "epoch": 3.248923214020496, - "grad_norm": 0.4809117317199707, - "learning_rate": 1.0506460715877025e-05, - "loss": 0.0452, + "epoch": 6.497846428040992, + "grad_norm": 0.6015027165412903, + "learning_rate": 3.502153571959008e-06, + "loss": 0.0631, "step": 43750 }, { - "epoch": 3.2496658250408434, - "grad_norm": 2.713081121444702, - "learning_rate": 1.0502005049754938e-05, - "loss": 0.0444, + "epoch": 6.499331650081687, + "grad_norm": 0.8591887950897217, + "learning_rate": 3.500668349918313e-06, + "loss": 0.0703, "step": 43760 }, { - "epoch": 3.2504084360611913, - "grad_norm": 1.6045153141021729, - "learning_rate": 1.0497549383632853e-05, - "loss": 0.0835, + "epoch": 6.500816872122383, + "grad_norm": 0.7110676765441895, + "learning_rate": 3.499183127877618e-06, + "loss": 0.0765, "step": 43770 }, { - "epoch": 3.251151047081539, - "grad_norm": 1.182823657989502, - "learning_rate": 1.049309371751077e-05, - "loss": 0.0358, + "epoch": 6.502302094163078, + "grad_norm": 0.6216291785240173, + "learning_rate": 3.497697905836923e-06, + "loss": 0.0496, "step": 43780 }, { - "epoch": 3.2518936581018862, - "grad_norm": 1.261793613433838, - "learning_rate": 1.0488638051388683e-05, - "loss": 0.059, + "epoch": 6.5037873162037725, + "grad_norm": 0.9407916069030762, + "learning_rate": 3.496212683796228e-06, + "loss": 0.0538, "step": 43790 }, { - "epoch": 3.2526362691222337, - "grad_norm": 1.5234590768814087, - "learning_rate": 1.0484182385266598e-05, - "loss": 0.0675, + "epoch": 6.505272538244467, + "grad_norm": 1.50139582157135, + "learning_rate": 3.494727461755533e-06, + "loss": 0.0619, "step": 43800 }, { - "epoch": 3.253378880142581, - "grad_norm": 3.1365156173706055, - "learning_rate": 1.0479726719144511e-05, - "loss": 0.0543, + "epoch": 6.506757760285162, + "grad_norm": 1.1639320850372314, + "learning_rate": 3.4932422397148372e-06, + "loss": 0.0467, "step": 43810 }, { - "epoch": 3.254121491162929, - "grad_norm": 2.5033278465270996, - "learning_rate": 1.0475271053022428e-05, - "loss": 0.0883, + "epoch": 6.508242982325858, + "grad_norm": 0.931298017501831, + "learning_rate": 3.491757017674143e-06, + "loss": 0.0692, "step": 43820 }, { - "epoch": 3.2548641021832765, - "grad_norm": 1.3353326320648193, - "learning_rate": 1.0470815386900341e-05, - "loss": 0.0692, + "epoch": 6.509728204366553, + "grad_norm": 0.8163691163063049, + "learning_rate": 3.4902717956334474e-06, + "loss": 0.0646, "step": 43830 }, { - "epoch": 3.255606713203624, - "grad_norm": 1.0387260913848877, - "learning_rate": 1.0466359720778256e-05, - "loss": 0.0863, + "epoch": 6.511213426407248, + "grad_norm": 1.0349771976470947, + "learning_rate": 3.4887865735927522e-06, + "loss": 0.062, "step": 43840 }, { - "epoch": 3.2563493242239714, - "grad_norm": 1.1819405555725098, - "learning_rate": 1.0461904054656171e-05, - "loss": 0.0635, + "epoch": 6.512698648447943, + "grad_norm": 1.071566104888916, + "learning_rate": 3.487301351552057e-06, + "loss": 0.0657, "step": 43850 }, { - "epoch": 3.257091935244319, - "grad_norm": 0.8391949534416199, - "learning_rate": 1.0457448388534086e-05, - "loss": 0.0419, + "epoch": 6.514183870488638, + "grad_norm": 0.7026413679122925, + "learning_rate": 3.4858161295113624e-06, + "loss": 0.0638, "step": 43860 }, { - "epoch": 3.2578345462646667, - "grad_norm": 3.538817882537842, - "learning_rate": 1.0452992722412001e-05, - "loss": 0.0757, + "epoch": 6.5156690925293335, + "grad_norm": 0.6621662378311157, + "learning_rate": 3.4843309074706672e-06, + "loss": 0.0503, "step": 43870 }, { - "epoch": 3.258577157285014, - "grad_norm": 1.986291766166687, - "learning_rate": 1.0448537056289915e-05, - "loss": 0.0663, + "epoch": 6.517154314570028, + "grad_norm": 0.49411311745643616, + "learning_rate": 3.482845685429972e-06, + "loss": 0.0443, "step": 43880 }, { - "epoch": 3.2593197683053616, - "grad_norm": 2.091001510620117, - "learning_rate": 1.0444081390167831e-05, - "loss": 0.0641, + "epoch": 6.518639536610723, + "grad_norm": 0.7259182333946228, + "learning_rate": 3.4813604633892774e-06, + "loss": 0.0583, "step": 43890 }, { - "epoch": 3.260062379325709, - "grad_norm": 1.032757043838501, - "learning_rate": 1.0439625724045746e-05, - "loss": 0.0629, + "epoch": 6.520124758651418, + "grad_norm": 0.4303617477416992, + "learning_rate": 3.4798752413485822e-06, + "loss": 0.0545, "step": 43900 }, { - "epoch": 3.260804990346057, - "grad_norm": 0.7859309911727905, - "learning_rate": 1.043517005792366e-05, - "loss": 0.0354, + "epoch": 6.521609980692114, + "grad_norm": 0.8239855766296387, + "learning_rate": 3.4783900193078867e-06, + "loss": 0.0581, "step": 43910 }, { - "epoch": 3.2615476013664044, - "grad_norm": 0.43140801787376404, - "learning_rate": 1.0430714391801575e-05, - "loss": 0.0709, + "epoch": 6.523095202732809, + "grad_norm": 1.0155378580093384, + "learning_rate": 3.4769047972671915e-06, + "loss": 0.0651, "step": 43920 }, { - "epoch": 3.262290212386752, - "grad_norm": 1.7433068752288818, - "learning_rate": 1.042625872567949e-05, - "loss": 0.088, + "epoch": 6.524580424773504, + "grad_norm": 0.9577513933181763, + "learning_rate": 3.475419575226497e-06, + "loss": 0.0641, "step": 43930 }, { - "epoch": 3.2630328234070993, - "grad_norm": 2.8857994079589844, - "learning_rate": 1.0421803059557405e-05, - "loss": 0.0876, + "epoch": 6.526065646814199, + "grad_norm": 0.8862541317939758, + "learning_rate": 3.4739343531858016e-06, + "loss": 0.0483, "step": 43940 }, { - "epoch": 3.2637754344274468, - "grad_norm": 0.6673837304115295, - "learning_rate": 1.0417347393435318e-05, - "loss": 0.0525, + "epoch": 6.5275508688548936, + "grad_norm": 1.088813066482544, + "learning_rate": 3.4724491311451065e-06, + "loss": 0.0629, "step": 43950 }, { - "epoch": 3.2645180454477947, - "grad_norm": 0.9741489887237549, - "learning_rate": 1.0412891727313233e-05, - "loss": 0.0626, + "epoch": 6.529036090895589, + "grad_norm": 0.4905775785446167, + "learning_rate": 3.4709639091044114e-06, + "loss": 0.0428, "step": 43960 }, { - "epoch": 3.265260656468142, - "grad_norm": 0.6886597275733948, - "learning_rate": 1.040843606119115e-05, - "loss": 0.0607, + "epoch": 6.530521312936284, + "grad_norm": 0.9292188286781311, + "learning_rate": 3.4694786870637166e-06, + "loss": 0.0506, "step": 43970 }, { - "epoch": 3.2660032674884896, - "grad_norm": 0.6309210062026978, - "learning_rate": 1.0403980395069063e-05, - "loss": 0.0695, + "epoch": 6.532006534976979, + "grad_norm": 1.2144511938095093, + "learning_rate": 3.467993465023021e-06, + "loss": 0.0699, "step": 43980 }, { - "epoch": 3.266745878508837, - "grad_norm": 3.206247329711914, - "learning_rate": 1.0399524728946978e-05, - "loss": 0.0573, + "epoch": 6.533491757017674, + "grad_norm": 1.1785643100738525, + "learning_rate": 3.466508242982326e-06, + "loss": 0.0581, "step": 43990 }, { - "epoch": 3.2674884895291845, - "grad_norm": 0.590715765953064, - "learning_rate": 1.0395069062824891e-05, - "loss": 0.0443, + "epoch": 6.534976979058369, + "grad_norm": 0.647038459777832, + "learning_rate": 3.465023020941631e-06, + "loss": 0.066, "step": 44000 }, { - "epoch": 3.2682311005495324, - "grad_norm": 3.0547935962677, - "learning_rate": 1.0390613396702808e-05, - "loss": 0.062, + "epoch": 6.536462201099065, + "grad_norm": 0.5554239749908447, + "learning_rate": 3.463537798900936e-06, + "loss": 0.0673, "step": 44010 }, { - "epoch": 3.26897371156988, - "grad_norm": 5.862617015838623, - "learning_rate": 1.0386157730580723e-05, - "loss": 0.0543, + "epoch": 6.53794742313976, + "grad_norm": 0.9084454774856567, + "learning_rate": 3.462052576860241e-06, + "loss": 0.0677, "step": 44020 }, { - "epoch": 3.2697163225902273, - "grad_norm": 0.2635265588760376, - "learning_rate": 1.0381702064458636e-05, - "loss": 0.0307, + "epoch": 6.5394326451804545, + "grad_norm": 0.7279065251350403, + "learning_rate": 3.4605673548195458e-06, + "loss": 0.0674, "step": 44030 }, { - "epoch": 3.2704589336105747, - "grad_norm": 1.2568199634552002, - "learning_rate": 1.0377246398336553e-05, - "loss": 0.0388, + "epoch": 6.540917867221149, + "grad_norm": 0.3517337441444397, + "learning_rate": 3.459082132778851e-06, + "loss": 0.0519, "step": 44040 }, { - "epoch": 3.271201544630922, - "grad_norm": 0.4090416133403778, - "learning_rate": 1.0372790732214466e-05, - "loss": 0.0327, + "epoch": 6.542403089261844, + "grad_norm": 1.2529404163360596, + "learning_rate": 3.457596910738156e-06, + "loss": 0.0594, "step": 44050 }, { - "epoch": 3.27194415565127, - "grad_norm": 1.8070887327194214, - "learning_rate": 1.0368335066092381e-05, - "loss": 0.0626, + "epoch": 6.54388831130254, + "grad_norm": 1.125239610671997, + "learning_rate": 3.4561116886974604e-06, + "loss": 0.0717, "step": 44060 }, { - "epoch": 3.2726867666716175, - "grad_norm": 1.7493088245391846, - "learning_rate": 1.0363879399970296e-05, - "loss": 0.0763, + "epoch": 6.545373533343235, + "grad_norm": 1.0038970708847046, + "learning_rate": 3.454626466656765e-06, + "loss": 0.0566, "step": 44070 }, { - "epoch": 3.273429377691965, - "grad_norm": 1.4830260276794434, - "learning_rate": 1.0359423733848211e-05, - "loss": 0.0501, + "epoch": 6.54685875538393, + "grad_norm": 1.224101185798645, + "learning_rate": 3.4531412446160705e-06, + "loss": 0.0514, "step": 44080 }, { - "epoch": 3.2741719887123124, - "grad_norm": 0.5901992917060852, - "learning_rate": 1.0354968067726126e-05, - "loss": 0.0614, + "epoch": 6.548343977424625, + "grad_norm": 1.390031099319458, + "learning_rate": 3.4516560225753753e-06, + "loss": 0.0636, "step": 44090 }, { - "epoch": 3.27491459973266, - "grad_norm": 2.098797082901001, - "learning_rate": 1.035051240160404e-05, - "loss": 0.058, + "epoch": 6.54982919946532, + "grad_norm": 1.0642993450164795, + "learning_rate": 3.45017080053468e-06, + "loss": 0.0786, "step": 44100 }, { - "epoch": 3.2756572107530078, - "grad_norm": 1.220317006111145, - "learning_rate": 1.0346056735481954e-05, - "loss": 0.0958, + "epoch": 6.5513144215060155, + "grad_norm": 0.3884289264678955, + "learning_rate": 3.448685578493985e-06, + "loss": 0.055, "step": 44110 }, { - "epoch": 3.276399821773355, - "grad_norm": 0.6847271919250488, - "learning_rate": 1.034160106935987e-05, - "loss": 0.0539, + "epoch": 6.55279964354671, + "grad_norm": 0.6680036187171936, + "learning_rate": 3.4472003564532903e-06, + "loss": 0.043, "step": 44120 }, { - "epoch": 3.2771424327937027, - "grad_norm": 1.1545261144638062, - "learning_rate": 1.0337145403237784e-05, - "loss": 0.0644, + "epoch": 6.554284865587405, + "grad_norm": 0.6033088564872742, + "learning_rate": 3.4457151344125948e-06, + "loss": 0.0571, "step": 44130 }, { - "epoch": 3.27788504381405, - "grad_norm": 2.0466902256011963, - "learning_rate": 1.03326897371157e-05, - "loss": 0.0595, + "epoch": 6.5557700876281, + "grad_norm": 0.7803447246551514, + "learning_rate": 3.4442299123718996e-06, + "loss": 0.0646, "step": 44140 }, { - "epoch": 3.2786276548343976, - "grad_norm": 2.453418493270874, - "learning_rate": 1.0328234070993614e-05, - "loss": 0.0978, + "epoch": 6.557255309668795, + "grad_norm": 1.9591981172561646, + "learning_rate": 3.442744690331205e-06, + "loss": 0.0556, "step": 44150 }, { - "epoch": 3.2793702658547454, - "grad_norm": 3.9171948432922363, - "learning_rate": 1.032377840487153e-05, - "loss": 0.0583, + "epoch": 6.558740531709491, + "grad_norm": 0.18442347645759583, + "learning_rate": 3.4412594682905098e-06, + "loss": 0.0426, "step": 44160 }, { - "epoch": 3.280112876875093, - "grad_norm": 2.847308874130249, - "learning_rate": 1.0319322738749443e-05, - "loss": 0.0792, + "epoch": 6.560225753750186, + "grad_norm": 0.8935715556144714, + "learning_rate": 3.4397742462498146e-06, + "loss": 0.0599, "step": 44170 }, { - "epoch": 3.2808554878954403, - "grad_norm": 1.2531743049621582, - "learning_rate": 1.0314867072627358e-05, - "loss": 0.0833, + "epoch": 6.561710975790881, + "grad_norm": 0.8119651675224304, + "learning_rate": 3.4382890242091195e-06, + "loss": 0.063, "step": 44180 }, { - "epoch": 3.281598098915788, - "grad_norm": 3.184342384338379, - "learning_rate": 1.0310411406505274e-05, - "loss": 0.0575, + "epoch": 6.563196197831576, + "grad_norm": 0.9296265840530396, + "learning_rate": 3.4368038021684248e-06, + "loss": 0.0711, "step": 44190 }, { - "epoch": 3.2823407099361352, - "grad_norm": 1.0278104543685913, - "learning_rate": 1.0305955740383188e-05, - "loss": 0.0633, + "epoch": 6.5646814198722705, + "grad_norm": 0.2975754737854004, + "learning_rate": 3.4353185801277296e-06, + "loss": 0.0525, "step": 44200 }, { - "epoch": 3.283083320956483, - "grad_norm": 1.105556607246399, - "learning_rate": 1.0301500074261103e-05, - "loss": 0.0774, + "epoch": 6.566166641912966, + "grad_norm": 0.6989734768867493, + "learning_rate": 3.433833358087034e-06, + "loss": 0.0506, "step": 44210 }, { - "epoch": 3.2838259319768306, - "grad_norm": 2.209592342376709, - "learning_rate": 1.0297044408139016e-05, - "loss": 0.0427, + "epoch": 6.567651863953661, + "grad_norm": 0.19553795456886292, + "learning_rate": 3.432348136046339e-06, + "loss": 0.0472, "step": 44220 }, { - "epoch": 3.284568542997178, - "grad_norm": 1.6247801780700684, - "learning_rate": 1.0292588742016933e-05, - "loss": 0.0552, + "epoch": 6.569137085994356, + "grad_norm": 0.7017039060592651, + "learning_rate": 3.430862914005644e-06, + "loss": 0.0598, "step": 44230 }, { - "epoch": 3.2853111540175255, - "grad_norm": 1.5469924211502075, - "learning_rate": 1.0288133075894846e-05, - "loss": 0.0543, + "epoch": 6.570622308035051, + "grad_norm": 1.4003336429595947, + "learning_rate": 3.429377691964949e-06, + "loss": 0.0527, "step": 44240 }, { - "epoch": 3.286053765037873, - "grad_norm": 1.353393316268921, - "learning_rate": 1.0283677409772761e-05, - "loss": 0.0592, + "epoch": 6.572107530075746, + "grad_norm": 1.4430428743362427, + "learning_rate": 3.427892469924254e-06, + "loss": 0.0695, "step": 44250 }, { - "epoch": 3.286796376058221, - "grad_norm": 1.046172022819519, - "learning_rate": 1.0279221743650676e-05, - "loss": 0.0504, + "epoch": 6.573592752116442, + "grad_norm": 0.7251480221748352, + "learning_rate": 3.4264072478835588e-06, + "loss": 0.0559, "step": 44260 }, { - "epoch": 3.2875389870785683, - "grad_norm": 0.5917429327964783, - "learning_rate": 1.0274766077528591e-05, - "loss": 0.0429, + "epoch": 6.575077974157137, + "grad_norm": 0.3925531208515167, + "learning_rate": 3.424922025842864e-06, + "loss": 0.0617, "step": 44270 }, { - "epoch": 3.2882815980989157, - "grad_norm": 1.0659432411193848, - "learning_rate": 1.0270310411406506e-05, - "loss": 0.0625, + "epoch": 6.5765631961978315, + "grad_norm": 0.9270767569541931, + "learning_rate": 3.423436803802169e-06, + "loss": 0.0668, "step": 44280 }, { - "epoch": 3.289024209119263, - "grad_norm": 1.2359225749969482, - "learning_rate": 1.0265854745284419e-05, - "loss": 0.0623, + "epoch": 6.578048418238526, + "grad_norm": 0.6193248629570007, + "learning_rate": 3.4219515817614733e-06, + "loss": 0.0668, "step": 44290 }, { - "epoch": 3.2897668201396106, - "grad_norm": 1.3712466955184937, - "learning_rate": 1.0261399079162336e-05, - "loss": 0.0367, + "epoch": 6.579533640279221, + "grad_norm": 1.1179356575012207, + "learning_rate": 3.4204663597207786e-06, + "loss": 0.0491, "step": 44300 }, { - "epoch": 3.2905094311599585, - "grad_norm": 0.6402594447135925, - "learning_rate": 1.025694341304025e-05, - "loss": 0.06, + "epoch": 6.581018862319917, + "grad_norm": 0.7524288296699524, + "learning_rate": 3.4189811376800835e-06, + "loss": 0.0485, "step": 44310 }, { - "epoch": 3.291252042180306, - "grad_norm": 3.1848597526550293, - "learning_rate": 1.0252487746918164e-05, - "loss": 0.0566, + "epoch": 6.582504084360612, + "grad_norm": 0.6350118517875671, + "learning_rate": 3.4174959156393883e-06, + "loss": 0.0575, "step": 44320 }, { - "epoch": 3.2919946532006534, - "grad_norm": 0.769212543964386, - "learning_rate": 1.0248032080796079e-05, - "loss": 0.0579, + "epoch": 6.583989306401307, + "grad_norm": 0.1729784607887268, + "learning_rate": 3.416010693598693e-06, + "loss": 0.0454, "step": 44330 }, { - "epoch": 3.292737264221001, - "grad_norm": 2.3581175804138184, - "learning_rate": 1.0243576414673994e-05, - "loss": 0.0666, + "epoch": 6.585474528442002, + "grad_norm": 0.2924365997314453, + "learning_rate": 3.4145254715579985e-06, + "loss": 0.061, "step": 44340 }, { - "epoch": 3.2934798752413488, - "grad_norm": 0.7175013422966003, - "learning_rate": 1.0239120748551909e-05, - "loss": 0.0463, + "epoch": 6.5869597504826976, + "grad_norm": 0.9202251434326172, + "learning_rate": 3.4130402495173033e-06, + "loss": 0.0491, "step": 44350 }, { - "epoch": 3.2942224862616962, - "grad_norm": 1.026248574256897, - "learning_rate": 1.0234665082429822e-05, - "loss": 0.06, + "epoch": 6.5884449725233925, + "grad_norm": 0.7880986928939819, + "learning_rate": 3.4115550274766078e-06, + "loss": 0.0465, "step": 44360 }, { - "epoch": 3.2949650972820437, - "grad_norm": 1.5957810878753662, - "learning_rate": 1.0230209416307737e-05, - "loss": 0.0462, + "epoch": 6.589930194564087, + "grad_norm": 0.7341892719268799, + "learning_rate": 3.4100698054359126e-06, + "loss": 0.0741, "step": 44370 }, { - "epoch": 3.295707708302391, - "grad_norm": 0.7169394493103027, - "learning_rate": 1.0225753750185654e-05, - "loss": 0.0872, + "epoch": 6.591415416604782, + "grad_norm": 0.5536158084869385, + "learning_rate": 3.408584583395218e-06, + "loss": 0.0548, "step": 44380 }, { - "epoch": 3.2964503193227386, - "grad_norm": 1.4847623109817505, - "learning_rate": 1.0221298084063567e-05, - "loss": 0.0513, + "epoch": 6.592900638645477, + "grad_norm": 0.754804253578186, + "learning_rate": 3.4070993613545227e-06, + "loss": 0.047, "step": 44390 }, { - "epoch": 3.2971929303430865, - "grad_norm": 1.8581526279449463, - "learning_rate": 1.0216842417941482e-05, - "loss": 0.0663, + "epoch": 6.594385860686173, + "grad_norm": 0.522686243057251, + "learning_rate": 3.4056141393138276e-06, + "loss": 0.0683, "step": 44400 }, { - "epoch": 3.297935541363434, - "grad_norm": 0.3895215690135956, - "learning_rate": 1.0212386751819397e-05, - "loss": 0.0684, + "epoch": 6.595871082726868, + "grad_norm": 1.4597195386886597, + "learning_rate": 3.404128917273133e-06, + "loss": 0.0595, "step": 44410 }, { - "epoch": 3.2986781523837814, - "grad_norm": 0.4079320728778839, - "learning_rate": 1.0207931085697312e-05, - "loss": 0.0593, + "epoch": 6.597356304767563, + "grad_norm": 0.75593501329422, + "learning_rate": 3.4026436952324377e-06, + "loss": 0.0628, "step": 44420 }, { - "epoch": 3.299420763404129, - "grad_norm": 1.5425752401351929, - "learning_rate": 1.0203475419575227e-05, - "loss": 0.0927, + "epoch": 6.598841526808258, + "grad_norm": 0.8334012627601624, + "learning_rate": 3.4011584731917426e-06, + "loss": 0.0488, "step": 44430 }, { - "epoch": 3.3001633744244763, - "grad_norm": 1.9693676233291626, - "learning_rate": 1.019901975345314e-05, - "loss": 0.091, + "epoch": 6.6003267488489525, + "grad_norm": 1.003138780593872, + "learning_rate": 3.399673251151047e-06, + "loss": 0.0675, "step": 44440 }, { - "epoch": 3.300905985444824, - "grad_norm": 1.5233045816421509, - "learning_rate": 1.0194564087331057e-05, - "loss": 0.081, + "epoch": 6.601811970889648, + "grad_norm": 0.5932856798171997, + "learning_rate": 3.3981880291103523e-06, + "loss": 0.0432, "step": 44450 }, { - "epoch": 3.3016485964651716, - "grad_norm": 1.4039238691329956, - "learning_rate": 1.019010842120897e-05, - "loss": 0.0609, + "epoch": 6.603297192930343, + "grad_norm": 0.6687909364700317, + "learning_rate": 3.396702807069657e-06, + "loss": 0.068, "step": 44460 }, { - "epoch": 3.302391207485519, - "grad_norm": 1.4866162538528442, - "learning_rate": 1.0185652755086886e-05, - "loss": 0.0607, + "epoch": 6.604782414971038, + "grad_norm": 0.2233189046382904, + "learning_rate": 3.395217585028962e-06, + "loss": 0.0696, "step": 44470 }, { - "epoch": 3.3031338185058665, - "grad_norm": 2.2782821655273438, - "learning_rate": 1.01811970889648e-05, - "loss": 0.0506, + "epoch": 6.606267637011733, + "grad_norm": 1.6143152713775635, + "learning_rate": 3.393732362988267e-06, + "loss": 0.0388, "step": 44480 }, { - "epoch": 3.3038764295262144, - "grad_norm": 1.1079455614089966, - "learning_rate": 1.0176741422842716e-05, - "loss": 0.0785, + "epoch": 6.607752859052429, + "grad_norm": 1.348990797996521, + "learning_rate": 3.392247140947572e-06, + "loss": 0.0639, "step": 44490 }, { - "epoch": 3.304619040546562, - "grad_norm": 2.9163553714752197, - "learning_rate": 1.017228575672063e-05, - "loss": 0.0635, + "epoch": 6.609238081093124, + "grad_norm": 0.905087947845459, + "learning_rate": 3.390761918906877e-06, + "loss": 0.0636, "step": 44500 }, { - "epoch": 3.3053616515669093, - "grad_norm": 2.6138124465942383, - "learning_rate": 1.0167830090598544e-05, - "loss": 0.0539, + "epoch": 6.610723303133819, + "grad_norm": 0.6363906264305115, + "learning_rate": 3.3892766968661815e-06, + "loss": 0.0394, "step": 44510 }, { - "epoch": 3.3061042625872568, - "grad_norm": 1.3911471366882324, - "learning_rate": 1.0163374424476459e-05, - "loss": 0.0717, + "epoch": 6.6122085251745135, + "grad_norm": 0.41277167201042175, + "learning_rate": 3.3877914748254863e-06, + "loss": 0.0618, "step": 44520 }, { - "epoch": 3.306846873607604, - "grad_norm": 1.2493705749511719, - "learning_rate": 1.0158918758354374e-05, - "loss": 0.0833, + "epoch": 6.613693747215208, + "grad_norm": 0.7106294631958008, + "learning_rate": 3.3863062527847916e-06, + "loss": 0.0444, "step": 44530 }, { - "epoch": 3.307589484627952, - "grad_norm": 0.6470643877983093, - "learning_rate": 1.0154463092232289e-05, - "loss": 0.0834, + "epoch": 6.615178969255904, + "grad_norm": 0.21014659106731415, + "learning_rate": 3.3848210307440964e-06, + "loss": 0.0429, "step": 44540 }, { - "epoch": 3.3083320956482996, - "grad_norm": 1.346933126449585, - "learning_rate": 1.0150007426110204e-05, - "loss": 0.0417, + "epoch": 6.616664191296599, + "grad_norm": 1.446640968322754, + "learning_rate": 3.3833358087034013e-06, + "loss": 0.0565, "step": 44550 }, { - "epoch": 3.309074706668647, - "grad_norm": 1.7907460927963257, - "learning_rate": 1.0145551759988119e-05, - "loss": 0.0613, + "epoch": 6.618149413337294, + "grad_norm": 1.2112292051315308, + "learning_rate": 3.3818505866627066e-06, + "loss": 0.0668, "step": 44560 }, { - "epoch": 3.3098173176889945, - "grad_norm": 3.1035265922546387, - "learning_rate": 1.0141096093866034e-05, - "loss": 0.0773, + "epoch": 6.619634635377989, + "grad_norm": 0.5960361957550049, + "learning_rate": 3.3803653646220114e-06, + "loss": 0.0721, "step": 44570 }, { - "epoch": 3.310559928709342, - "grad_norm": 1.8176069259643555, - "learning_rate": 1.0136640427743947e-05, - "loss": 0.0679, + "epoch": 6.621119857418684, + "grad_norm": 0.8905211091041565, + "learning_rate": 3.3788801425813163e-06, + "loss": 0.0709, "step": 44580 }, { - "epoch": 3.31130253972969, - "grad_norm": 1.3476163148880005, - "learning_rate": 1.0132184761621862e-05, - "loss": 0.066, + "epoch": 6.62260507945938, + "grad_norm": 0.4864606559276581, + "learning_rate": 3.3773949205406207e-06, + "loss": 0.0506, "step": 44590 }, { - "epoch": 3.3120451507500372, - "grad_norm": 1.3628586530685425, - "learning_rate": 1.0127729095499779e-05, - "loss": 0.0671, + "epoch": 6.6240903015000745, + "grad_norm": 0.5841920375823975, + "learning_rate": 3.375909698499926e-06, + "loss": 0.0631, "step": 44600 }, { - "epoch": 3.3127877617703847, - "grad_norm": 0.9032784104347229, - "learning_rate": 1.0123273429377692e-05, - "loss": 0.0828, + "epoch": 6.625575523540769, + "grad_norm": 0.9447734355926514, + "learning_rate": 3.374424476459231e-06, + "loss": 0.0508, "step": 44610 }, { - "epoch": 3.313530372790732, - "grad_norm": 1.4668526649475098, - "learning_rate": 1.0118817763255607e-05, - "loss": 0.0669, + "epoch": 6.627060745581464, + "grad_norm": 0.7786805629730225, + "learning_rate": 3.3729392544185357e-06, + "loss": 0.0716, "step": 44620 }, { - "epoch": 3.3142729838110796, - "grad_norm": 0.5808708071708679, - "learning_rate": 1.011436209713352e-05, - "loss": 0.0926, + "epoch": 6.628545967622159, + "grad_norm": 0.5071942210197449, + "learning_rate": 3.3714540323778406e-06, + "loss": 0.0617, "step": 44630 }, { - "epoch": 3.3150155948314275, - "grad_norm": 0.9334034323692322, - "learning_rate": 1.0109906431011437e-05, - "loss": 0.0695, + "epoch": 6.630031189662855, + "grad_norm": 1.5175836086273193, + "learning_rate": 3.369968810337146e-06, + "loss": 0.0564, "step": 44640 }, { - "epoch": 3.315758205851775, - "grad_norm": 2.833603858947754, - "learning_rate": 1.010545076488935e-05, - "loss": 0.0559, + "epoch": 6.63151641170355, + "grad_norm": 0.8158230781555176, + "learning_rate": 3.3684835882964507e-06, + "loss": 0.0646, "step": 44650 }, { - "epoch": 3.3165008168721224, - "grad_norm": 0.9554916620254517, - "learning_rate": 1.0100995098767265e-05, - "loss": 0.0612, + "epoch": 6.633001633744245, + "grad_norm": 1.2170836925506592, + "learning_rate": 3.3669983662557556e-06, + "loss": 0.0498, "step": 44660 }, { - "epoch": 3.31724342789247, - "grad_norm": 2.4863193035125732, - "learning_rate": 1.0096539432645182e-05, - "loss": 0.0585, + "epoch": 6.63448685578494, + "grad_norm": 0.3966522514820099, + "learning_rate": 3.365513144215061e-06, + "loss": 0.0596, "step": 44670 }, { - "epoch": 3.3179860389128173, - "grad_norm": 1.2843139171600342, - "learning_rate": 1.0092083766523095e-05, - "loss": 0.057, + "epoch": 6.635972077825635, + "grad_norm": 1.9576127529144287, + "learning_rate": 3.3640279221743653e-06, + "loss": 0.0552, "step": 44680 }, { - "epoch": 3.318728649933165, - "grad_norm": 3.115098237991333, - "learning_rate": 1.008762810040101e-05, - "loss": 0.062, + "epoch": 6.63745729986633, + "grad_norm": 1.2230498790740967, + "learning_rate": 3.36254270013367e-06, + "loss": 0.0606, "step": 44690 }, { - "epoch": 3.3194712609535126, - "grad_norm": 1.55138099193573, - "learning_rate": 1.0083172434278924e-05, - "loss": 0.0757, + "epoch": 6.638942521907025, + "grad_norm": 1.4444591999053955, + "learning_rate": 3.361057478092975e-06, + "loss": 0.0427, "step": 44700 }, { - "epoch": 3.32021387197386, - "grad_norm": 2.5275778770446777, - "learning_rate": 1.007871676815684e-05, - "loss": 0.0794, + "epoch": 6.64042774394772, + "grad_norm": 1.66185462474823, + "learning_rate": 3.3595722560522803e-06, + "loss": 0.0519, "step": 44710 }, { - "epoch": 3.3209564829942075, - "grad_norm": 2.697035551071167, - "learning_rate": 1.0074261102034755e-05, - "loss": 0.0467, + "epoch": 6.641912965988415, + "grad_norm": 0.7028926610946655, + "learning_rate": 3.358087034011585e-06, + "loss": 0.0565, "step": 44720 }, { - "epoch": 3.321699094014555, - "grad_norm": 0.9199452996253967, - "learning_rate": 1.0069805435912669e-05, - "loss": 0.0822, + "epoch": 6.64339818802911, + "grad_norm": 1.442725658416748, + "learning_rate": 3.35660181197089e-06, + "loss": 0.0613, "step": 44730 }, { - "epoch": 3.322441705034903, - "grad_norm": 2.4956889152526855, - "learning_rate": 1.0065349769790584e-05, - "loss": 0.0608, + "epoch": 6.644883410069806, + "grad_norm": 1.138018250465393, + "learning_rate": 3.3551165899301944e-06, + "loss": 0.0434, "step": 44740 }, { - "epoch": 3.3231843160552503, - "grad_norm": 2.050920248031616, - "learning_rate": 1.0060894103668499e-05, - "loss": 0.0895, + "epoch": 6.646368632110501, + "grad_norm": 1.1926655769348145, + "learning_rate": 3.3536313678895e-06, + "loss": 0.0592, "step": 44750 }, { - "epoch": 3.323926927075598, - "grad_norm": 1.369531273841858, - "learning_rate": 1.0056438437546414e-05, - "loss": 0.0515, + "epoch": 6.647853854151196, + "grad_norm": 0.6647235155105591, + "learning_rate": 3.3521461458488046e-06, + "loss": 0.0579, "step": 44760 }, { - "epoch": 3.3246695380959452, - "grad_norm": 3.23797345161438, - "learning_rate": 1.0051982771424329e-05, - "loss": 0.0678, + "epoch": 6.6493390761918905, + "grad_norm": 0.8624783754348755, + "learning_rate": 3.3506609238081094e-06, + "loss": 0.0679, "step": 44770 }, { - "epoch": 3.3254121491162927, - "grad_norm": 0.7047389149665833, - "learning_rate": 1.0047527105302242e-05, - "loss": 0.0508, + "epoch": 6.650824298232585, + "grad_norm": 0.6453647017478943, + "learning_rate": 3.3491757017674143e-06, + "loss": 0.0566, "step": 44780 }, { - "epoch": 3.3261547601366406, - "grad_norm": 0.7370045781135559, - "learning_rate": 1.0043071439180159e-05, - "loss": 0.0548, + "epoch": 6.652309520273281, + "grad_norm": 1.101426124572754, + "learning_rate": 3.3476904797267196e-06, + "loss": 0.0472, "step": 44790 }, { - "epoch": 3.326897371156988, - "grad_norm": 1.6927924156188965, - "learning_rate": 1.0038615773058072e-05, - "loss": 0.0859, + "epoch": 6.653794742313976, + "grad_norm": 0.6298049688339233, + "learning_rate": 3.3462052576860244e-06, + "loss": 0.065, "step": 44800 }, { - "epoch": 3.3276399821773355, - "grad_norm": 0.9936562180519104, - "learning_rate": 1.0034160106935987e-05, - "loss": 0.0641, + "epoch": 6.655279964354671, + "grad_norm": 0.6735140085220337, + "learning_rate": 3.3447200356453293e-06, + "loss": 0.0458, "step": 44810 }, { - "epoch": 3.328382593197683, - "grad_norm": 1.1975995302200317, - "learning_rate": 1.0029704440813902e-05, - "loss": 0.0758, + "epoch": 6.656765186395366, + "grad_norm": 0.6601940393447876, + "learning_rate": 3.3432348136046346e-06, + "loss": 0.0698, "step": 44820 }, { - "epoch": 3.3291252042180304, - "grad_norm": 0.6930918097496033, - "learning_rate": 1.0025248774691817e-05, - "loss": 0.0674, + "epoch": 6.658250408436061, + "grad_norm": 1.033394694328308, + "learning_rate": 3.341749591563939e-06, + "loss": 0.0547, "step": 44830 }, { - "epoch": 3.3298678152383783, - "grad_norm": 1.631554365158081, - "learning_rate": 1.0020793108569732e-05, - "loss": 0.0528, + "epoch": 6.6597356304767565, + "grad_norm": 0.618804395198822, + "learning_rate": 3.340264369523244e-06, + "loss": 0.0619, "step": 44840 }, { - "epoch": 3.3306104262587257, - "grad_norm": 0.4644923508167267, - "learning_rate": 1.0016337442447645e-05, - "loss": 0.0543, + "epoch": 6.661220852517451, + "grad_norm": 0.6303835511207581, + "learning_rate": 3.3387791474825487e-06, + "loss": 0.0563, "step": 44850 }, { - "epoch": 3.331353037279073, - "grad_norm": 0.8077749013900757, - "learning_rate": 1.0011881776325562e-05, - "loss": 0.0411, + "epoch": 6.662706074558146, + "grad_norm": 1.1331371068954468, + "learning_rate": 3.337293925441854e-06, + "loss": 0.0589, "step": 44860 }, { - "epoch": 3.3320956482994206, - "grad_norm": 0.5882359743118286, - "learning_rate": 1.0007426110203475e-05, - "loss": 0.038, + "epoch": 6.664191296598841, + "grad_norm": 0.8499082326889038, + "learning_rate": 3.335808703401159e-06, + "loss": 0.0524, "step": 44870 }, { - "epoch": 3.332838259319768, - "grad_norm": 2.1609537601470947, - "learning_rate": 1.000297044408139e-05, - "loss": 0.0709, + "epoch": 6.665676518639536, + "grad_norm": 0.5605273246765137, + "learning_rate": 3.3343234813604637e-06, + "loss": 0.055, "step": 44880 }, { - "epoch": 3.333580870340116, - "grad_norm": 2.9716951847076416, - "learning_rate": 9.998514777959305e-06, - "loss": 0.0759, + "epoch": 6.667161740680232, + "grad_norm": 1.4254909753799438, + "learning_rate": 3.332838259319768e-06, + "loss": 0.048, "step": 44890 }, { - "epoch": 3.3343234813604634, - "grad_norm": 1.4051735401153564, - "learning_rate": 9.99405911183722e-06, - "loss": 0.0842, + "epoch": 6.668646962720927, + "grad_norm": 0.7363986968994141, + "learning_rate": 3.331353037279074e-06, + "loss": 0.0599, "step": 44900 }, { - "epoch": 3.335066092380811, - "grad_norm": 1.0370116233825684, - "learning_rate": 9.989603445715135e-06, - "loss": 0.0574, + "epoch": 6.670132184761622, + "grad_norm": 0.7361230254173279, + "learning_rate": 3.3298678152383783e-06, + "loss": 0.0585, "step": 44910 }, { - "epoch": 3.3358087034011583, - "grad_norm": 2.2001404762268066, - "learning_rate": 9.985147779593049e-06, - "loss": 0.0711, + "epoch": 6.671617406802317, + "grad_norm": 1.6775233745574951, + "learning_rate": 3.328382593197683e-06, + "loss": 0.0788, "step": 44920 }, { - "epoch": 3.336551314421506, - "grad_norm": 1.836188554763794, - "learning_rate": 9.980692113470964e-06, - "loss": 0.0672, + "epoch": 6.673102628843012, + "grad_norm": 1.4757988452911377, + "learning_rate": 3.326897371156988e-06, + "loss": 0.062, "step": 44930 }, { - "epoch": 3.3372939254418537, - "grad_norm": 3.0078184604644775, - "learning_rate": 9.976236447348879e-06, - "loss": 0.073, + "epoch": 6.674587850883707, + "grad_norm": 0.7029728889465332, + "learning_rate": 3.3254121491162933e-06, + "loss": 0.0613, "step": 44940 }, { - "epoch": 3.338036536462201, - "grad_norm": 0.9165183305740356, - "learning_rate": 9.971780781226794e-06, - "loss": 0.0768, + "epoch": 6.676073072924402, + "grad_norm": 0.9859765768051147, + "learning_rate": 3.323926927075598e-06, + "loss": 0.0582, "step": 44950 }, { - "epoch": 3.3387791474825486, - "grad_norm": 1.1523520946502686, - "learning_rate": 9.967325115104709e-06, - "loss": 0.0325, + "epoch": 6.677558294965097, + "grad_norm": 0.9241379499435425, + "learning_rate": 3.322441705034903e-06, + "loss": 0.0666, "step": 44960 }, - { - "epoch": 3.339521758502896, - "grad_norm": 0.701426088809967, - "learning_rate": 9.962869448982624e-06, - "loss": 0.0557, + { + "epoch": 6.679043517005792, + "grad_norm": 0.483398973941803, + "learning_rate": 3.3209564829942083e-06, + "loss": 0.069, "step": 44970 }, { - "epoch": 3.340264369523244, - "grad_norm": 0.9896045327186584, - "learning_rate": 9.958413782860539e-06, - "loss": 0.0438, + "epoch": 6.680528739046488, + "grad_norm": 0.4294925928115845, + "learning_rate": 3.319471260953513e-06, + "loss": 0.056, "step": 44980 }, { - "epoch": 3.3410069805435914, - "grad_norm": 1.784203290939331, - "learning_rate": 9.953958116738452e-06, - "loss": 0.0509, + "epoch": 6.682013961087183, + "grad_norm": 0.8365574479103088, + "learning_rate": 3.3179860389128176e-06, + "loss": 0.0571, "step": 44990 }, { - "epoch": 3.341749591563939, - "grad_norm": 1.9945133924484253, - "learning_rate": 9.949502450616367e-06, - "loss": 0.0682, + "epoch": 6.683499183127878, + "grad_norm": 0.9189231395721436, + "learning_rate": 3.3165008168721224e-06, + "loss": 0.0488, "step": 45000 }, { - "epoch": 3.3424922025842863, - "grad_norm": 2.4926345348358154, - "learning_rate": 9.945046784494283e-06, - "loss": 0.0784, + "epoch": 6.6849844051685725, + "grad_norm": 0.796147882938385, + "learning_rate": 3.3150155948314277e-06, + "loss": 0.0497, "step": 45010 }, { - "epoch": 3.3432348136046337, - "grad_norm": 1.1978400945663452, - "learning_rate": 9.940591118372197e-06, - "loss": 0.0644, + "epoch": 6.686469627209267, + "grad_norm": 0.5429372787475586, + "learning_rate": 3.3135303727907325e-06, + "loss": 0.0601, "step": 45020 }, { - "epoch": 3.3439774246249816, - "grad_norm": 0.44274619221687317, - "learning_rate": 9.936135452250112e-06, - "loss": 0.0558, + "epoch": 6.687954849249963, + "grad_norm": 1.0063883066177368, + "learning_rate": 3.3120451507500374e-06, + "loss": 0.0622, "step": 45030 }, { - "epoch": 3.344720035645329, - "grad_norm": 0.9496433138847351, - "learning_rate": 9.931679786128025e-06, - "loss": 0.0406, + "epoch": 6.689440071290658, + "grad_norm": 1.2463853359222412, + "learning_rate": 3.3105599287093423e-06, + "loss": 0.0583, "step": 45040 }, { - "epoch": 3.3454626466656765, - "grad_norm": 0.4227916896343231, - "learning_rate": 9.927224120005942e-06, - "loss": 0.0446, + "epoch": 6.690925293331353, + "grad_norm": 1.391730785369873, + "learning_rate": 3.3090747066686475e-06, + "loss": 0.0651, "step": 45050 }, { - "epoch": 3.346205257686024, - "grad_norm": 1.6861997842788696, - "learning_rate": 9.922768453883855e-06, - "loss": 0.0576, + "epoch": 6.692410515372048, + "grad_norm": 0.46899959444999695, + "learning_rate": 3.307589484627952e-06, + "loss": 0.057, "step": 45060 }, { - "epoch": 3.346947868706372, - "grad_norm": 2.8230645656585693, - "learning_rate": 9.91831278776177e-06, - "loss": 0.055, + "epoch": 6.693895737412744, + "grad_norm": 0.9016135931015015, + "learning_rate": 3.306104262587257e-06, + "loss": 0.0479, "step": 45070 }, { - "epoch": 3.3476904797267193, - "grad_norm": 1.0160224437713623, - "learning_rate": 9.913857121639687e-06, - "loss": 0.09, + "epoch": 6.695380959453439, + "grad_norm": 0.6811698079109192, + "learning_rate": 3.304619040546562e-06, + "loss": 0.0446, "step": 45080 }, { - "epoch": 3.3484330907470667, - "grad_norm": 2.2444396018981934, - "learning_rate": 9.9094014555176e-06, - "loss": 0.0685, + "epoch": 6.6968661814941335, + "grad_norm": 0.9126334190368652, + "learning_rate": 3.303133818505867e-06, + "loss": 0.068, "step": 45090 }, { - "epoch": 3.349175701767414, - "grad_norm": 1.4811400175094604, - "learning_rate": 9.904945789395515e-06, - "loss": 0.0576, + "epoch": 6.698351403534828, + "grad_norm": 1.4983772039413452, + "learning_rate": 3.301648596465172e-06, + "loss": 0.0551, "step": 45100 }, { - "epoch": 3.3499183127877616, - "grad_norm": 2.1645710468292236, - "learning_rate": 9.900490123273428e-06, - "loss": 0.0926, + "epoch": 6.699836625575523, + "grad_norm": 0.8648777604103088, + "learning_rate": 3.3001633744244767e-06, + "loss": 0.0319, "step": 45110 }, { - "epoch": 3.3506609238081095, - "grad_norm": 2.3082311153411865, - "learning_rate": 9.896034457151345e-06, - "loss": 0.0536, + "epoch": 6.701321847616219, + "grad_norm": 0.8801341652870178, + "learning_rate": 3.298678152383782e-06, + "loss": 0.0612, "step": 45120 }, { - "epoch": 3.351403534828457, - "grad_norm": 1.518615961074829, - "learning_rate": 9.89157879102926e-06, - "loss": 0.0877, + "epoch": 6.702807069656914, + "grad_norm": 0.8212476968765259, + "learning_rate": 3.297192930343087e-06, + "loss": 0.0661, "step": 45130 }, { - "epoch": 3.3521461458488044, - "grad_norm": 0.9290609955787659, - "learning_rate": 9.887123124907173e-06, - "loss": 0.0549, + "epoch": 6.704292291697609, + "grad_norm": 1.3855417966842651, + "learning_rate": 3.2957077083023913e-06, + "loss": 0.0626, "step": 45140 }, { - "epoch": 3.352888756869152, - "grad_norm": 2.0847578048706055, - "learning_rate": 9.882667458785088e-06, - "loss": 0.0656, + "epoch": 6.705777513738304, + "grad_norm": 1.2375926971435547, + "learning_rate": 3.294222486261696e-06, + "loss": 0.0466, "step": 45150 }, { - "epoch": 3.3536313678894993, - "grad_norm": 2.6729955673217773, - "learning_rate": 9.878211792663003e-06, - "loss": 0.0772, + "epoch": 6.707262735778999, + "grad_norm": 1.4005922079086304, + "learning_rate": 3.2927372642210014e-06, + "loss": 0.0486, "step": 45160 }, { - "epoch": 3.3543739789098472, - "grad_norm": 2.263134241104126, - "learning_rate": 9.873756126540918e-06, - "loss": 0.0689, + "epoch": 6.7087479578196945, + "grad_norm": 0.3769647777080536, + "learning_rate": 3.2912520421803062e-06, + "loss": 0.0463, "step": 45170 }, { - "epoch": 3.3551165899301947, - "grad_norm": 0.8726534247398376, - "learning_rate": 9.869300460418833e-06, - "loss": 0.0652, + "epoch": 6.710233179860389, + "grad_norm": 1.2366719245910645, + "learning_rate": 3.289766820139611e-06, + "loss": 0.0454, "step": 45180 }, { - "epoch": 3.355859200950542, - "grad_norm": 1.9389985799789429, - "learning_rate": 9.864844794296747e-06, - "loss": 0.0787, + "epoch": 6.711718401901084, + "grad_norm": 0.6258324980735779, + "learning_rate": 3.288281598098916e-06, + "loss": 0.0584, "step": 45190 }, { - "epoch": 3.3566018119708896, - "grad_norm": 2.6896302700042725, - "learning_rate": 9.860389128174663e-06, - "loss": 0.0622, + "epoch": 6.713203623941779, + "grad_norm": 0.7826153635978699, + "learning_rate": 3.2867963760582212e-06, + "loss": 0.0534, "step": 45200 }, { - "epoch": 3.357344422991237, - "grad_norm": 1.6283527612686157, - "learning_rate": 9.855933462052577e-06, - "loss": 0.052, + "epoch": 6.714688845982474, + "grad_norm": 0.854837954044342, + "learning_rate": 3.2853111540175257e-06, + "loss": 0.0491, "step": 45210 }, { - "epoch": 3.358087034011585, - "grad_norm": 1.3546130657196045, - "learning_rate": 9.851477795930492e-06, - "loss": 0.0797, + "epoch": 6.71617406802317, + "grad_norm": 0.3340891897678375, + "learning_rate": 3.2838259319768305e-06, + "loss": 0.0765, "step": 45220 }, { - "epoch": 3.3588296450319324, - "grad_norm": 1.457862377166748, - "learning_rate": 9.847022129808407e-06, - "loss": 0.0727, + "epoch": 6.717659290063865, + "grad_norm": 0.7108610272407532, + "learning_rate": 3.282340709936136e-06, + "loss": 0.0465, "step": 45230 }, { - "epoch": 3.35957225605228, - "grad_norm": 0.8017680644989014, - "learning_rate": 9.842566463686322e-06, - "loss": 0.0362, + "epoch": 6.71914451210456, + "grad_norm": 1.0852887630462646, + "learning_rate": 3.2808554878954407e-06, + "loss": 0.0535, "step": 45240 }, { - "epoch": 3.3603148670726273, - "grad_norm": 1.1099777221679688, - "learning_rate": 9.838110797564237e-06, - "loss": 0.0549, + "epoch": 6.7206297341452546, + "grad_norm": 0.7470007538795471, + "learning_rate": 3.2793702658547455e-06, + "loss": 0.0561, "step": 45250 }, { - "epoch": 3.3610574780929747, - "grad_norm": 2.5298869609832764, - "learning_rate": 9.83365513144215e-06, - "loss": 0.0445, + "epoch": 6.7221149561859495, + "grad_norm": 0.2710890471935272, + "learning_rate": 3.2778850438140504e-06, + "loss": 0.0507, "step": 45260 }, { - "epoch": 3.3618000891133226, - "grad_norm": 0.5401008725166321, - "learning_rate": 9.829199465320067e-06, - "loss": 0.0526, + "epoch": 6.723600178226645, + "grad_norm": 0.7607895731925964, + "learning_rate": 3.2763998217733557e-06, + "loss": 0.0411, "step": 45270 }, { - "epoch": 3.36254270013367, - "grad_norm": 1.3315315246582031, - "learning_rate": 9.82474379919798e-06, - "loss": 0.0675, + "epoch": 6.72508540026734, + "grad_norm": 1.0981948375701904, + "learning_rate": 3.2749145997326605e-06, + "loss": 0.0525, "step": 45280 }, { - "epoch": 3.3632853111540175, - "grad_norm": 0.9110653400421143, - "learning_rate": 9.820288133075895e-06, - "loss": 0.0365, + "epoch": 6.726570622308035, + "grad_norm": 0.8234240412712097, + "learning_rate": 3.273429377691965e-06, + "loss": 0.065, "step": 45290 }, { - "epoch": 3.364027922174365, - "grad_norm": 1.0673272609710693, - "learning_rate": 9.81583246695381e-06, - "loss": 0.0603, + "epoch": 6.72805584434873, + "grad_norm": 0.6028541922569275, + "learning_rate": 3.27194415565127e-06, + "loss": 0.0472, "step": 45300 }, { - "epoch": 3.3647705331947124, - "grad_norm": 1.7336030006408691, - "learning_rate": 9.811376800831725e-06, - "loss": 0.0634, + "epoch": 6.729541066389425, + "grad_norm": 1.2868266105651855, + "learning_rate": 3.270458933610575e-06, + "loss": 0.0375, "step": 45310 }, { - "epoch": 3.3655131442150603, - "grad_norm": 0.638027548789978, - "learning_rate": 9.80692113470964e-06, - "loss": 0.041, + "epoch": 6.731026288430121, + "grad_norm": 1.1159216165542603, + "learning_rate": 3.26897371156988e-06, + "loss": 0.064, "step": 45320 }, { - "epoch": 3.3662557552354078, - "grad_norm": 0.6306934356689453, - "learning_rate": 9.802465468587553e-06, - "loss": 0.092, + "epoch": 6.7325115104708155, + "grad_norm": 1.5056071281433105, + "learning_rate": 3.267488489529185e-06, + "loss": 0.0546, "step": 45330 }, { - "epoch": 3.366998366255755, - "grad_norm": 1.2354300022125244, - "learning_rate": 9.79800980246547e-06, - "loss": 0.0729, + "epoch": 6.73399673251151, + "grad_norm": 2.2418577671051025, + "learning_rate": 3.26600326748849e-06, + "loss": 0.045, "step": 45340 }, { - "epoch": 3.3677409772761027, - "grad_norm": 0.46772605180740356, - "learning_rate": 9.793554136343383e-06, - "loss": 0.039, + "epoch": 6.735481954552205, + "grad_norm": 0.6371803283691406, + "learning_rate": 3.264518045447795e-06, + "loss": 0.0709, "step": 45350 }, { - "epoch": 3.36848358829645, - "grad_norm": 0.28647175431251526, - "learning_rate": 9.789098470221298e-06, - "loss": 0.0544, + "epoch": 6.7369671765929, + "grad_norm": 0.7394696474075317, + "learning_rate": 3.2630328234071e-06, + "loss": 0.0668, "step": 45360 }, { - "epoch": 3.369226199316798, - "grad_norm": 0.0804813876748085, - "learning_rate": 9.784642804099213e-06, - "loss": 0.0483, + "epoch": 6.738452398633596, + "grad_norm": 0.636842668056488, + "learning_rate": 3.2615476013664042e-06, + "loss": 0.0461, "step": 45370 }, { - "epoch": 3.3699688103371455, - "grad_norm": 2.941643476486206, - "learning_rate": 9.780187137977128e-06, - "loss": 0.0728, + "epoch": 6.739937620674291, + "grad_norm": 0.5398241281509399, + "learning_rate": 3.2600623793257095e-06, + "loss": 0.0529, "step": 45380 }, { - "epoch": 3.370711421357493, - "grad_norm": 1.7482622861862183, - "learning_rate": 9.775731471855043e-06, - "loss": 0.0857, + "epoch": 6.741422842714986, + "grad_norm": 1.4932724237442017, + "learning_rate": 3.2585771572850144e-06, + "loss": 0.0532, "step": 45390 }, { - "epoch": 3.3714540323778404, - "grad_norm": 3.30426025390625, - "learning_rate": 9.771275805732956e-06, - "loss": 0.0671, + "epoch": 6.742908064755681, + "grad_norm": 0.7000812292098999, + "learning_rate": 3.2570919352443192e-06, + "loss": 0.0541, "step": 45400 }, { - "epoch": 3.372196643398188, - "grad_norm": 1.9652279615402222, - "learning_rate": 9.766820139610871e-06, - "loss": 0.0693, + "epoch": 6.744393286796376, + "grad_norm": 0.3533134460449219, + "learning_rate": 3.255606713203624e-06, + "loss": 0.0496, "step": 45410 }, { - "epoch": 3.3729392544185357, - "grad_norm": 0.7707439064979553, - "learning_rate": 9.762364473488788e-06, - "loss": 0.0572, + "epoch": 6.745878508837071, + "grad_norm": 1.3116090297698975, + "learning_rate": 3.2541214911629294e-06, + "loss": 0.058, "step": 45420 }, { - "epoch": 3.373681865438883, - "grad_norm": 0.6071941256523132, - "learning_rate": 9.757908807366701e-06, - "loss": 0.0451, + "epoch": 6.747363730877766, + "grad_norm": 1.1693205833435059, + "learning_rate": 3.2526362691222342e-06, + "loss": 0.0679, "step": 45430 }, { - "epoch": 3.3744244764592306, - "grad_norm": 1.8582054376602173, - "learning_rate": 9.753453141244616e-06, - "loss": 0.0391, + "epoch": 6.748848952918461, + "grad_norm": 0.7300345301628113, + "learning_rate": 3.2511510470815387e-06, + "loss": 0.052, "step": 45440 }, { - "epoch": 3.375167087479578, - "grad_norm": 1.5152584314346313, - "learning_rate": 9.74899747512253e-06, - "loss": 0.0941, + "epoch": 6.750334174959156, + "grad_norm": 1.4570856094360352, + "learning_rate": 3.2496658250408435e-06, + "loss": 0.0573, "step": 45450 }, { - "epoch": 3.3759096984999255, - "grad_norm": 0.9906954169273376, - "learning_rate": 9.744541809000446e-06, + "epoch": 6.751819396999851, + "grad_norm": 0.6028823852539062, + "learning_rate": 3.248180603000149e-06, "loss": 0.0514, "step": 45460 }, { - "epoch": 3.3766523095202734, - "grad_norm": 1.4278010129928589, - "learning_rate": 9.740086142878361e-06, - "loss": 0.0492, + "epoch": 6.753304619040547, + "grad_norm": 0.6922430396080017, + "learning_rate": 3.2466953809594536e-06, + "loss": 0.0457, "step": 45470 }, { - "epoch": 3.377394920540621, - "grad_norm": 2.468226671218872, - "learning_rate": 9.735630476756275e-06, - "loss": 0.0692, + "epoch": 6.754789841081242, + "grad_norm": 1.0826843976974487, + "learning_rate": 3.2452101589187585e-06, + "loss": 0.0374, "step": 45480 }, { - "epoch": 3.3781375315609683, - "grad_norm": 0.5829160809516907, - "learning_rate": 9.731174810634191e-06, - "loss": 0.0661, + "epoch": 6.756275063121937, + "grad_norm": 0.9226403832435608, + "learning_rate": 3.2437249368780638e-06, + "loss": 0.075, "step": 45490 }, { - "epoch": 3.3788801425813157, - "grad_norm": 1.5764065980911255, - "learning_rate": 9.726719144512105e-06, - "loss": 0.0695, + "epoch": 6.7577602851626315, + "grad_norm": 1.2400094270706177, + "learning_rate": 3.2422397148373686e-06, + "loss": 0.0735, "step": 45500 }, { - "epoch": 3.3796227536016636, - "grad_norm": 0.9860460162162781, - "learning_rate": 9.72226347839002e-06, - "loss": 0.0474, + "epoch": 6.759245507203327, + "grad_norm": 0.8410294055938721, + "learning_rate": 3.2407544927966735e-06, + "loss": 0.0627, "step": 45510 }, { - "epoch": 3.380365364622011, - "grad_norm": 1.8442999124526978, - "learning_rate": 9.717807812267933e-06, - "loss": 0.1209, + "epoch": 6.760730729244022, + "grad_norm": 0.6011391878128052, + "learning_rate": 3.239269270755978e-06, + "loss": 0.0453, "step": 45520 }, { - "epoch": 3.3811079756423585, - "grad_norm": 3.0390632152557373, - "learning_rate": 9.71335214614585e-06, - "loss": 0.0604, + "epoch": 6.762215951284717, + "grad_norm": 1.1391057968139648, + "learning_rate": 3.237784048715283e-06, + "loss": 0.068, "step": 45530 }, { - "epoch": 3.381850586662706, - "grad_norm": 1.5547683238983154, - "learning_rate": 9.708896480023765e-06, - "loss": 0.093, + "epoch": 6.763701173325412, + "grad_norm": 0.9182172417640686, + "learning_rate": 3.236298826674588e-06, + "loss": 0.062, "step": 45540 }, { - "epoch": 3.3825931976830534, - "grad_norm": 2.504794120788574, - "learning_rate": 9.704440813901678e-06, - "loss": 0.0385, + "epoch": 6.765186395366107, + "grad_norm": 0.744276225566864, + "learning_rate": 3.234813604633893e-06, + "loss": 0.0592, "step": 45550 }, { - "epoch": 3.3833358087034013, - "grad_norm": 2.069444417953491, - "learning_rate": 9.699985147779593e-06, - "loss": 0.0766, + "epoch": 6.766671617406803, + "grad_norm": 0.7632409930229187, + "learning_rate": 3.2333283825931978e-06, + "loss": 0.0542, "step": 45560 }, { - "epoch": 3.384078419723749, - "grad_norm": 1.7017885446548462, - "learning_rate": 9.695529481657508e-06, - "loss": 0.066, + "epoch": 6.768156839447498, + "grad_norm": 1.5479755401611328, + "learning_rate": 3.231843160552503e-06, + "loss": 0.075, "step": 45570 }, { - "epoch": 3.3848210307440962, - "grad_norm": 1.3779213428497314, - "learning_rate": 9.691073815535423e-06, - "loss": 0.0581, + "epoch": 6.7696420614881925, + "grad_norm": 0.5727285742759705, + "learning_rate": 3.230357938511808e-06, + "loss": 0.0546, "step": 45580 }, { - "epoch": 3.3855636417644437, - "grad_norm": 2.676387071609497, - "learning_rate": 9.686618149413338e-06, - "loss": 0.0898, + "epoch": 6.771127283528887, + "grad_norm": 1.4612948894500732, + "learning_rate": 3.2288727164711124e-06, + "loss": 0.0482, "step": 45590 }, { - "epoch": 3.386306252784791, - "grad_norm": 1.7272534370422363, - "learning_rate": 9.682162483291253e-06, - "loss": 0.0493, + "epoch": 6.772612505569582, + "grad_norm": 1.1288641691207886, + "learning_rate": 3.227387494430418e-06, + "loss": 0.0623, "step": 45600 }, { - "epoch": 3.387048863805139, - "grad_norm": 0.43985655903816223, - "learning_rate": 9.677706817169168e-06, - "loss": 0.0536, + "epoch": 6.774097727610278, + "grad_norm": 0.662311315536499, + "learning_rate": 3.2259022723897225e-06, + "loss": 0.0448, "step": 45610 }, { - "epoch": 3.3877914748254865, - "grad_norm": 2.7680649757385254, - "learning_rate": 9.673251151047081e-06, - "loss": 0.0442, + "epoch": 6.775582949650973, + "grad_norm": 1.3300342559814453, + "learning_rate": 3.2244170503490273e-06, + "loss": 0.0463, "step": 45620 }, { - "epoch": 3.388534085845834, - "grad_norm": 2.375917911529541, - "learning_rate": 9.668795484924996e-06, - "loss": 0.0681, + "epoch": 6.777068171691668, + "grad_norm": 0.4563080966472626, + "learning_rate": 3.222931828308332e-06, + "loss": 0.0432, "step": 45630 }, { - "epoch": 3.3892766968661814, - "grad_norm": 1.5520646572113037, - "learning_rate": 9.664339818802911e-06, - "loss": 0.0533, + "epoch": 6.778553393732363, + "grad_norm": 0.7089258432388306, + "learning_rate": 3.2214466062676375e-06, + "loss": 0.0524, "step": 45640 }, { - "epoch": 3.3900193078865293, - "grad_norm": 0.7106296420097351, - "learning_rate": 9.659884152680826e-06, - "loss": 0.0527, + "epoch": 6.7800386157730586, + "grad_norm": 0.6759585738182068, + "learning_rate": 3.2199613842269423e-06, + "loss": 0.051, "step": 45650 }, { - "epoch": 3.3907619189068767, - "grad_norm": 1.5558034181594849, - "learning_rate": 9.655428486558741e-06, - "loss": 0.0664, + "epoch": 6.7815238378137535, + "grad_norm": 0.4059349000453949, + "learning_rate": 3.218476162186247e-06, + "loss": 0.073, "step": 45660 }, { - "epoch": 3.391504529927224, - "grad_norm": 2.0336802005767822, - "learning_rate": 9.650972820436654e-06, - "loss": 0.0744, + "epoch": 6.783009059854448, + "grad_norm": 1.0772613286972046, + "learning_rate": 3.2169909401455516e-06, + "loss": 0.0635, "step": 45670 }, { - "epoch": 3.3922471409475716, - "grad_norm": 2.6379876136779785, - "learning_rate": 9.646517154314571e-06, - "loss": 0.0752, + "epoch": 6.784494281895143, + "grad_norm": 0.5796312093734741, + "learning_rate": 3.215505718104857e-06, + "loss": 0.0508, "step": 45680 }, { - "epoch": 3.392989751967919, - "grad_norm": 1.7118732929229736, - "learning_rate": 9.642061488192484e-06, - "loss": 0.0775, + "epoch": 6.785979503935838, + "grad_norm": 0.342790812253952, + "learning_rate": 3.2140204960641618e-06, + "loss": 0.0686, "step": 45690 }, { - "epoch": 3.393732362988267, - "grad_norm": 0.5552663803100586, - "learning_rate": 9.6376058220704e-06, - "loss": 0.0601, + "epoch": 6.787464725976534, + "grad_norm": 0.28183823823928833, + "learning_rate": 3.2125352740234666e-06, + "loss": 0.0377, "step": 45700 }, { - "epoch": 3.3944749740086144, - "grad_norm": 1.9017750024795532, - "learning_rate": 9.633150155948314e-06, - "loss": 0.0687, + "epoch": 6.788949948017229, + "grad_norm": 1.015336513519287, + "learning_rate": 3.2110500519827715e-06, + "loss": 0.0651, "step": 45710 }, { - "epoch": 3.395217585028962, - "grad_norm": 0.6380416750907898, - "learning_rate": 9.62869448982623e-06, - "loss": 0.0657, + "epoch": 6.790435170057924, + "grad_norm": 0.5184191465377808, + "learning_rate": 3.2095648299420768e-06, + "loss": 0.0566, "step": 45720 }, { - "epoch": 3.3959601960493093, - "grad_norm": 0.7644681930541992, - "learning_rate": 9.624238823704144e-06, - "loss": 0.0563, + "epoch": 6.791920392098619, + "grad_norm": 1.2390819787979126, + "learning_rate": 3.2080796079013816e-06, + "loss": 0.0634, "step": 45730 }, { - "epoch": 3.3967028070696568, - "grad_norm": 3.4050753116607666, - "learning_rate": 9.619783157582058e-06, - "loss": 0.0557, + "epoch": 6.7934056141393135, + "grad_norm": 1.3153749704360962, + "learning_rate": 3.2065943858606865e-06, + "loss": 0.0733, "step": 45740 }, { - "epoch": 3.3974454180900047, - "grad_norm": 2.2620880603790283, - "learning_rate": 9.615327491459974e-06, - "loss": 0.0667, + "epoch": 6.794890836180009, + "grad_norm": 0.7628545165061951, + "learning_rate": 3.2051091638199918e-06, + "loss": 0.0642, "step": 45750 }, { - "epoch": 3.398188029110352, - "grad_norm": 1.442107915878296, - "learning_rate": 9.610871825337888e-06, - "loss": 0.0644, + "epoch": 6.796376058220704, + "grad_norm": 0.3626285493373871, + "learning_rate": 3.203623941779296e-06, + "loss": 0.0683, "step": 45760 }, { - "epoch": 3.3989306401306996, - "grad_norm": 0.9981054067611694, - "learning_rate": 9.606416159215803e-06, - "loss": 0.0412, + "epoch": 6.797861280261399, + "grad_norm": 1.9119198322296143, + "learning_rate": 3.202138719738601e-06, + "loss": 0.061, "step": 45770 }, { - "epoch": 3.399673251151047, - "grad_norm": 0.9032704830169678, - "learning_rate": 9.601960493093718e-06, - "loss": 0.0779, + "epoch": 6.799346502302094, + "grad_norm": 3.3886430263519287, + "learning_rate": 3.200653497697906e-06, + "loss": 0.0658, "step": 45780 }, { - "epoch": 3.4004158621713945, - "grad_norm": 1.9194782972335815, - "learning_rate": 9.597504826971633e-06, - "loss": 0.0679, + "epoch": 6.800831724342789, + "grad_norm": 0.7113927006721497, + "learning_rate": 3.199168275657211e-06, + "loss": 0.0538, "step": 45790 }, { - "epoch": 3.4011584731917424, - "grad_norm": 2.2495553493499756, - "learning_rate": 9.593049160849548e-06, - "loss": 0.0495, + "epoch": 6.802316946383485, + "grad_norm": 0.2835240364074707, + "learning_rate": 3.197683053616516e-06, + "loss": 0.0624, "step": 45800 }, { - "epoch": 3.40190108421209, - "grad_norm": 0.7401419878005981, - "learning_rate": 9.588593494727461e-06, - "loss": 0.0372, + "epoch": 6.80380216842418, + "grad_norm": 1.0122696161270142, + "learning_rate": 3.196197831575821e-06, + "loss": 0.0578, "step": 45810 }, { - "epoch": 3.4026436952324373, - "grad_norm": 2.4702985286712646, - "learning_rate": 9.584137828605376e-06, - "loss": 0.0629, + "epoch": 6.8052873904648745, + "grad_norm": 0.16555657982826233, + "learning_rate": 3.1947126095351253e-06, + "loss": 0.0491, "step": 45820 }, { - "epoch": 3.4033863062527847, - "grad_norm": 1.2022415399551392, - "learning_rate": 9.579682162483293e-06, - "loss": 0.0573, + "epoch": 6.806772612505569, + "grad_norm": 1.048108696937561, + "learning_rate": 3.193227387494431e-06, + "loss": 0.0622, "step": 45830 }, { - "epoch": 3.404128917273132, - "grad_norm": 1.8239872455596924, - "learning_rate": 9.575226496361206e-06, - "loss": 0.0513, + "epoch": 6.808257834546264, + "grad_norm": 0.842766523361206, + "learning_rate": 3.1917421654537355e-06, + "loss": 0.0464, "step": 45840 }, { - "epoch": 3.40487152829348, - "grad_norm": 2.117016553878784, - "learning_rate": 9.570770830239121e-06, - "loss": 0.0671, + "epoch": 6.80974305658696, + "grad_norm": 0.6435438990592957, + "learning_rate": 3.1902569434130403e-06, + "loss": 0.0426, "step": 45850 }, { - "epoch": 3.4056141393138275, - "grad_norm": 1.195753574371338, - "learning_rate": 9.566315164117036e-06, - "loss": 0.0544, + "epoch": 6.811228278627655, + "grad_norm": 0.6203869581222534, + "learning_rate": 3.1887717213723456e-06, + "loss": 0.0484, "step": 45860 }, { - "epoch": 3.406356750334175, - "grad_norm": 0.719028890132904, - "learning_rate": 9.561859497994951e-06, - "loss": 0.058, + "epoch": 6.81271350066835, + "grad_norm": 0.9314228296279907, + "learning_rate": 3.1872864993316505e-06, + "loss": 0.0581, "step": 45870 }, { - "epoch": 3.4070993613545224, - "grad_norm": 0.7662678360939026, - "learning_rate": 9.557403831872866e-06, - "loss": 0.06, + "epoch": 6.814198722709045, + "grad_norm": 0.7096196413040161, + "learning_rate": 3.1858012772909553e-06, + "loss": 0.0488, "step": 45880 }, { - "epoch": 3.40784197237487, - "grad_norm": 1.3117008209228516, - "learning_rate": 9.55294816575078e-06, - "loss": 0.0367, + "epoch": 6.81568394474974, + "grad_norm": 0.7279118299484253, + "learning_rate": 3.18431605525026e-06, + "loss": 0.0684, "step": 45890 }, { - "epoch": 3.4085845833952177, - "grad_norm": 0.3628579080104828, - "learning_rate": 9.548492499628696e-06, - "loss": 0.0582, + "epoch": 6.8171691667904355, + "grad_norm": 0.3500811755657196, + "learning_rate": 3.1828308332095655e-06, + "loss": 0.0511, "step": 45900 }, { - "epoch": 3.409327194415565, - "grad_norm": 3.9150137901306152, - "learning_rate": 9.54403683350661e-06, - "loss": 0.0684, + "epoch": 6.81865438883113, + "grad_norm": 2.1978440284729004, + "learning_rate": 3.18134561116887e-06, + "loss": 0.0594, "step": 45910 }, { - "epoch": 3.4100698054359126, - "grad_norm": 1.5000407695770264, - "learning_rate": 9.539581167384524e-06, - "loss": 0.0871, + "epoch": 6.820139610871825, + "grad_norm": 0.5409901738166809, + "learning_rate": 3.1798603891281747e-06, + "loss": 0.0479, "step": 45920 }, { - "epoch": 3.41081241645626, - "grad_norm": 1.804474115371704, - "learning_rate": 9.535125501262438e-06, - "loss": 0.0469, + "epoch": 6.82162483291252, + "grad_norm": 1.1827361583709717, + "learning_rate": 3.1783751670874796e-06, + "loss": 0.0754, "step": 45930 }, { - "epoch": 3.4115550274766075, - "grad_norm": 1.7255808115005493, - "learning_rate": 9.530669835140354e-06, - "loss": 0.0358, + "epoch": 6.823110054953215, + "grad_norm": 1.3332449197769165, + "learning_rate": 3.176889945046785e-06, + "loss": 0.0687, "step": 45940 }, { - "epoch": 3.4122976384969554, - "grad_norm": 1.890074372291565, - "learning_rate": 9.52621416901827e-06, - "loss": 0.0541, + "epoch": 6.824595276993911, + "grad_norm": 1.1483596563339233, + "learning_rate": 3.1754047230060897e-06, + "loss": 0.0816, "step": 45950 }, { - "epoch": 3.413040249517303, - "grad_norm": 1.5225468873977661, - "learning_rate": 9.521758502896183e-06, - "loss": 0.074, + "epoch": 6.826080499034606, + "grad_norm": 0.9273011684417725, + "learning_rate": 3.1739195009653946e-06, + "loss": 0.0577, "step": 45960 }, { - "epoch": 3.4137828605376503, - "grad_norm": 1.2024898529052734, - "learning_rate": 9.517302836774098e-06, - "loss": 0.0931, + "epoch": 6.827565721075301, + "grad_norm": 1.3494038581848145, + "learning_rate": 3.172434278924699e-06, + "loss": 0.069, "step": 45970 }, { - "epoch": 3.414525471557998, - "grad_norm": 5.313319683074951, - "learning_rate": 9.512847170652013e-06, - "loss": 0.0663, + "epoch": 6.829050943115996, + "grad_norm": 1.0662420988082886, + "learning_rate": 3.1709490568840047e-06, + "loss": 0.0494, "step": 45980 }, { - "epoch": 3.4152680825783452, - "grad_norm": 1.3511769771575928, - "learning_rate": 9.508391504529928e-06, - "loss": 0.0887, + "epoch": 6.8305361651566905, + "grad_norm": 0.4860890507698059, + "learning_rate": 3.169463834843309e-06, + "loss": 0.0428, "step": 45990 }, { - "epoch": 3.416010693598693, - "grad_norm": 2.674060344696045, - "learning_rate": 9.503935838407843e-06, - "loss": 0.0518, + "epoch": 6.832021387197386, + "grad_norm": 0.5755127668380737, + "learning_rate": 3.167978612802614e-06, + "loss": 0.0485, "step": 46000 }, { - "epoch": 3.4167533046190406, - "grad_norm": 0.3742149770259857, - "learning_rate": 9.499480172285757e-06, - "loss": 0.0483, + "epoch": 6.833506609238081, + "grad_norm": 1.821703553199768, + "learning_rate": 3.1664933907619193e-06, + "loss": 0.0745, "step": 46010 }, { - "epoch": 3.417495915639388, - "grad_norm": 0.4566430151462555, - "learning_rate": 9.495024506163672e-06, - "loss": 0.0641, + "epoch": 6.834991831278776, + "grad_norm": 1.4857114553451538, + "learning_rate": 3.165008168721224e-06, + "loss": 0.0549, "step": 46020 }, { - "epoch": 3.4182385266597355, - "grad_norm": 1.1455416679382324, - "learning_rate": 9.490568840041586e-06, - "loss": 0.0723, + "epoch": 6.836477053319471, + "grad_norm": 0.24308165907859802, + "learning_rate": 3.163522946680529e-06, + "loss": 0.0573, "step": 46030 }, { - "epoch": 3.418981137680083, - "grad_norm": 0.7190825343132019, - "learning_rate": 9.4861131739195e-06, - "loss": 0.0622, + "epoch": 6.837962275360166, + "grad_norm": 0.7036371231079102, + "learning_rate": 3.162037724639834e-06, + "loss": 0.0492, "step": 46040 }, { - "epoch": 3.419723748700431, - "grad_norm": 0.6445209383964539, - "learning_rate": 9.481657507797416e-06, - "loss": 0.0357, + "epoch": 6.839447497400862, + "grad_norm": 1.0023090839385986, + "learning_rate": 3.160552502599139e-06, + "loss": 0.0555, "step": 46050 }, { - "epoch": 3.4204663597207783, - "grad_norm": 2.212388038635254, - "learning_rate": 9.47720184167533e-06, - "loss": 0.0446, + "epoch": 6.840932719441557, + "grad_norm": 1.0630323886871338, + "learning_rate": 3.159067280558444e-06, + "loss": 0.0572, "step": 46060 }, { - "epoch": 3.4212089707411257, - "grad_norm": 1.0653600692749023, - "learning_rate": 9.472746175553246e-06, - "loss": 0.099, + "epoch": 6.8424179414822515, + "grad_norm": 0.6267212629318237, + "learning_rate": 3.1575820585177484e-06, + "loss": 0.058, "step": 46070 }, { - "epoch": 3.421951581761473, - "grad_norm": 2.42584228515625, - "learning_rate": 9.468290509431159e-06, - "loss": 0.0587, + "epoch": 6.843903163522946, + "grad_norm": 1.072516679763794, + "learning_rate": 3.1560968364770533e-06, + "loss": 0.0391, "step": 46080 }, { - "epoch": 3.422694192781821, - "grad_norm": 2.0575003623962402, - "learning_rate": 9.463834843309076e-06, - "loss": 0.0777, + "epoch": 6.845388385563642, + "grad_norm": 1.2599564790725708, + "learning_rate": 3.1546116144363586e-06, + "loss": 0.0442, "step": 46090 }, { - "epoch": 3.4234368038021685, - "grad_norm": 2.0597803592681885, - "learning_rate": 9.459379177186989e-06, - "loss": 0.0569, + "epoch": 6.846873607604337, + "grad_norm": 0.9169860482215881, + "learning_rate": 3.1531263923956634e-06, + "loss": 0.081, "step": 46100 }, { - "epoch": 3.424179414822516, - "grad_norm": 1.2137857675552368, - "learning_rate": 9.454923511064904e-06, - "loss": 0.0696, + "epoch": 6.848358829645032, + "grad_norm": 0.9532322883605957, + "learning_rate": 3.1516411703549683e-06, + "loss": 0.0626, "step": 46110 }, { - "epoch": 3.4249220258428634, - "grad_norm": 1.2219140529632568, - "learning_rate": 9.45046784494282e-06, - "loss": 0.0644, + "epoch": 6.849844051685727, + "grad_norm": 1.1273218393325806, + "learning_rate": 3.1501559483142736e-06, + "loss": 0.0488, "step": 46120 }, { - "epoch": 3.425664636863211, - "grad_norm": 0.3102966845035553, - "learning_rate": 9.446012178820734e-06, - "loss": 0.0485, + "epoch": 6.851329273726422, + "grad_norm": 0.7113762497901917, + "learning_rate": 3.1486707262735784e-06, + "loss": 0.0402, "step": 46130 }, { - "epoch": 3.4264072478835588, - "grad_norm": 3.548948287963867, - "learning_rate": 9.441556512698649e-06, - "loss": 0.0802, + "epoch": 6.8528144957671175, + "grad_norm": 0.5954686403274536, + "learning_rate": 3.147185504232883e-06, + "loss": 0.0543, "step": 46140 }, { - "epoch": 3.427149858903906, - "grad_norm": 1.5830320119857788, - "learning_rate": 9.437100846576562e-06, - "loss": 0.0685, + "epoch": 6.854299717807812, + "grad_norm": 1.0223079919815063, + "learning_rate": 3.1457002821921877e-06, + "loss": 0.0602, "step": 46150 }, { - "epoch": 3.4278924699242537, - "grad_norm": 0.3730054795742035, - "learning_rate": 9.432645180454479e-06, - "loss": 0.0555, + "epoch": 6.855784939848507, + "grad_norm": 0.790874719619751, + "learning_rate": 3.144215060151493e-06, + "loss": 0.0587, "step": 46160 }, { - "epoch": 3.428635080944601, - "grad_norm": 2.1286816596984863, - "learning_rate": 9.428189514332394e-06, - "loss": 0.0546, + "epoch": 6.857270161889202, + "grad_norm": 1.2213834524154663, + "learning_rate": 3.142729838110798e-06, + "loss": 0.0795, "step": 46170 }, { - "epoch": 3.4293776919649486, - "grad_norm": 0.7147573828697205, - "learning_rate": 9.423733848210307e-06, - "loss": 0.0516, + "epoch": 6.858755383929897, + "grad_norm": 1.4595249891281128, + "learning_rate": 3.1412446160701027e-06, + "loss": 0.0633, "step": 46180 }, { - "epoch": 3.4301203029852965, - "grad_norm": 2.009298324584961, - "learning_rate": 9.419278182088222e-06, - "loss": 0.0667, + "epoch": 6.860240605970593, + "grad_norm": 1.1774989366531372, + "learning_rate": 3.1397593940294076e-06, + "loss": 0.0525, "step": 46190 }, { - "epoch": 3.430862914005644, - "grad_norm": 1.3036754131317139, - "learning_rate": 9.414822515966137e-06, - "loss": 0.0738, + "epoch": 6.861725828011288, + "grad_norm": 0.7904123067855835, + "learning_rate": 3.138274171988713e-06, + "loss": 0.0699, "step": 46200 }, { - "epoch": 3.4316055250259914, - "grad_norm": 1.2708832025527954, - "learning_rate": 9.410366849844052e-06, - "loss": 0.0378, + "epoch": 6.863211050051983, + "grad_norm": 0.8630386590957642, + "learning_rate": 3.1367889499480177e-06, + "loss": 0.0661, "step": 46210 }, { - "epoch": 3.432348136046339, - "grad_norm": 1.8291300535202026, - "learning_rate": 9.405911183721966e-06, - "loss": 0.0484, + "epoch": 6.864696272092678, + "grad_norm": 0.9028981328010559, + "learning_rate": 3.135303727907322e-06, + "loss": 0.0614, "step": 46220 }, { - "epoch": 3.4330907470666867, - "grad_norm": 3.277575731277466, - "learning_rate": 9.40145551759988e-06, - "loss": 0.0766, + "epoch": 6.866181494133373, + "grad_norm": 0.7527992129325867, + "learning_rate": 3.133818505866627e-06, + "loss": 0.0571, "step": 46230 }, { - "epoch": 3.433833358087034, - "grad_norm": 2.973456859588623, - "learning_rate": 9.396999851477797e-06, - "loss": 0.0464, + "epoch": 6.867666716174068, + "grad_norm": 1.0797587633132935, + "learning_rate": 3.1323332838259323e-06, + "loss": 0.0481, "step": 46240 }, { - "epoch": 3.4345759691073816, - "grad_norm": 1.2118042707443237, - "learning_rate": 9.39254418535571e-06, - "loss": 0.0573, + "epoch": 6.869151938214763, + "grad_norm": 0.7128764986991882, + "learning_rate": 3.130848061785237e-06, + "loss": 0.0694, "step": 46250 }, { - "epoch": 3.435318580127729, - "grad_norm": 1.3770191669464111, - "learning_rate": 9.388088519233626e-06, - "loss": 0.0598, + "epoch": 6.870637160255458, + "grad_norm": 0.7176929712295532, + "learning_rate": 3.129362839744542e-06, + "loss": 0.0587, "step": 46260 }, { - "epoch": 3.4360611911480765, - "grad_norm": 2.709092855453491, - "learning_rate": 9.38363285311154e-06, - "loss": 0.0743, + "epoch": 6.872122382296153, + "grad_norm": 1.0772123336791992, + "learning_rate": 3.1278776177038473e-06, + "loss": 0.0671, "step": 46270 }, { - "epoch": 3.4368038021684244, - "grad_norm": 0.41751283407211304, - "learning_rate": 9.379177186989456e-06, - "loss": 0.0687, + "epoch": 6.873607604336849, + "grad_norm": 0.9489224553108215, + "learning_rate": 3.126392395663152e-06, + "loss": 0.0761, "step": 46280 }, { - "epoch": 3.437546413188772, - "grad_norm": 0.8437953591346741, - "learning_rate": 9.37472152086737e-06, - "loss": 0.047, + "epoch": 6.875092826377544, + "grad_norm": 0.44016531109809875, + "learning_rate": 3.1249071736224566e-06, + "loss": 0.0462, "step": 46290 }, { - "epoch": 3.4382890242091193, - "grad_norm": 2.391899824142456, - "learning_rate": 9.370265854745284e-06, - "loss": 0.0499, + "epoch": 6.876578048418239, + "grad_norm": 1.233738899230957, + "learning_rate": 3.1234219515817614e-06, + "loss": 0.0654, "step": 46300 }, { - "epoch": 3.4390316352294668, - "grad_norm": 1.9807353019714355, - "learning_rate": 9.3658101886232e-06, - "loss": 0.0596, + "epoch": 6.8780632704589335, + "grad_norm": 1.355086088180542, + "learning_rate": 3.1219367295410667e-06, + "loss": 0.0656, "step": 46310 }, { - "epoch": 3.439774246249814, - "grad_norm": 0.2767632007598877, - "learning_rate": 9.361354522501114e-06, - "loss": 0.0419, + "epoch": 6.879548492499628, + "grad_norm": 0.967938244342804, + "learning_rate": 3.1204515075003716e-06, + "loss": 0.0563, "step": 46320 }, { - "epoch": 3.440516857270162, - "grad_norm": 2.447248697280884, - "learning_rate": 9.356898856379029e-06, - "loss": 0.0679, + "epoch": 6.881033714540324, + "grad_norm": 0.3975263833999634, + "learning_rate": 3.1189662854596764e-06, + "loss": 0.0569, "step": 46330 }, { - "epoch": 3.4412594682905095, - "grad_norm": 1.484784722328186, - "learning_rate": 9.352443190256942e-06, - "loss": 0.039, + "epoch": 6.882518936581019, + "grad_norm": 0.5445032715797424, + "learning_rate": 3.1174810634189813e-06, + "loss": 0.0585, "step": 46340 }, { - "epoch": 3.442002079310857, - "grad_norm": 1.6514251232147217, - "learning_rate": 9.347987524134859e-06, - "loss": 0.0346, + "epoch": 6.884004158621714, + "grad_norm": 1.676100492477417, + "learning_rate": 3.1159958413782866e-06, + "loss": 0.0737, "step": 46350 }, { - "epoch": 3.4427446903312044, - "grad_norm": 1.2701706886291504, - "learning_rate": 9.343531858012774e-06, - "loss": 0.0298, + "epoch": 6.885489380662409, + "grad_norm": 0.5582711100578308, + "learning_rate": 3.1145106193375914e-06, + "loss": 0.0411, "step": 46360 }, { - "epoch": 3.443487301351552, - "grad_norm": 6.292190074920654, - "learning_rate": 9.339076191890687e-06, - "loss": 0.0471, + "epoch": 6.886974602703104, + "grad_norm": 0.9410350322723389, + "learning_rate": 3.113025397296896e-06, + "loss": 0.0392, "step": 46370 }, { - "epoch": 3.4442299123719, - "grad_norm": 1.1822702884674072, - "learning_rate": 9.334620525768604e-06, - "loss": 0.088, + "epoch": 6.8884598247438, + "grad_norm": 1.3940622806549072, + "learning_rate": 3.111540175256201e-06, + "loss": 0.0542, "step": 46380 }, { - "epoch": 3.4449725233922472, - "grad_norm": 2.342872142791748, - "learning_rate": 9.330164859646517e-06, - "loss": 0.0844, + "epoch": 6.8899450467844945, + "grad_norm": 1.2696105241775513, + "learning_rate": 3.110054953215506e-06, + "loss": 0.0529, "step": 46390 }, { - "epoch": 3.4457151344125947, - "grad_norm": 0.5191331505775452, - "learning_rate": 9.325709193524432e-06, - "loss": 0.0737, + "epoch": 6.891430268825189, + "grad_norm": 0.5525928735733032, + "learning_rate": 3.108569731174811e-06, + "loss": 0.0653, "step": 46400 }, { - "epoch": 3.446457745432942, - "grad_norm": 2.7693073749542236, - "learning_rate": 9.321253527402347e-06, - "loss": 0.0634, + "epoch": 6.892915490865884, + "grad_norm": 0.6907713413238525, + "learning_rate": 3.1070845091341157e-06, + "loss": 0.0557, "step": 46410 }, { - "epoch": 3.4472003564532896, - "grad_norm": 1.8164703845977783, - "learning_rate": 9.316797861280262e-06, - "loss": 0.0518, + "epoch": 6.894400712906579, + "grad_norm": 1.9477370977401733, + "learning_rate": 3.105599287093421e-06, + "loss": 0.0398, "step": 46420 }, { - "epoch": 3.4479429674736375, - "grad_norm": 0.6304923295974731, - "learning_rate": 9.312342195158177e-06, - "loss": 0.0483, + "epoch": 6.895885934947275, + "grad_norm": 0.5067228674888611, + "learning_rate": 3.104114065052726e-06, + "loss": 0.0618, "step": 46430 }, { - "epoch": 3.448685578493985, - "grad_norm": 0.35260432958602905, - "learning_rate": 9.30788652903609e-06, - "loss": 0.0711, + "epoch": 6.89737115698797, + "grad_norm": 0.6338545083999634, + "learning_rate": 3.1026288430120307e-06, + "loss": 0.0475, "step": 46440 }, { - "epoch": 3.4494281895143324, - "grad_norm": 1.1337438821792603, - "learning_rate": 9.303430862914005e-06, - "loss": 0.0628, + "epoch": 6.898856379028665, + "grad_norm": 1.339288592338562, + "learning_rate": 3.101143620971335e-06, + "loss": 0.0614, "step": 46450 }, { - "epoch": 3.45017080053468, - "grad_norm": 1.7715853452682495, - "learning_rate": 9.29897519679192e-06, - "loss": 0.0582, + "epoch": 6.90034160106936, + "grad_norm": 0.7984129786491394, + "learning_rate": 3.0996583989306404e-06, + "loss": 0.0688, "step": 46460 }, { - "epoch": 3.4509134115550273, - "grad_norm": 2.768024206161499, - "learning_rate": 9.294519530669835e-06, - "loss": 0.0729, + "epoch": 6.901826823110055, + "grad_norm": 1.0853620767593384, + "learning_rate": 3.0981731768899453e-06, + "loss": 0.0624, "step": 46470 }, { - "epoch": 3.451656022575375, - "grad_norm": 0.7756059169769287, - "learning_rate": 9.29006386454775e-06, - "loss": 0.0466, + "epoch": 6.90331204515075, + "grad_norm": 0.7857335805892944, + "learning_rate": 3.09668795484925e-06, + "loss": 0.0731, "step": 46480 }, { - "epoch": 3.4523986335957226, - "grad_norm": 1.0126781463623047, - "learning_rate": 9.285608198425664e-06, - "loss": 0.0615, + "epoch": 6.904797267191445, + "grad_norm": 0.6904709935188293, + "learning_rate": 3.095202732808555e-06, + "loss": 0.0528, "step": 46490 }, { - "epoch": 3.45314124461607, - "grad_norm": 1.432900071144104, - "learning_rate": 9.28115253230358e-06, - "loss": 0.0481, + "epoch": 6.90628248923214, + "grad_norm": 1.523937702178955, + "learning_rate": 3.0937175107678603e-06, + "loss": 0.0723, "step": 46500 }, { - "epoch": 3.4538838556364175, - "grad_norm": 1.1251558065414429, - "learning_rate": 9.276696866181494e-06, - "loss": 0.0573, + "epoch": 6.907767711272835, + "grad_norm": 0.5652097463607788, + "learning_rate": 3.092232288727165e-06, + "loss": 0.0645, "step": 46510 }, { - "epoch": 3.454626466656765, - "grad_norm": 1.9688692092895508, - "learning_rate": 9.272241200059409e-06, - "loss": 0.0425, + "epoch": 6.90925293331353, + "grad_norm": 1.3932520151138306, + "learning_rate": 3.0907470666864696e-06, + "loss": 0.0491, "step": 46520 }, { - "epoch": 3.455369077677113, - "grad_norm": 0.8410460352897644, - "learning_rate": 9.267785533937325e-06, - "loss": 0.0555, + "epoch": 6.910738155354226, + "grad_norm": 0.5495141744613647, + "learning_rate": 3.0892618446457753e-06, + "loss": 0.0622, "step": 46530 }, { - "epoch": 3.4561116886974603, - "grad_norm": 1.3958379030227661, - "learning_rate": 9.263329867815239e-06, - "loss": 0.0733, + "epoch": 6.912223377394921, + "grad_norm": 0.7841812968254089, + "learning_rate": 3.0877766226050797e-06, + "loss": 0.0606, "step": 46540 }, { - "epoch": 3.4568542997178078, - "grad_norm": 0.4909604489803314, - "learning_rate": 9.258874201693154e-06, - "loss": 0.0681, + "epoch": 6.9137085994356156, + "grad_norm": 0.9534539580345154, + "learning_rate": 3.0862914005643845e-06, + "loss": 0.0517, "step": 46550 }, { - "epoch": 3.4575969107381552, - "grad_norm": 0.4906344413757324, - "learning_rate": 9.254418535571067e-06, - "loss": 0.0965, + "epoch": 6.9151938214763105, + "grad_norm": 0.7401015758514404, + "learning_rate": 3.0848061785236894e-06, + "loss": 0.0598, "step": 46560 }, { - "epoch": 3.4583395217585027, - "grad_norm": 0.9761192202568054, - "learning_rate": 9.249962869448984e-06, - "loss": 0.049, + "epoch": 6.916679043517005, + "grad_norm": 1.01905357837677, + "learning_rate": 3.0833209564829947e-06, + "loss": 0.0599, "step": 46570 }, { - "epoch": 3.4590821327788506, - "grad_norm": 0.2913404107093811, - "learning_rate": 9.245507203326899e-06, - "loss": 0.0506, + "epoch": 6.918164265557701, + "grad_norm": 0.5511347055435181, + "learning_rate": 3.0818357344422995e-06, + "loss": 0.0563, "step": 46580 }, { - "epoch": 3.459824743799198, - "grad_norm": 1.4044737815856934, - "learning_rate": 9.241051537204812e-06, - "loss": 0.0496, + "epoch": 6.919649487598396, + "grad_norm": 0.8203628063201904, + "learning_rate": 3.0803505124016044e-06, + "loss": 0.0501, "step": 46590 }, { - "epoch": 3.4605673548195455, - "grad_norm": 0.5915066599845886, - "learning_rate": 9.236595871082727e-06, - "loss": 0.0476, + "epoch": 6.921134709639091, + "grad_norm": 0.7230483889579773, + "learning_rate": 3.078865290360909e-06, + "loss": 0.0636, "step": 46600 }, { - "epoch": 3.461309965839893, - "grad_norm": 0.6072288751602173, - "learning_rate": 9.232140204960642e-06, - "loss": 0.0433, + "epoch": 6.922619931679786, + "grad_norm": 1.0204007625579834, + "learning_rate": 3.077380068320214e-06, + "loss": 0.046, "step": 46610 }, { - "epoch": 3.4620525768602404, - "grad_norm": 1.144883632659912, - "learning_rate": 9.227684538838557e-06, - "loss": 0.0593, + "epoch": 6.924105153720481, + "grad_norm": 0.5306640267372131, + "learning_rate": 3.075894846279519e-06, + "loss": 0.0564, "step": 46620 }, { - "epoch": 3.4627951878805883, - "grad_norm": 1.200415015220642, - "learning_rate": 9.22322887271647e-06, - "loss": 0.0611, + "epoch": 6.9255903757611765, + "grad_norm": 1.163796067237854, + "learning_rate": 3.074409624238824e-06, + "loss": 0.0685, "step": 46630 }, { - "epoch": 3.4635377989009357, - "grad_norm": 1.8944742679595947, - "learning_rate": 9.218773206594385e-06, - "loss": 0.0679, + "epoch": 6.927075597801871, + "grad_norm": 0.9207706451416016, + "learning_rate": 3.0729244021981287e-06, + "loss": 0.0522, "step": 46640 }, { - "epoch": 3.464280409921283, - "grad_norm": 1.1931655406951904, - "learning_rate": 9.214317540472302e-06, - "loss": 0.0392, + "epoch": 6.928560819842566, + "grad_norm": 0.6923567652702332, + "learning_rate": 3.071439180157434e-06, + "loss": 0.0599, "step": 46650 }, { - "epoch": 3.4650230209416306, - "grad_norm": 0.9544970989227295, - "learning_rate": 9.209861874350215e-06, - "loss": 0.0808, + "epoch": 6.930046041883261, + "grad_norm": 0.8811810612678528, + "learning_rate": 3.069953958116739e-06, + "loss": 0.0657, "step": 46660 }, { - "epoch": 3.465765631961978, - "grad_norm": 0.6240988373756409, - "learning_rate": 9.20540620822813e-06, - "loss": 0.0622, + "epoch": 6.931531263923956, + "grad_norm": 0.8357272148132324, + "learning_rate": 3.0684687360760433e-06, + "loss": 0.0537, "step": 46670 }, { - "epoch": 3.466508242982326, - "grad_norm": 0.9956406950950623, - "learning_rate": 9.200950542106045e-06, - "loss": 0.0536, + "epoch": 6.933016485964652, + "grad_norm": 1.2306370735168457, + "learning_rate": 3.066983514035349e-06, + "loss": 0.0692, "step": 46680 }, { - "epoch": 3.4672508540026734, - "grad_norm": 1.983014702796936, - "learning_rate": 9.19649487598396e-06, - "loss": 0.0661, + "epoch": 6.934501708005347, + "grad_norm": 0.9291362166404724, + "learning_rate": 3.0654982919946534e-06, + "loss": 0.0535, "step": 46690 }, { - "epoch": 3.467993465023021, - "grad_norm": 1.8986002206802368, - "learning_rate": 9.192039209861875e-06, - "loss": 0.0885, + "epoch": 6.935986930046042, + "grad_norm": 0.82795649766922, + "learning_rate": 3.0640130699539582e-06, + "loss": 0.0533, "step": 46700 }, { - "epoch": 3.4687360760433683, - "grad_norm": 1.382034182548523, - "learning_rate": 9.187583543739788e-06, - "loss": 0.0768, + "epoch": 6.937472152086737, + "grad_norm": 1.0339187383651733, + "learning_rate": 3.062527847913263e-06, + "loss": 0.0721, "step": 46710 }, { - "epoch": 3.469478687063716, - "grad_norm": 2.0056140422821045, - "learning_rate": 9.183127877617705e-06, - "loss": 0.0564, + "epoch": 6.938957374127432, + "grad_norm": 0.706620991230011, + "learning_rate": 3.0610426258725684e-06, + "loss": 0.0643, "step": 46720 }, { - "epoch": 3.4702212980840637, - "grad_norm": 0.6202702522277832, - "learning_rate": 9.178672211495618e-06, - "loss": 0.0412, + "epoch": 6.940442596168127, + "grad_norm": 1.19780695438385, + "learning_rate": 3.0595574038318732e-06, + "loss": 0.071, "step": 46730 }, { - "epoch": 3.470963909104411, - "grad_norm": 2.408010959625244, - "learning_rate": 9.174216545373533e-06, - "loss": 0.0654, + "epoch": 6.941927818208822, + "grad_norm": 0.41239285469055176, + "learning_rate": 3.058072181791178e-06, + "loss": 0.0618, "step": 46740 }, { - "epoch": 3.4717065201247586, - "grad_norm": 1.9785070419311523, - "learning_rate": 9.169760879251447e-06, - "loss": 0.0879, + "epoch": 6.943413040249517, + "grad_norm": 0.7439904808998108, + "learning_rate": 3.0565869597504825e-06, + "loss": 0.0524, "step": 46750 }, { - "epoch": 3.472449131145106, - "grad_norm": 1.066245675086975, - "learning_rate": 9.165305213129363e-06, - "loss": 0.0434, + "epoch": 6.944898262290212, + "grad_norm": 1.1534885168075562, + "learning_rate": 3.055101737709788e-06, + "loss": 0.0511, "step": 46760 }, { - "epoch": 3.473191742165454, - "grad_norm": 0.1916639357805252, - "learning_rate": 9.160849547007278e-06, - "loss": 0.0524, + "epoch": 6.946383484330908, + "grad_norm": 1.2603319883346558, + "learning_rate": 3.0536165156690927e-06, + "loss": 0.0475, "step": 46770 }, { - "epoch": 3.4739343531858013, - "grad_norm": 2.890707492828369, - "learning_rate": 9.156393880885192e-06, - "loss": 0.081, + "epoch": 6.947868706371603, + "grad_norm": 0.8110623955726624, + "learning_rate": 3.0521312936283975e-06, + "loss": 0.0552, "step": 46780 }, { - "epoch": 3.474676964206149, - "grad_norm": 1.435386061668396, - "learning_rate": 9.151938214763108e-06, - "loss": 0.0458, + "epoch": 6.949353928412298, + "grad_norm": 0.9178988337516785, + "learning_rate": 3.050646071587703e-06, + "loss": 0.0566, "step": 46790 }, { - "epoch": 3.4754195752264962, - "grad_norm": 1.8877719640731812, - "learning_rate": 9.147482548641022e-06, - "loss": 0.0704, + "epoch": 6.9508391504529925, + "grad_norm": 1.1614978313446045, + "learning_rate": 3.0491608495470077e-06, + "loss": 0.0679, "step": 46800 }, { - "epoch": 3.476162186246844, - "grad_norm": 2.4223110675811768, - "learning_rate": 9.143026882518937e-06, - "loss": 0.0615, + "epoch": 6.952324372493688, + "grad_norm": 1.3334935903549194, + "learning_rate": 3.0476756275063125e-06, + "loss": 0.0358, "step": 46810 }, { - "epoch": 3.4769047972671916, - "grad_norm": 0.5216322541236877, - "learning_rate": 9.138571216396852e-06, - "loss": 0.0584, + "epoch": 6.953809594534383, + "grad_norm": 1.5388178825378418, + "learning_rate": 3.0461904054656174e-06, + "loss": 0.0738, "step": 46820 }, { - "epoch": 3.477647408287539, - "grad_norm": 2.4628725051879883, - "learning_rate": 9.134115550274767e-06, - "loss": 0.0849, + "epoch": 6.955294816575078, + "grad_norm": 1.34645414352417, + "learning_rate": 3.0447051834249227e-06, + "loss": 0.0561, "step": 46830 }, { - "epoch": 3.4783900193078865, - "grad_norm": 1.662915825843811, - "learning_rate": 9.129659884152682e-06, - "loss": 0.0587, + "epoch": 6.956780038615773, + "grad_norm": 1.158138632774353, + "learning_rate": 3.043219961384227e-06, + "loss": 0.0492, "step": 46840 }, { - "epoch": 3.479132630328234, - "grad_norm": 2.0761513710021973, - "learning_rate": 9.125204218030595e-06, - "loss": 0.0419, + "epoch": 6.958265260656468, + "grad_norm": 0.43134617805480957, + "learning_rate": 3.041734739343532e-06, + "loss": 0.0549, "step": 46850 }, { - "epoch": 3.479875241348582, - "grad_norm": 0.843273401260376, - "learning_rate": 9.12074855190851e-06, - "loss": 0.0633, + "epoch": 6.959750482697164, + "grad_norm": 0.8437008857727051, + "learning_rate": 3.040249517302837e-06, + "loss": 0.0746, "step": 46860 }, { - "epoch": 3.4806178523689293, - "grad_norm": 2.208324432373047, - "learning_rate": 9.116292885786427e-06, - "loss": 0.0575, + "epoch": 6.961235704737859, + "grad_norm": 1.0502889156341553, + "learning_rate": 3.038764295262142e-06, + "loss": 0.0463, "step": 46870 }, { - "epoch": 3.4813604633892767, - "grad_norm": 2.6127047538757324, - "learning_rate": 9.11183721966434e-06, - "loss": 0.0774, + "epoch": 6.9627209267785535, + "grad_norm": 0.48640525341033936, + "learning_rate": 3.037279073221447e-06, + "loss": 0.0591, "step": 46880 }, { - "epoch": 3.482103074409624, - "grad_norm": 0.6786608099937439, - "learning_rate": 9.107381553542255e-06, - "loss": 0.0691, + "epoch": 6.964206148819248, + "grad_norm": 1.5872031450271606, + "learning_rate": 3.035793851180752e-06, + "loss": 0.0653, "step": 46890 }, { - "epoch": 3.4828456854299716, - "grad_norm": 3.497749090194702, - "learning_rate": 9.102925887420168e-06, - "loss": 0.0899, + "epoch": 6.965691370859943, + "grad_norm": 0.44661468267440796, + "learning_rate": 3.0343086291400562e-06, + "loss": 0.058, "step": 46900 }, { - "epoch": 3.4835882964503195, - "grad_norm": 0.41955363750457764, - "learning_rate": 9.098470221298085e-06, - "loss": 0.0617, + "epoch": 6.967176592900639, + "grad_norm": 0.8914174437522888, + "learning_rate": 3.032823407099362e-06, + "loss": 0.0555, "step": 46910 }, { - "epoch": 3.484330907470667, - "grad_norm": 0.7046768069267273, - "learning_rate": 9.094014555175998e-06, - "loss": 0.0789, + "epoch": 6.968661814941334, + "grad_norm": 0.38050782680511475, + "learning_rate": 3.0313381850586664e-06, + "loss": 0.053, "step": 46920 }, { - "epoch": 3.4850735184910144, - "grad_norm": 1.7392123937606812, - "learning_rate": 9.089558889053913e-06, - "loss": 0.0574, + "epoch": 6.970147036982029, + "grad_norm": 0.38610634207725525, + "learning_rate": 3.0298529630179712e-06, + "loss": 0.0738, "step": 46930 }, { - "epoch": 3.485816129511362, - "grad_norm": 0.9724016189575195, - "learning_rate": 9.08510322293183e-06, - "loss": 0.0793, + "epoch": 6.971632259022724, + "grad_norm": 1.3435474634170532, + "learning_rate": 3.0283677409772765e-06, + "loss": 0.0648, "step": 46940 }, { - "epoch": 3.4865587405317093, - "grad_norm": 1.3222392797470093, - "learning_rate": 9.080647556809743e-06, - "loss": 0.0675, + "epoch": 6.973117481063419, + "grad_norm": 1.6155376434326172, + "learning_rate": 3.0268825189365814e-06, + "loss": 0.0642, "step": 46950 }, { - "epoch": 3.4873013515520572, - "grad_norm": 1.0925577878952026, - "learning_rate": 9.076191890687658e-06, - "loss": 0.0428, + "epoch": 6.9746027031041145, + "grad_norm": 0.6376026272773743, + "learning_rate": 3.0253972968958862e-06, + "loss": 0.0509, "step": 46960 }, { - "epoch": 3.4880439625724047, - "grad_norm": 0.7402594685554504, - "learning_rate": 9.071736224565572e-06, - "loss": 0.0559, + "epoch": 6.976087925144809, + "grad_norm": 1.0941927433013916, + "learning_rate": 3.023912074855191e-06, + "loss": 0.0715, "step": 46970 }, { - "epoch": 3.488786573592752, - "grad_norm": 2.0264675617218018, - "learning_rate": 9.067280558443488e-06, - "loss": 0.0425, + "epoch": 6.977573147185504, + "grad_norm": 1.121254324913025, + "learning_rate": 3.0224268528144964e-06, + "loss": 0.0567, "step": 46980 }, { - "epoch": 3.4895291846130996, - "grad_norm": 1.4667295217514038, - "learning_rate": 9.062824892321403e-06, - "loss": 0.0659, + "epoch": 6.979058369226199, + "grad_norm": 1.4150434732437134, + "learning_rate": 3.020941630773801e-06, + "loss": 0.0661, "step": 46990 }, { - "epoch": 3.490271795633447, - "grad_norm": 2.150097131729126, - "learning_rate": 9.058369226199317e-06, - "loss": 0.0656, + "epoch": 6.980543591266894, + "grad_norm": 0.7081685662269592, + "learning_rate": 3.0194564087331056e-06, + "loss": 0.0726, "step": 47000 }, { - "epoch": 3.491014406653795, - "grad_norm": 0.9886521100997925, - "learning_rate": 9.053913560077232e-06, - "loss": 0.0598, + "epoch": 6.98202881330759, + "grad_norm": 0.5111793279647827, + "learning_rate": 3.0179711866924105e-06, + "loss": 0.0451, "step": 47010 }, { - "epoch": 3.4917570176741424, - "grad_norm": 1.4608570337295532, - "learning_rate": 9.049457893955147e-06, - "loss": 0.1056, + "epoch": 6.983514035348285, + "grad_norm": 1.0652034282684326, + "learning_rate": 3.0164859646517158e-06, + "loss": 0.055, "step": 47020 }, { - "epoch": 3.49249962869449, - "grad_norm": 0.7766015529632568, - "learning_rate": 9.045002227833061e-06, - "loss": 0.0371, + "epoch": 6.98499925738898, + "grad_norm": 0.7724589109420776, + "learning_rate": 3.0150007426110206e-06, + "loss": 0.0593, "step": 47030 }, { - "epoch": 3.4932422397148373, - "grad_norm": 0.7282046675682068, - "learning_rate": 9.040546561710975e-06, - "loss": 0.0582, + "epoch": 6.9864844794296745, + "grad_norm": 1.0594000816345215, + "learning_rate": 3.0135155205703255e-06, + "loss": 0.0631, "step": 47040 }, { - "epoch": 3.4939848507351847, - "grad_norm": 2.043895721435547, - "learning_rate": 9.036090895588891e-06, - "loss": 0.0516, + "epoch": 6.987969701470369, + "grad_norm": 0.5354403853416443, + "learning_rate": 3.0120302985296308e-06, + "loss": 0.0618, "step": 47050 }, { - "epoch": 3.4947274617555326, - "grad_norm": 0.4283212721347809, - "learning_rate": 9.031635229466806e-06, - "loss": 0.0603, + "epoch": 6.989454923511065, + "grad_norm": 0.9589335322380066, + "learning_rate": 3.0105450764889356e-06, + "loss": 0.0534, "step": 47060 }, { - "epoch": 3.49547007277588, - "grad_norm": 1.2481293678283691, - "learning_rate": 9.02717956334472e-06, - "loss": 0.0381, + "epoch": 6.99094014555176, + "grad_norm": 0.591218113899231, + "learning_rate": 3.00905985444824e-06, + "loss": 0.0506, "step": 47070 }, { - "epoch": 3.4962126837962275, - "grad_norm": 1.1199532747268677, - "learning_rate": 9.022723897222635e-06, - "loss": 0.0837, + "epoch": 6.992425367592455, + "grad_norm": 1.093970537185669, + "learning_rate": 3.007574632407545e-06, + "loss": 0.0632, "step": 47080 }, { - "epoch": 3.496955294816575, - "grad_norm": 1.4884730577468872, - "learning_rate": 9.01826823110055e-06, - "loss": 0.0265, + "epoch": 6.99391058963315, + "grad_norm": 0.9521450996398926, + "learning_rate": 3.00608941036685e-06, + "loss": 0.0761, "step": 47090 }, { - "epoch": 3.4976979058369224, - "grad_norm": 0.7303683757781982, - "learning_rate": 9.013812564978465e-06, - "loss": 0.0711, + "epoch": 6.995395811673845, + "grad_norm": 0.8672487139701843, + "learning_rate": 3.004604188326155e-06, + "loss": 0.0548, "step": 47100 }, { - "epoch": 3.4984405168572703, - "grad_norm": 1.0589395761489868, - "learning_rate": 9.00935689885638e-06, - "loss": 0.0711, + "epoch": 6.996881033714541, + "grad_norm": 1.1248619556427002, + "learning_rate": 3.00311896628546e-06, + "loss": 0.0533, "step": 47110 }, { - "epoch": 3.4991831278776178, - "grad_norm": 0.7774037718772888, - "learning_rate": 9.004901232734293e-06, - "loss": 0.0543, + "epoch": 6.9983662557552355, + "grad_norm": 0.7572886943817139, + "learning_rate": 3.0016337442447648e-06, + "loss": 0.0534, "step": 47120 }, { - "epoch": 3.499925738897965, - "grad_norm": 1.6601343154907227, - "learning_rate": 9.00044556661221e-06, - "loss": 0.0608, + "epoch": 6.99985147779593, + "grad_norm": 0.904132068157196, + "learning_rate": 3.00014852220407e-06, + "loss": 0.0623, "step": 47130 }, { - "epoch": 3.5006683499183127, - "grad_norm": 0.5749710202217102, - "learning_rate": 8.995989900490123e-06, - "loss": 0.0565, + "epoch": 7.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.05673398822546005, + "eval_runtime": 213.0335, + "eval_samples_per_second": 178.465, + "eval_steps_per_second": 5.581, + "step": 47131 + }, + { + "epoch": 7.001336699836625, + "grad_norm": 1.1677008867263794, + "learning_rate": 2.998663300163375e-06, + "loss": 0.0588, "step": 47140 }, { - "epoch": 3.50141096093866, - "grad_norm": 1.1628215312957764, - "learning_rate": 8.991534234368038e-06, - "loss": 0.0382, + "epoch": 7.00282192187732, + "grad_norm": 1.201698660850525, + "learning_rate": 2.9971780781226793e-06, + "loss": 0.0475, "step": 47150 }, { - "epoch": 3.502153571959008, - "grad_norm": 1.716511607170105, - "learning_rate": 8.987078568245953e-06, - "loss": 0.0662, + "epoch": 7.004307143918016, + "grad_norm": 1.3719254732131958, + "learning_rate": 2.995692856081984e-06, + "loss": 0.0736, "step": 47160 }, { - "epoch": 3.5028961829793555, - "grad_norm": 1.7500982284545898, - "learning_rate": 8.982622902123868e-06, - "loss": 0.0447, + "epoch": 7.005792365958711, + "grad_norm": 0.3711611032485962, + "learning_rate": 2.9942076340412895e-06, + "loss": 0.0631, "step": 47170 }, { - "epoch": 3.503638793999703, - "grad_norm": 1.2955238819122314, - "learning_rate": 8.978167236001783e-06, - "loss": 0.0674, + "epoch": 7.007277587999406, + "grad_norm": 0.5947009325027466, + "learning_rate": 2.9927224120005943e-06, + "loss": 0.0641, "step": 47180 }, { - "epoch": 3.5043814050200504, - "grad_norm": 0.32319340109825134, - "learning_rate": 8.973711569879696e-06, - "loss": 0.0294, + "epoch": 7.008762810040101, + "grad_norm": 0.6703227162361145, + "learning_rate": 2.991237189959899e-06, + "loss": 0.0587, "step": 47190 }, { - "epoch": 3.505124016040398, - "grad_norm": 1.2012195587158203, - "learning_rate": 8.969255903757613e-06, - "loss": 0.085, + "epoch": 7.0102480320807965, + "grad_norm": 0.85968416929245, + "learning_rate": 2.9897519679192045e-06, + "loss": 0.0553, "step": 47200 }, { - "epoch": 3.5058666270607457, - "grad_norm": 1.1960065364837646, - "learning_rate": 8.964800237635526e-06, - "loss": 0.0697, + "epoch": 7.011733254121491, + "grad_norm": 0.6204405426979065, + "learning_rate": 2.9882667458785093e-06, + "loss": 0.0583, "step": 47210 }, { - "epoch": 3.506609238081093, - "grad_norm": 0.929478108882904, - "learning_rate": 8.960344571513441e-06, - "loss": 0.0367, + "epoch": 7.013218476162186, + "grad_norm": 1.092622995376587, + "learning_rate": 2.9867815238378138e-06, + "loss": 0.0634, "step": 47220 }, { - "epoch": 3.5073518491014406, - "grad_norm": 0.9691451787948608, - "learning_rate": 8.955888905391356e-06, - "loss": 0.0622, + "epoch": 7.014703698202881, + "grad_norm": 0.9893145561218262, + "learning_rate": 2.9852963017971186e-06, + "loss": 0.0891, "step": 47230 }, { - "epoch": 3.508094460121788, - "grad_norm": 1.1364026069641113, - "learning_rate": 8.951433239269271e-06, - "loss": 0.0581, + "epoch": 7.016188920243576, + "grad_norm": 0.6505508422851562, + "learning_rate": 2.983811079756424e-06, + "loss": 0.0728, "step": 47240 }, { - "epoch": 3.5088370711421355, - "grad_norm": 1.0724977254867554, - "learning_rate": 8.946977573147186e-06, - "loss": 0.06, + "epoch": 7.017674142284272, + "grad_norm": 2.0147671699523926, + "learning_rate": 2.9823258577157288e-06, + "loss": 0.0625, "step": 47250 }, { - "epoch": 3.5095796821624834, - "grad_norm": 2.1212868690490723, - "learning_rate": 8.9425219070251e-06, - "loss": 0.0663, + "epoch": 7.019159364324967, + "grad_norm": 0.79218989610672, + "learning_rate": 2.9808406356750336e-06, + "loss": 0.0661, "step": 47260 }, { - "epoch": 3.510322293182831, - "grad_norm": 0.43843552470207214, - "learning_rate": 8.938066240903015e-06, - "loss": 0.0727, + "epoch": 7.020644586365662, + "grad_norm": 0.8125013113021851, + "learning_rate": 2.9793554136343385e-06, + "loss": 0.0577, "step": 47270 }, { - "epoch": 3.5110649042031783, - "grad_norm": 1.7042205333709717, - "learning_rate": 8.933610574780931e-06, - "loss": 0.0521, + "epoch": 7.022129808406357, + "grad_norm": 0.9180750846862793, + "learning_rate": 2.9778701915936438e-06, + "loss": 0.0676, "step": 47280 }, { - "epoch": 3.511807515223526, - "grad_norm": 1.1995595693588257, - "learning_rate": 8.929154908658845e-06, - "loss": 0.0867, + "epoch": 7.0236150304470515, + "grad_norm": 0.8958239555358887, + "learning_rate": 2.9763849695529486e-06, + "loss": 0.0623, "step": 47290 }, { - "epoch": 3.512550126243873, - "grad_norm": 0.9248149394989014, - "learning_rate": 8.92469924253676e-06, - "loss": 0.0627, + "epoch": 7.025100252487747, + "grad_norm": 1.1006089448928833, + "learning_rate": 2.974899747512253e-06, + "loss": 0.0595, "step": 47300 }, { - "epoch": 3.513292737264221, - "grad_norm": 0.9813995957374573, - "learning_rate": 8.920243576414675e-06, - "loss": 0.0924, + "epoch": 7.026585474528442, + "grad_norm": 1.45045006275177, + "learning_rate": 2.9734145254715583e-06, + "loss": 0.0555, "step": 47310 }, { - "epoch": 3.5140353482845685, - "grad_norm": 1.9305483102798462, - "learning_rate": 8.91578791029259e-06, - "loss": 0.054, + "epoch": 7.028070696569137, + "grad_norm": 1.0360887050628662, + "learning_rate": 2.971929303430863e-06, + "loss": 0.0668, "step": 47320 }, { - "epoch": 3.514777959304916, - "grad_norm": 0.959563672542572, - "learning_rate": 8.911332244170503e-06, - "loss": 0.0512, + "epoch": 7.029555918609832, + "grad_norm": 1.3869552612304688, + "learning_rate": 2.970444081390168e-06, + "loss": 0.0509, "step": 47330 }, { - "epoch": 3.515520570325264, - "grad_norm": 0.4607963263988495, - "learning_rate": 8.906876578048418e-06, - "loss": 0.0822, + "epoch": 7.031041140650527, + "grad_norm": 1.085456132888794, + "learning_rate": 2.968958859349473e-06, + "loss": 0.0613, "step": 47340 }, { - "epoch": 3.5162631813456113, - "grad_norm": 0.6391094923019409, - "learning_rate": 8.902420911926335e-06, - "loss": 0.0722, + "epoch": 7.032526362691223, + "grad_norm": 1.137816071510315, + "learning_rate": 2.967473637308778e-06, + "loss": 0.0499, "step": 47350 }, { - "epoch": 3.517005792365959, - "grad_norm": 1.5800341367721558, - "learning_rate": 8.897965245804248e-06, - "loss": 0.0268, + "epoch": 7.034011584731918, + "grad_norm": 0.6190617680549622, + "learning_rate": 2.965988415268083e-06, + "loss": 0.0494, "step": 47360 }, { - "epoch": 3.5177484033863062, - "grad_norm": 0.609835147857666, - "learning_rate": 8.893509579682163e-06, - "loss": 0.0625, + "epoch": 7.0354968067726125, + "grad_norm": 0.7790008187294006, + "learning_rate": 2.9645031932273875e-06, + "loss": 0.0554, "step": 47370 }, { - "epoch": 3.5184910144066537, - "grad_norm": 2.046144723892212, - "learning_rate": 8.889053913560076e-06, - "loss": 0.0691, + "epoch": 7.036982028813307, + "grad_norm": 0.7529904246330261, + "learning_rate": 2.9630179711866923e-06, + "loss": 0.047, "step": 47380 }, { - "epoch": 3.5192336254270016, - "grad_norm": 1.5540525913238525, - "learning_rate": 8.884598247437993e-06, - "loss": 0.0637, + "epoch": 7.038467250854002, + "grad_norm": 0.9241394400596619, + "learning_rate": 2.9615327491459976e-06, + "loss": 0.053, "step": 47390 }, { - "epoch": 3.519976236447349, - "grad_norm": 0.6543061137199402, - "learning_rate": 8.880142581315908e-06, - "loss": 0.0655, + "epoch": 7.039952472894698, + "grad_norm": 0.6133353114128113, + "learning_rate": 2.9600475271053025e-06, + "loss": 0.0574, "step": 47400 }, { - "epoch": 3.5207188474676965, - "grad_norm": 0.7677350640296936, - "learning_rate": 8.875686915193821e-06, - "loss": 0.0481, + "epoch": 7.041437694935393, + "grad_norm": 1.049790620803833, + "learning_rate": 2.9585623050646073e-06, + "loss": 0.0523, "step": 47410 }, { - "epoch": 3.521461458488044, - "grad_norm": 0.8277533054351807, - "learning_rate": 8.871231249071736e-06, - "loss": 0.0858, + "epoch": 7.042922916976088, + "grad_norm": 1.0434339046478271, + "learning_rate": 2.957077083023912e-06, + "loss": 0.0488, "step": 47420 }, { - "epoch": 3.5222040695083914, - "grad_norm": 0.4589194059371948, - "learning_rate": 8.866775582949651e-06, - "loss": 0.0333, + "epoch": 7.044408139016783, + "grad_norm": 0.4983573257923126, + "learning_rate": 2.9555918609832175e-06, + "loss": 0.0515, "step": 47430 }, { - "epoch": 3.5229466805287393, - "grad_norm": 1.8496214151382446, - "learning_rate": 8.862319916827566e-06, - "loss": 0.0732, + "epoch": 7.045893361057478, + "grad_norm": 0.8298611640930176, + "learning_rate": 2.9541066389425223e-06, + "loss": 0.0521, "step": 47440 }, { - "epoch": 3.5236892915490867, - "grad_norm": 2.326258659362793, - "learning_rate": 8.85786425070548e-06, - "loss": 0.0875, + "epoch": 7.047378583098173, + "grad_norm": 1.118912935256958, + "learning_rate": 2.9526214169018267e-06, + "loss": 0.0501, "step": 47450 }, { - "epoch": 3.524431902569434, - "grad_norm": 3.8180079460144043, - "learning_rate": 8.853408584583396e-06, - "loss": 0.0932, + "epoch": 7.048863805138868, + "grad_norm": 1.4120548963546753, + "learning_rate": 2.951136194861132e-06, + "loss": 0.0662, "step": 47460 }, { - "epoch": 3.5251745135897816, - "grad_norm": 2.513268232345581, - "learning_rate": 8.848952918461311e-06, - "loss": 0.065, + "epoch": 7.050349027179563, + "grad_norm": 0.8734990358352661, + "learning_rate": 2.949650972820437e-06, + "loss": 0.0572, "step": 47470 }, { - "epoch": 3.525917124610129, - "grad_norm": 0.91473788022995, - "learning_rate": 8.844497252339224e-06, - "loss": 0.068, + "epoch": 7.051834249220258, + "grad_norm": 1.285258412361145, + "learning_rate": 2.9481657507797417e-06, + "loss": 0.0516, "step": 47480 }, { - "epoch": 3.526659735630477, - "grad_norm": 1.168892502784729, - "learning_rate": 8.84004158621714e-06, - "loss": 0.0351, + "epoch": 7.053319471260954, + "grad_norm": 1.261879324913025, + "learning_rate": 2.9466805287390466e-06, + "loss": 0.0517, "step": 47490 }, { - "epoch": 3.5274023466508244, - "grad_norm": 0.857007622718811, - "learning_rate": 8.835585920095054e-06, - "loss": 0.08, + "epoch": 7.054804693301649, + "grad_norm": 1.157348871231079, + "learning_rate": 2.945195306698352e-06, + "loss": 0.0448, "step": 47500 }, { - "epoch": 3.528144957671172, - "grad_norm": 1.0127304792404175, - "learning_rate": 8.83113025397297e-06, - "loss": 0.0736, + "epoch": 7.056289915342344, + "grad_norm": 1.2821811437606812, + "learning_rate": 2.9437100846576567e-06, + "loss": 0.0491, "step": 47510 }, { - "epoch": 3.5288875686915193, - "grad_norm": 1.726643681526184, - "learning_rate": 8.826674587850884e-06, - "loss": 0.0578, + "epoch": 7.057775137383039, + "grad_norm": 0.40937334299087524, + "learning_rate": 2.9422248626169616e-06, + "loss": 0.0564, "step": 47520 }, { - "epoch": 3.5296301797118668, - "grad_norm": 1.7665687799453735, - "learning_rate": 8.822218921728798e-06, - "loss": 0.061, + "epoch": 7.0592603594237335, + "grad_norm": 0.4357692301273346, + "learning_rate": 2.940739640576266e-06, + "loss": 0.0635, "step": 47530 }, { - "epoch": 3.5303727907322147, - "grad_norm": 0.5315186977386475, - "learning_rate": 8.817763255606714e-06, - "loss": 0.0641, + "epoch": 7.060745581464429, + "grad_norm": 1.136781096458435, + "learning_rate": 2.9392544185355713e-06, + "loss": 0.0522, "step": 47540 }, { - "epoch": 3.531115401752562, - "grad_norm": 1.4870011806488037, - "learning_rate": 8.813307589484628e-06, - "loss": 0.0726, + "epoch": 7.062230803505124, + "grad_norm": 1.0357050895690918, + "learning_rate": 2.937769196494876e-06, + "loss": 0.0569, "step": 47550 }, { - "epoch": 3.5318580127729096, - "grad_norm": 1.0774098634719849, - "learning_rate": 8.808851923362543e-06, - "loss": 0.0726, + "epoch": 7.063716025545819, + "grad_norm": 0.6880903244018555, + "learning_rate": 2.936283974454181e-06, + "loss": 0.0573, "step": 47560 }, { - "epoch": 3.532600623793257, - "grad_norm": 2.8988194465637207, - "learning_rate": 8.80439625724046e-06, - "loss": 0.0568, + "epoch": 7.065201247586514, + "grad_norm": 0.8587028384208679, + "learning_rate": 2.9347987524134863e-06, + "loss": 0.0617, "step": 47570 }, { - "epoch": 3.5333432348136045, - "grad_norm": 0.4730290472507477, - "learning_rate": 8.799940591118373e-06, - "loss": 0.053, + "epoch": 7.066686469627209, + "grad_norm": 1.4754736423492432, + "learning_rate": 2.933313530372791e-06, + "loss": 0.0606, "step": 47580 }, { - "epoch": 3.5340858458339524, - "grad_norm": 1.4914735555648804, - "learning_rate": 8.795484924996288e-06, - "loss": 0.0601, + "epoch": 7.068171691667905, + "grad_norm": 1.347342610359192, + "learning_rate": 2.931828308332096e-06, + "loss": 0.066, "step": 47590 }, { - "epoch": 3.5348284568543, - "grad_norm": 1.6329556703567505, - "learning_rate": 8.791029258874201e-06, - "loss": 0.0553, + "epoch": 7.0696569137086, + "grad_norm": 1.1508538722991943, + "learning_rate": 2.9303430862914004e-06, + "loss": 0.0544, "step": 47600 }, { - "epoch": 3.5355710678746473, - "grad_norm": 2.3614673614501953, - "learning_rate": 8.786573592752118e-06, - "loss": 0.0744, + "epoch": 7.0711421357492945, + "grad_norm": 1.5262290239334106, + "learning_rate": 2.928857864250706e-06, + "loss": 0.0514, "step": 47610 }, { - "epoch": 3.5363136788949947, - "grad_norm": 2.2033894062042236, - "learning_rate": 8.782117926630031e-06, - "loss": 0.0615, + "epoch": 7.072627357789989, + "grad_norm": 0.5285647511482239, + "learning_rate": 2.9273726422100106e-06, + "loss": 0.0499, "step": 47620 }, { - "epoch": 3.537056289915342, - "grad_norm": 0.42361772060394287, - "learning_rate": 8.777662260507946e-06, - "loss": 0.0494, + "epoch": 7.074112579830684, + "grad_norm": 0.4555477797985077, + "learning_rate": 2.9258874201693154e-06, + "loss": 0.0359, "step": 47630 }, { - "epoch": 3.53779890093569, - "grad_norm": 1.1795815229415894, - "learning_rate": 8.773206594385861e-06, - "loss": 0.0633, + "epoch": 7.07559780187138, + "grad_norm": 0.7044863700866699, + "learning_rate": 2.9244021981286203e-06, + "loss": 0.0638, "step": 47640 }, { - "epoch": 3.5385415119560375, - "grad_norm": 1.4586540460586548, - "learning_rate": 8.768750928263776e-06, - "loss": 0.0525, + "epoch": 7.077083023912075, + "grad_norm": 1.0085748434066772, + "learning_rate": 2.9229169760879256e-06, + "loss": 0.0764, "step": 47650 }, { - "epoch": 3.539284122976385, - "grad_norm": 0.6656326055526733, - "learning_rate": 8.764295262141691e-06, - "loss": 0.0831, + "epoch": 7.07856824595277, + "grad_norm": 1.3109335899353027, + "learning_rate": 2.9214317540472304e-06, + "loss": 0.0607, "step": 47660 }, { - "epoch": 3.5400267339967324, - "grad_norm": 0.5945910215377808, - "learning_rate": 8.759839596019604e-06, - "loss": 0.0444, + "epoch": 7.080053467993465, + "grad_norm": 0.44020575284957886, + "learning_rate": 2.9199465320065353e-06, + "loss": 0.0583, "step": 47670 }, { - "epoch": 3.54076934501708, - "grad_norm": 1.4914181232452393, - "learning_rate": 8.75538392989752e-06, - "loss": 0.0344, + "epoch": 7.08153869003416, + "grad_norm": 0.9954208731651306, + "learning_rate": 2.9184613099658397e-06, + "loss": 0.0534, "step": 47680 }, { - "epoch": 3.5415119560374277, - "grad_norm": 1.5932813882827759, - "learning_rate": 8.750928263775436e-06, - "loss": 0.0624, + "epoch": 7.0830239120748555, + "grad_norm": 0.6965577006340027, + "learning_rate": 2.916976087925145e-06, + "loss": 0.0697, "step": 47690 }, { - "epoch": 3.542254567057775, - "grad_norm": 1.4807531833648682, - "learning_rate": 8.74647259765335e-06, - "loss": 0.0671, + "epoch": 7.08450913411555, + "grad_norm": 1.5124120712280273, + "learning_rate": 2.91549086588445e-06, + "loss": 0.0621, "step": 47700 }, { - "epoch": 3.5429971780781226, - "grad_norm": 3.4547231197357178, - "learning_rate": 8.742016931531264e-06, - "loss": 0.0489, + "epoch": 7.085994356156245, + "grad_norm": 1.4099786281585693, + "learning_rate": 2.9140056438437547e-06, + "loss": 0.0685, "step": 47710 }, { - "epoch": 3.54373978909847, - "grad_norm": 1.1375788450241089, - "learning_rate": 8.73756126540918e-06, - "loss": 0.0641, + "epoch": 7.08747957819694, + "grad_norm": 0.9136437773704529, + "learning_rate": 2.91252042180306e-06, + "loss": 0.063, "step": 47720 }, { - "epoch": 3.5444824001188175, - "grad_norm": 1.481046199798584, - "learning_rate": 8.733105599287094e-06, - "loss": 0.0676, + "epoch": 7.088964800237635, + "grad_norm": 0.8798291087150574, + "learning_rate": 2.911035199762365e-06, + "loss": 0.0601, "step": 47730 }, { - "epoch": 3.5452250111391654, - "grad_norm": 0.4229665994644165, - "learning_rate": 8.728649933165007e-06, - "loss": 0.0647, + "epoch": 7.090450022278331, + "grad_norm": 1.518417239189148, + "learning_rate": 2.9095499777216697e-06, + "loss": 0.049, "step": 47740 }, { - "epoch": 3.545967622159513, - "grad_norm": 1.467894196510315, - "learning_rate": 8.724194267042922e-06, - "loss": 0.0597, + "epoch": 7.091935244319026, + "grad_norm": 0.8989495635032654, + "learning_rate": 2.908064755680974e-06, + "loss": 0.0536, "step": 47750 }, { - "epoch": 3.5467102331798603, - "grad_norm": 0.5830600261688232, - "learning_rate": 8.719738600920839e-06, - "loss": 0.0649, + "epoch": 7.093420466359721, + "grad_norm": 1.1232954263687134, + "learning_rate": 2.90657953364028e-06, + "loss": 0.0645, "step": 47760 }, { - "epoch": 3.547452844200208, - "grad_norm": 1.7029845714569092, - "learning_rate": 8.715282934798752e-06, - "loss": 0.104, + "epoch": 7.094905688400416, + "grad_norm": 1.2198522090911865, + "learning_rate": 2.9050943115995843e-06, + "loss": 0.0608, "step": 47770 }, { - "epoch": 3.5481954552205552, - "grad_norm": 0.429775208234787, - "learning_rate": 8.710827268676667e-06, - "loss": 0.0556, + "epoch": 7.096390910441111, + "grad_norm": 0.6304935812950134, + "learning_rate": 2.903609089558889e-06, + "loss": 0.0599, "step": 47780 }, { - "epoch": 3.548938066240903, - "grad_norm": 2.0149717330932617, - "learning_rate": 8.70637160255458e-06, - "loss": 0.0864, + "epoch": 7.097876132481806, + "grad_norm": 1.277036190032959, + "learning_rate": 2.902123867518194e-06, + "loss": 0.0537, "step": 47790 }, { - "epoch": 3.5496806772612506, - "grad_norm": 3.289201498031616, - "learning_rate": 8.701915936432497e-06, - "loss": 0.077, + "epoch": 7.099361354522501, + "grad_norm": 0.8688114285469055, + "learning_rate": 2.9006386454774993e-06, + "loss": 0.0521, "step": 47800 }, { - "epoch": 3.550423288281598, - "grad_norm": 0.7623452544212341, - "learning_rate": 8.697460270310412e-06, - "loss": 0.063, + "epoch": 7.100846576563196, + "grad_norm": 1.1619441509246826, + "learning_rate": 2.899153423436804e-06, + "loss": 0.0802, "step": 47810 }, { - "epoch": 3.5511658993019455, - "grad_norm": 1.59382164478302, - "learning_rate": 8.693004604188326e-06, - "loss": 0.0783, + "epoch": 7.102331798603891, + "grad_norm": 1.2505648136138916, + "learning_rate": 2.897668201396109e-06, + "loss": 0.0449, "step": 47820 }, { - "epoch": 3.551908510322293, - "grad_norm": 1.199479579925537, - "learning_rate": 8.688548938066242e-06, - "loss": 0.0654, + "epoch": 7.103817020644587, + "grad_norm": 1.1496368646621704, + "learning_rate": 2.8961829793554143e-06, + "loss": 0.0743, "step": 47830 }, { - "epoch": 3.552651121342641, - "grad_norm": 0.7450114488601685, - "learning_rate": 8.684093271944156e-06, - "loss": 0.0612, + "epoch": 7.105302242685282, + "grad_norm": 0.9446761608123779, + "learning_rate": 2.8946977573147187e-06, + "loss": 0.0607, "step": 47840 }, { - "epoch": 3.5533937323629883, - "grad_norm": 2.706019163131714, - "learning_rate": 8.67963760582207e-06, - "loss": 0.0717, + "epoch": 7.1067874647259766, + "grad_norm": 0.35389065742492676, + "learning_rate": 2.8932125352740236e-06, + "loss": 0.0491, "step": 47850 }, { - "epoch": 3.5541363433833357, - "grad_norm": 1.1404179334640503, - "learning_rate": 8.675181939699986e-06, - "loss": 0.0612, + "epoch": 7.1082726867666715, + "grad_norm": 0.4818675220012665, + "learning_rate": 2.8917273132333284e-06, + "loss": 0.041, "step": 47860 }, { - "epoch": 3.5548789544036836, - "grad_norm": 4.893725872039795, - "learning_rate": 8.6707262735779e-06, - "loss": 0.0612, + "epoch": 7.109757908807366, + "grad_norm": 1.4597423076629639, + "learning_rate": 2.8902420911926337e-06, + "loss": 0.0574, "step": 47870 }, { - "epoch": 3.5556215654240306, - "grad_norm": 2.2265067100524902, - "learning_rate": 8.666270607455816e-06, - "loss": 0.0517, + "epoch": 7.111243130848062, + "grad_norm": 1.8125760555267334, + "learning_rate": 2.8887568691519386e-06, + "loss": 0.0627, "step": 47880 }, { - "epoch": 3.5563641764443785, - "grad_norm": 0.6842568516731262, - "learning_rate": 8.661814941333729e-06, - "loss": 0.0562, + "epoch": 7.112728352888757, + "grad_norm": 0.7303992509841919, + "learning_rate": 2.8872716471112434e-06, + "loss": 0.0453, "step": 47890 }, { - "epoch": 3.557106787464726, - "grad_norm": 0.7433666586875916, - "learning_rate": 8.657359275211644e-06, - "loss": 0.0586, + "epoch": 7.114213574929452, + "grad_norm": 0.7685091495513916, + "learning_rate": 2.8857864250705483e-06, + "loss": 0.046, "step": 47900 }, { - "epoch": 3.5578493984850734, - "grad_norm": 1.5881272554397583, - "learning_rate": 8.652903609089559e-06, - "loss": 0.0667, + "epoch": 7.115698796970147, + "grad_norm": 1.3156096935272217, + "learning_rate": 2.8843012030298536e-06, + "loss": 0.0608, "step": 47910 }, { - "epoch": 3.5585920095054213, - "grad_norm": 1.4796943664550781, - "learning_rate": 8.648447942967474e-06, - "loss": 0.0797, + "epoch": 7.117184019010842, + "grad_norm": 0.556989848613739, + "learning_rate": 2.882815980989158e-06, + "loss": 0.0674, "step": 47920 }, { - "epoch": 3.5593346205257688, - "grad_norm": 1.468156337738037, - "learning_rate": 8.643992276845389e-06, - "loss": 0.0655, + "epoch": 7.1186692410515375, + "grad_norm": 0.4582745134830475, + "learning_rate": 2.881330758948463e-06, + "loss": 0.0505, "step": 47930 }, { - "epoch": 3.560077231546116, - "grad_norm": 1.8605856895446777, - "learning_rate": 8.639536610723302e-06, - "loss": 0.0929, + "epoch": 7.120154463092232, + "grad_norm": 0.7635936141014099, + "learning_rate": 2.8798455369077677e-06, + "loss": 0.0583, "step": 47940 }, { - "epoch": 3.5608198425664637, - "grad_norm": 0.9194366335868835, - "learning_rate": 8.635080944601219e-06, - "loss": 0.0498, + "epoch": 7.121639685132927, + "grad_norm": 1.5454378128051758, + "learning_rate": 2.878360314867073e-06, + "loss": 0.0627, "step": 47950 }, { - "epoch": 3.561562453586811, - "grad_norm": 1.5396569967269897, - "learning_rate": 8.630625278479132e-06, - "loss": 0.0706, + "epoch": 7.123124907173622, + "grad_norm": 1.238945484161377, + "learning_rate": 2.876875092826378e-06, + "loss": 0.0666, "step": 47960 }, { - "epoch": 3.562305064607159, - "grad_norm": 2.437840700149536, - "learning_rate": 8.626169612357047e-06, - "loss": 0.0877, + "epoch": 7.124610129214317, + "grad_norm": 0.886013388633728, + "learning_rate": 2.8753898707856827e-06, + "loss": 0.0593, "step": 47970 }, { - "epoch": 3.5630476756275065, - "grad_norm": 0.9188141822814941, - "learning_rate": 8.621713946234964e-06, - "loss": 0.0793, + "epoch": 7.126095351255013, + "grad_norm": 0.6208621263504028, + "learning_rate": 2.873904648744988e-06, + "loss": 0.051, "step": 47980 }, { - "epoch": 3.563790286647854, - "grad_norm": 1.7149858474731445, - "learning_rate": 8.617258280112877e-06, - "loss": 0.0615, + "epoch": 7.127580573295708, + "grad_norm": 0.888614296913147, + "learning_rate": 2.872419426704293e-06, + "loss": 0.0587, "step": 47990 }, { - "epoch": 3.5645328976682014, - "grad_norm": 0.9212315082550049, - "learning_rate": 8.612802613990792e-06, - "loss": 0.0857, + "epoch": 7.129065795336403, + "grad_norm": 0.8305061459541321, + "learning_rate": 2.8709342046635973e-06, + "loss": 0.0458, "step": 48000 }, { - "epoch": 3.565275508688549, - "grad_norm": 1.6391431093215942, - "learning_rate": 8.608346947868706e-06, - "loss": 0.0434, + "epoch": 7.130551017377098, + "grad_norm": 0.8907108306884766, + "learning_rate": 2.869448982622902e-06, + "loss": 0.0377, "step": 48010 }, { - "epoch": 3.5660181197088967, - "grad_norm": 0.9591582417488098, - "learning_rate": 8.603891281746622e-06, - "loss": 0.0665, + "epoch": 7.132036239417793, + "grad_norm": 1.2513445615768433, + "learning_rate": 2.8679637605822074e-06, + "loss": 0.0579, "step": 48020 }, { - "epoch": 3.566760730729244, - "grad_norm": 0.856239914894104, - "learning_rate": 8.599435615624536e-06, - "loss": 0.0416, + "epoch": 7.133521461458488, + "grad_norm": 0.8097134828567505, + "learning_rate": 2.8664785385415123e-06, + "loss": 0.054, "step": 48030 }, { - "epoch": 3.5675033417495916, - "grad_norm": 0.6518556475639343, - "learning_rate": 8.59497994950245e-06, - "loss": 0.0264, + "epoch": 7.135006683499183, + "grad_norm": 0.735083281993866, + "learning_rate": 2.864993316500817e-06, + "loss": 0.0636, "step": 48040 }, { - "epoch": 3.568245952769939, - "grad_norm": 1.1841713190078735, - "learning_rate": 8.590524283380365e-06, - "loss": 0.0806, + "epoch": 7.136491905539878, + "grad_norm": 0.6065291166305542, + "learning_rate": 2.863508094460122e-06, + "loss": 0.0515, "step": 48050 }, { - "epoch": 3.5689885637902865, - "grad_norm": 0.7646443247795105, - "learning_rate": 8.58606861725828e-06, - "loss": 0.0474, + "epoch": 7.137977127580573, + "grad_norm": 0.965104341506958, + "learning_rate": 2.8620228724194273e-06, + "loss": 0.0582, "step": 48060 }, { - "epoch": 3.5697311748106344, - "grad_norm": 2.25919246673584, - "learning_rate": 8.581612951136195e-06, - "loss": 0.0511, + "epoch": 7.139462349621269, + "grad_norm": 1.0279301404953003, + "learning_rate": 2.8605376503787317e-06, + "loss": 0.0504, "step": 48070 }, { - "epoch": 3.570473785830982, - "grad_norm": 2.300975799560547, - "learning_rate": 8.577157285014109e-06, - "loss": 0.0626, + "epoch": 7.140947571661964, + "grad_norm": 0.6441022157669067, + "learning_rate": 2.8590524283380365e-06, + "loss": 0.0627, "step": 48080 }, { - "epoch": 3.5712163968513293, - "grad_norm": 0.9677648544311523, - "learning_rate": 8.572701618892024e-06, - "loss": 0.0506, + "epoch": 7.142432793702659, + "grad_norm": 0.9285979866981506, + "learning_rate": 2.8575672062973414e-06, + "loss": 0.0516, "step": 48090 }, { - "epoch": 3.5719590078716768, - "grad_norm": 2.2813456058502197, - "learning_rate": 8.56824595276994e-06, - "loss": 0.0981, + "epoch": 7.1439180157433535, + "grad_norm": 1.1106793880462646, + "learning_rate": 2.8560819842566467e-06, + "loss": 0.0791, "step": 48100 }, { - "epoch": 3.572701618892024, - "grad_norm": 1.1389786005020142, - "learning_rate": 8.563790286647854e-06, - "loss": 0.045, + "epoch": 7.145403237784048, + "grad_norm": 0.6336337327957153, + "learning_rate": 2.8545967622159515e-06, + "loss": 0.0373, "step": 48110 }, { - "epoch": 3.573444229912372, - "grad_norm": 0.6446773409843445, - "learning_rate": 8.559334620525769e-06, - "loss": 0.0623, + "epoch": 7.146888459824744, + "grad_norm": 0.7305782437324524, + "learning_rate": 2.8531115401752564e-06, + "loss": 0.0568, "step": 48120 }, { - "epoch": 3.5741868409327195, - "grad_norm": 0.8095260858535767, - "learning_rate": 8.554878954403684e-06, - "loss": 0.039, + "epoch": 7.148373681865439, + "grad_norm": 0.8777934908866882, + "learning_rate": 2.8516263181345617e-06, + "loss": 0.0588, "step": 48130 }, { - "epoch": 3.574929451953067, - "grad_norm": 1.6221411228179932, - "learning_rate": 8.550423288281599e-06, - "loss": 0.0335, + "epoch": 7.149858903906134, + "grad_norm": 0.6388712525367737, + "learning_rate": 2.8501410960938665e-06, + "loss": 0.0405, "step": 48140 }, { - "epoch": 3.5756720629734144, - "grad_norm": 2.590031385421753, - "learning_rate": 8.545967622159512e-06, - "loss": 0.0626, + "epoch": 7.151344125946829, + "grad_norm": 0.9644005298614502, + "learning_rate": 2.848655874053171e-06, + "loss": 0.0517, "step": 48150 }, { - "epoch": 3.576414673993762, - "grad_norm": 1.4469174146652222, - "learning_rate": 8.541511956037427e-06, - "loss": 0.0582, + "epoch": 7.152829347987524, + "grad_norm": 0.5722053050994873, + "learning_rate": 2.847170652012476e-06, + "loss": 0.0615, "step": 48160 }, { - "epoch": 3.57715728501411, - "grad_norm": 1.5746777057647705, - "learning_rate": 8.537056289915344e-06, - "loss": 0.0842, + "epoch": 7.15431457002822, + "grad_norm": 0.550632119178772, + "learning_rate": 2.845685429971781e-06, + "loss": 0.0488, "step": 48170 }, { - "epoch": 3.5778998960344572, - "grad_norm": 1.6506327390670776, - "learning_rate": 8.532600623793257e-06, - "loss": 0.0394, + "epoch": 7.1557997920689145, + "grad_norm": 0.8180745244026184, + "learning_rate": 2.844200207931086e-06, + "loss": 0.0569, "step": 48180 }, { - "epoch": 3.5786425070548047, - "grad_norm": 1.0401891469955444, - "learning_rate": 8.528144957671172e-06, - "loss": 0.0653, + "epoch": 7.157285014109609, + "grad_norm": 0.5829613208770752, + "learning_rate": 2.842714985890391e-06, + "loss": 0.0505, "step": 48190 }, { - "epoch": 3.579385118075152, - "grad_norm": 1.626905918121338, - "learning_rate": 8.523689291549085e-06, - "loss": 0.0546, + "epoch": 7.158770236150304, + "grad_norm": 1.0786627531051636, + "learning_rate": 2.8412297638496957e-06, + "loss": 0.0454, "step": 48200 }, { - "epoch": 3.5801277290954996, - "grad_norm": 1.3999053239822388, - "learning_rate": 8.519233625427002e-06, - "loss": 0.0506, + "epoch": 7.160255458190999, + "grad_norm": 1.1173996925354004, + "learning_rate": 2.839744541809001e-06, + "loss": 0.0581, "step": 48210 }, { - "epoch": 3.5808703401158475, - "grad_norm": 1.21640944480896, - "learning_rate": 8.514777959304917e-06, - "loss": 0.0772, + "epoch": 7.161740680231695, + "grad_norm": 1.616842269897461, + "learning_rate": 2.8382593197683054e-06, + "loss": 0.0575, "step": 48220 }, { - "epoch": 3.581612951136195, - "grad_norm": 0.853153645992279, - "learning_rate": 8.51032229318283e-06, - "loss": 0.0399, + "epoch": 7.16322590227239, + "grad_norm": 0.777524471282959, + "learning_rate": 2.8367740977276102e-06, + "loss": 0.0623, "step": 48230 }, { - "epoch": 3.5823555621565424, - "grad_norm": 1.5424240827560425, - "learning_rate": 8.505866627060747e-06, - "loss": 0.0593, + "epoch": 7.164711124313085, + "grad_norm": 1.2010893821716309, + "learning_rate": 2.8352888756869155e-06, + "loss": 0.049, "step": 48240 }, { - "epoch": 3.58309817317689, - "grad_norm": 0.3190561830997467, - "learning_rate": 8.50141096093866e-06, - "loss": 0.0463, + "epoch": 7.16619634635378, + "grad_norm": 0.6864365339279175, + "learning_rate": 2.8338036536462204e-06, + "loss": 0.0428, "step": 48250 }, { - "epoch": 3.5838407841972373, - "grad_norm": 1.145538568496704, - "learning_rate": 8.496955294816575e-06, - "loss": 0.044, + "epoch": 7.167681568394475, + "grad_norm": 0.8801079392433167, + "learning_rate": 2.8323184316055252e-06, + "loss": 0.0466, "step": 48260 }, { - "epoch": 3.584583395217585, - "grad_norm": 1.7267332077026367, - "learning_rate": 8.49249962869449e-06, - "loss": 0.065, + "epoch": 7.16916679043517, + "grad_norm": 1.5361732244491577, + "learning_rate": 2.83083320956483e-06, + "loss": 0.0508, "step": 48270 }, { - "epoch": 3.5853260062379326, - "grad_norm": 0.45990875363349915, - "learning_rate": 8.488043962572405e-06, - "loss": 0.0485, + "epoch": 7.170652012475865, + "grad_norm": 0.971454381942749, + "learning_rate": 2.8293479875241354e-06, + "loss": 0.055, "step": 48280 }, { - "epoch": 3.58606861725828, - "grad_norm": 0.5736406445503235, - "learning_rate": 8.48358829645032e-06, - "loss": 0.0597, + "epoch": 7.17213723451656, + "grad_norm": 0.7517418265342712, + "learning_rate": 2.8278627654834402e-06, + "loss": 0.0566, "step": 48290 }, { - "epoch": 3.5868112282786275, - "grad_norm": 0.34725332260131836, - "learning_rate": 8.479132630328234e-06, - "loss": 0.0634, + "epoch": 7.173622456557255, + "grad_norm": 0.8058810234069824, + "learning_rate": 2.8263775434427447e-06, + "loss": 0.0694, "step": 48300 }, { - "epoch": 3.587553839298975, - "grad_norm": 2.8280141353607178, - "learning_rate": 8.474676964206149e-06, - "loss": 0.0553, + "epoch": 7.17510767859795, + "grad_norm": 1.1214815378189087, + "learning_rate": 2.8248923214020495e-06, + "loss": 0.0699, "step": 48310 }, { - "epoch": 3.588296450319323, - "grad_norm": 0.43756672739982605, - "learning_rate": 8.470221298084064e-06, - "loss": 0.0565, + "epoch": 7.176592900638646, + "grad_norm": 0.5619127750396729, + "learning_rate": 2.823407099361355e-06, + "loss": 0.0687, "step": 48320 }, { - "epoch": 3.5890390613396703, - "grad_norm": 4.297688961029053, - "learning_rate": 8.465765631961979e-06, - "loss": 0.0723, + "epoch": 7.178078122679341, + "grad_norm": 0.9164327383041382, + "learning_rate": 2.8219218773206597e-06, + "loss": 0.0538, "step": 48330 }, { - "epoch": 3.5897816723600178, - "grad_norm": 1.2074358463287354, - "learning_rate": 8.461309965839894e-06, + "epoch": 7.1795633447200355, + "grad_norm": 0.5329567193984985, + "learning_rate": 2.8204366552799645e-06, "loss": 0.0601, "step": 48340 }, { - "epoch": 3.5905242833803652, - "grad_norm": 1.4937044382095337, - "learning_rate": 8.456854299717807e-06, - "loss": 0.0968, + "epoch": 7.1810485667607304, + "grad_norm": 1.0147507190704346, + "learning_rate": 2.8189514332392694e-06, + "loss": 0.0596, "step": 48350 }, { - "epoch": 3.5912668944007127, - "grad_norm": 1.5273339748382568, - "learning_rate": 8.452398633595724e-06, - "loss": 0.0736, + "epoch": 7.182533788801426, + "grad_norm": 0.6977734565734863, + "learning_rate": 2.8174662111985747e-06, + "loss": 0.0612, "step": 48360 }, { - "epoch": 3.5920095054210606, - "grad_norm": 2.0641915798187256, - "learning_rate": 8.447942967473637e-06, - "loss": 0.0649, + "epoch": 7.184019010842121, + "grad_norm": 0.5618019700050354, + "learning_rate": 2.8159809891578795e-06, + "loss": 0.0517, "step": 48370 }, { - "epoch": 3.592752116441408, - "grad_norm": 1.619729995727539, - "learning_rate": 8.443487301351552e-06, - "loss": 0.0689, + "epoch": 7.185504232882816, + "grad_norm": 1.5264066457748413, + "learning_rate": 2.814495767117184e-06, + "loss": 0.0598, "step": 48380 }, { - "epoch": 3.5934947274617555, - "grad_norm": 2.8663530349731445, - "learning_rate": 8.439031635229469e-06, - "loss": 0.0823, + "epoch": 7.186989454923511, + "grad_norm": 0.5172926187515259, + "learning_rate": 2.8130105450764892e-06, + "loss": 0.0589, "step": 48390 }, { - "epoch": 3.594237338482103, - "grad_norm": 0.41677528619766235, - "learning_rate": 8.434575969107382e-06, - "loss": 0.0444, + "epoch": 7.188474676964206, + "grad_norm": 1.296660304069519, + "learning_rate": 2.811525323035794e-06, + "loss": 0.0504, "step": 48400 }, { - "epoch": 3.5949799495024504, - "grad_norm": 1.2534587383270264, - "learning_rate": 8.430120302985297e-06, - "loss": 0.046, + "epoch": 7.189959899004902, + "grad_norm": 1.113176703453064, + "learning_rate": 2.810040100995099e-06, + "loss": 0.0607, "step": 48410 }, { - "epoch": 3.5957225605227983, - "grad_norm": 1.1408332586288452, - "learning_rate": 8.42566463686321e-06, - "loss": 0.0431, + "epoch": 7.1914451210455965, + "grad_norm": 0.9029462933540344, + "learning_rate": 2.808554878954404e-06, + "loss": 0.0646, "step": 48420 }, { - "epoch": 3.5964651715431457, - "grad_norm": 1.304916262626648, - "learning_rate": 8.421208970741127e-06, - "loss": 0.0525, + "epoch": 7.192930343086291, + "grad_norm": 0.4349440336227417, + "learning_rate": 2.807069656913709e-06, + "loss": 0.058, "step": 48430 }, { - "epoch": 3.597207782563493, - "grad_norm": 1.2031378746032715, - "learning_rate": 8.41675330461904e-06, - "loss": 0.0647, + "epoch": 7.194415565126986, + "grad_norm": 1.4266722202301025, + "learning_rate": 2.805584434873014e-06, + "loss": 0.0494, "step": 48440 }, { - "epoch": 3.597950393583841, - "grad_norm": 1.5209531784057617, - "learning_rate": 8.412297638496955e-06, - "loss": 0.0614, + "epoch": 7.195900787167681, + "grad_norm": 0.38038304448127747, + "learning_rate": 2.8040992128323184e-06, + "loss": 0.0596, "step": 48450 }, { - "epoch": 3.598693004604188, - "grad_norm": 1.3445301055908203, - "learning_rate": 8.40784197237487e-06, - "loss": 0.0793, + "epoch": 7.197386009208377, + "grad_norm": 0.9611129760742188, + "learning_rate": 2.8026139907916232e-06, + "loss": 0.0586, "step": 48460 }, { - "epoch": 3.599435615624536, - "grad_norm": 0.48478415608406067, - "learning_rate": 8.403386306252785e-06, - "loss": 0.0781, + "epoch": 7.198871231249072, + "grad_norm": 0.32369136810302734, + "learning_rate": 2.8011287687509285e-06, + "loss": 0.0497, "step": 48470 }, { - "epoch": 3.6001782266448834, - "grad_norm": 0.9039621353149414, - "learning_rate": 8.3989306401307e-06, - "loss": 0.0734, + "epoch": 7.200356453289767, + "grad_norm": 0.7440458536148071, + "learning_rate": 2.7996435467102334e-06, + "loss": 0.0554, "step": 48480 }, { - "epoch": 3.600920837665231, - "grad_norm": 1.992552638053894, - "learning_rate": 8.394474974008613e-06, - "loss": 0.0582, + "epoch": 7.201841675330462, + "grad_norm": 0.6804837584495544, + "learning_rate": 2.7981583246695382e-06, + "loss": 0.0476, "step": 48490 }, { - "epoch": 3.6016634486855788, - "grad_norm": 1.8207603693008423, - "learning_rate": 8.39001930788653e-06, - "loss": 0.0443, + "epoch": 7.203326897371157, + "grad_norm": 0.7779265642166138, + "learning_rate": 2.7966731026288435e-06, + "loss": 0.0624, "step": 48500 }, { - "epoch": 3.602406059705926, - "grad_norm": 1.5029350519180298, - "learning_rate": 8.385563641764445e-06, - "loss": 0.0553, + "epoch": 7.204812119411852, + "grad_norm": 0.5164976716041565, + "learning_rate": 2.7951878805881484e-06, + "loss": 0.0582, "step": 48510 }, { - "epoch": 3.6031486707262737, - "grad_norm": 1.8282309770584106, - "learning_rate": 8.381107975642358e-06, - "loss": 0.0408, + "epoch": 7.206297341452547, + "grad_norm": 1.5991668701171875, + "learning_rate": 2.7937026585474532e-06, + "loss": 0.0647, "step": 48520 }, { - "epoch": 3.603891281746621, - "grad_norm": 1.2360031604766846, - "learning_rate": 8.376652309520273e-06, - "loss": 0.0606, + "epoch": 7.207782563493242, + "grad_norm": 0.7785415649414062, + "learning_rate": 2.7922174365067576e-06, + "loss": 0.0482, "step": 48530 }, { - "epoch": 3.6046338927669686, - "grad_norm": 2.7400107383728027, - "learning_rate": 8.372196643398188e-06, - "loss": 0.0532, + "epoch": 7.209267785533937, + "grad_norm": 0.5820591449737549, + "learning_rate": 2.790732214466063e-06, + "loss": 0.0433, "step": 48540 }, { - "epoch": 3.6053765037873164, - "grad_norm": 1.5960508584976196, - "learning_rate": 8.367740977276103e-06, - "loss": 0.0641, + "epoch": 7.210753007574632, + "grad_norm": 0.9165753126144409, + "learning_rate": 2.7892469924253678e-06, + "loss": 0.0415, "step": 48550 }, { - "epoch": 3.606119114807664, - "grad_norm": 1.4296334981918335, - "learning_rate": 8.363285311154018e-06, - "loss": 0.0749, + "epoch": 7.212238229615328, + "grad_norm": 0.5261610746383667, + "learning_rate": 2.7877617703846726e-06, + "loss": 0.0451, "step": 48560 }, { - "epoch": 3.6068617258280113, - "grad_norm": 2.3336386680603027, - "learning_rate": 8.358829645031932e-06, - "loss": 0.0739, + "epoch": 7.213723451656023, + "grad_norm": 1.694600224494934, + "learning_rate": 2.7862765483439775e-06, + "loss": 0.069, "step": 48570 }, { - "epoch": 3.607604336848359, - "grad_norm": 1.2750415802001953, - "learning_rate": 8.354373978909848e-06, - "loss": 0.0661, + "epoch": 7.215208673696718, + "grad_norm": 1.0975875854492188, + "learning_rate": 2.7847913263032828e-06, + "loss": 0.0409, "step": 48580 }, { - "epoch": 3.6083469478687062, - "grad_norm": 1.1788580417633057, - "learning_rate": 8.349918312787762e-06, - "loss": 0.0629, + "epoch": 7.2166938957374125, + "grad_norm": 1.1013407707214355, + "learning_rate": 2.7833061042625876e-06, + "loss": 0.0654, "step": 48590 }, { - "epoch": 3.609089558889054, - "grad_norm": 1.36868155002594, - "learning_rate": 8.345462646665677e-06, - "loss": 0.0625, + "epoch": 7.218179117778108, + "grad_norm": 1.3050427436828613, + "learning_rate": 2.7818208822218925e-06, + "loss": 0.052, "step": 48600 }, { - "epoch": 3.6098321699094016, - "grad_norm": 1.373689889907837, - "learning_rate": 8.34100698054359e-06, - "loss": 0.0562, + "epoch": 7.219664339818803, + "grad_norm": 0.5271753668785095, + "learning_rate": 2.780335660181197e-06, + "loss": 0.045, "step": 48610 }, { - "epoch": 3.610574780929749, - "grad_norm": 2.2986576557159424, - "learning_rate": 8.336551314421507e-06, - "loss": 0.0644, + "epoch": 7.221149561859498, + "grad_norm": 0.8320847749710083, + "learning_rate": 2.778850438140502e-06, + "loss": 0.0574, "step": 48620 }, { - "epoch": 3.6113173919500965, - "grad_norm": 0.9667069911956787, - "learning_rate": 8.332095648299422e-06, - "loss": 0.0964, + "epoch": 7.222634783900193, + "grad_norm": 0.9930746555328369, + "learning_rate": 2.777365216099807e-06, + "loss": 0.057, "step": 48630 }, { - "epoch": 3.612060002970444, - "grad_norm": 1.5480561256408691, - "learning_rate": 8.327639982177335e-06, - "loss": 0.0456, + "epoch": 7.224120005940888, + "grad_norm": 0.9369096159934998, + "learning_rate": 2.775879994059112e-06, + "loss": 0.0664, "step": 48640 }, { - "epoch": 3.612802613990792, - "grad_norm": 2.361801862716675, - "learning_rate": 8.323184316055252e-06, - "loss": 0.0599, + "epoch": 7.225605227981584, + "grad_norm": 1.8006670475006104, + "learning_rate": 2.774394772018417e-06, + "loss": 0.0698, "step": 48650 }, { - "epoch": 3.6135452250111393, - "grad_norm": 0.5613052845001221, - "learning_rate": 8.318728649933165e-06, - "loss": 0.0428, + "epoch": 7.227090450022279, + "grad_norm": 0.9909857511520386, + "learning_rate": 2.772909549977722e-06, + "loss": 0.0613, "step": 48660 }, { - "epoch": 3.6142878360314867, - "grad_norm": 2.242196559906006, - "learning_rate": 8.31427298381108e-06, - "loss": 0.0627, + "epoch": 7.2285756720629735, + "grad_norm": 0.6781131625175476, + "learning_rate": 2.771424327937027e-06, + "loss": 0.0426, "step": 48670 }, { - "epoch": 3.615030447051834, - "grad_norm": 0.8085373640060425, - "learning_rate": 8.309817317688995e-06, - "loss": 0.0531, + "epoch": 7.230060894103668, + "grad_norm": 0.7474071383476257, + "learning_rate": 2.7699391058963313e-06, + "loss": 0.0464, "step": 48680 }, { - "epoch": 3.6157730580721816, - "grad_norm": 1.4560467004776, - "learning_rate": 8.30536165156691e-06, - "loss": 0.054, + "epoch": 7.231546116144363, + "grad_norm": 0.791830837726593, + "learning_rate": 2.768453883855637e-06, + "loss": 0.0695, "step": 48690 }, { - "epoch": 3.6165156690925295, - "grad_norm": 1.3971903324127197, - "learning_rate": 8.300905985444825e-06, - "loss": 0.039, + "epoch": 7.233031338185059, + "grad_norm": 1.9448063373565674, + "learning_rate": 2.7669686618149415e-06, + "loss": 0.0523, "step": 48700 }, { - "epoch": 3.617258280112877, - "grad_norm": 1.9902868270874023, - "learning_rate": 8.296450319322738e-06, - "loss": 0.0625, + "epoch": 7.234516560225754, + "grad_norm": 0.4355827867984772, + "learning_rate": 2.7654834397742463e-06, + "loss": 0.0551, "step": 48710 }, { - "epoch": 3.6180008911332244, - "grad_norm": 2.5948617458343506, - "learning_rate": 8.291994653200653e-06, - "loss": 0.0437, + "epoch": 7.236001782266449, + "grad_norm": 0.8302456140518188, + "learning_rate": 2.763998217733551e-06, + "loss": 0.0597, "step": 48720 }, { - "epoch": 3.618743502153572, - "grad_norm": 1.1913443803787231, - "learning_rate": 8.287538987078568e-06, - "loss": 0.0699, + "epoch": 7.237487004307144, + "grad_norm": 0.36988481879234314, + "learning_rate": 2.7625129956928565e-06, + "loss": 0.0431, "step": 48730 }, { - "epoch": 3.6194861131739193, - "grad_norm": 1.5906063318252563, - "learning_rate": 8.283083320956483e-06, - "loss": 0.0839, + "epoch": 7.238972226347839, + "grad_norm": 0.8383266925811768, + "learning_rate": 2.7610277736521613e-06, + "loss": 0.0663, "step": 48740 }, { - "epoch": 3.6202287241942672, - "grad_norm": 1.980405569076538, - "learning_rate": 8.278627654834398e-06, - "loss": 0.0694, + "epoch": 7.2404574483885344, + "grad_norm": 0.5984920263290405, + "learning_rate": 2.759542551611466e-06, + "loss": 0.0577, "step": 48750 }, { - "epoch": 3.6209713352146147, - "grad_norm": 0.5400698781013489, - "learning_rate": 8.274171988712313e-06, - "loss": 0.065, + "epoch": 7.241942670429229, + "grad_norm": 1.1798092126846313, + "learning_rate": 2.7580573295707715e-06, + "loss": 0.0457, "step": 48760 }, { - "epoch": 3.621713946234962, - "grad_norm": 2.1848058700561523, - "learning_rate": 8.269716322590228e-06, - "loss": 0.0694, + "epoch": 7.243427892469924, + "grad_norm": 1.5372233390808105, + "learning_rate": 2.756572107530076e-06, + "loss": 0.0417, "step": 48770 }, { - "epoch": 3.6224565572553096, - "grad_norm": 1.3808951377868652, - "learning_rate": 8.265260656468141e-06, - "loss": 0.0496, + "epoch": 7.244913114510619, + "grad_norm": 0.7862002849578857, + "learning_rate": 2.7550868854893808e-06, + "loss": 0.051, "step": 48780 }, { - "epoch": 3.623199168275657, - "grad_norm": 1.0675551891326904, - "learning_rate": 8.260804990346056e-06, - "loss": 0.0807, + "epoch": 7.246398336551314, + "grad_norm": 1.1187494993209839, + "learning_rate": 2.7536016634486856e-06, + "loss": 0.0469, "step": 48790 }, { - "epoch": 3.623941779296005, - "grad_norm": 0.58694988489151, - "learning_rate": 8.256349324223973e-06, - "loss": 0.0794, + "epoch": 7.24788355859201, + "grad_norm": 1.2784112691879272, + "learning_rate": 2.752116441407991e-06, + "loss": 0.0543, "step": 48800 }, { - "epoch": 3.6246843903163524, - "grad_norm": 1.7244460582733154, - "learning_rate": 8.251893658101886e-06, - "loss": 0.0631, + "epoch": 7.249368780632705, + "grad_norm": 0.7661743760108948, + "learning_rate": 2.7506312193672958e-06, + "loss": 0.0743, "step": 48810 }, { - "epoch": 3.6254270013367, - "grad_norm": 0.8975712060928345, - "learning_rate": 8.247437991979801e-06, - "loss": 0.0395, + "epoch": 7.2508540026734, + "grad_norm": 1.1764309406280518, + "learning_rate": 2.7491459973266006e-06, + "loss": 0.0626, "step": 48820 }, { - "epoch": 3.6261696123570473, - "grad_norm": 0.6970779299736023, - "learning_rate": 8.242982325857715e-06, - "loss": 0.0548, + "epoch": 7.2523392247140945, + "grad_norm": 0.6383646726608276, + "learning_rate": 2.747660775285905e-06, + "loss": 0.059, "step": 48830 }, { - "epoch": 3.6269122233773947, - "grad_norm": 3.2898783683776855, - "learning_rate": 8.238526659735631e-06, - "loss": 0.0594, + "epoch": 7.253824446754789, + "grad_norm": 1.1246609687805176, + "learning_rate": 2.7461755532452108e-06, + "loss": 0.0557, "step": 48840 }, { - "epoch": 3.6276548343977426, - "grad_norm": 1.9671893119812012, - "learning_rate": 8.234070993613545e-06, - "loss": 0.0702, + "epoch": 7.255309668795485, + "grad_norm": 0.9652683734893799, + "learning_rate": 2.744690331204515e-06, + "loss": 0.0635, "step": 48850 }, { - "epoch": 3.62839744541809, - "grad_norm": 0.6699275970458984, - "learning_rate": 8.22961532749146e-06, - "loss": 0.0674, + "epoch": 7.25679489083618, + "grad_norm": 0.890220046043396, + "learning_rate": 2.74320510916382e-06, + "loss": 0.0586, "step": 48860 }, { - "epoch": 3.6291400564384375, - "grad_norm": 1.7382103204727173, - "learning_rate": 8.225159661369375e-06, - "loss": 0.0578, + "epoch": 7.258280112876875, + "grad_norm": 0.9741783738136292, + "learning_rate": 2.741719887123125e-06, + "loss": 0.05, "step": 48870 }, { - "epoch": 3.629882667458785, - "grad_norm": 3.2885706424713135, - "learning_rate": 8.22070399524729e-06, - "loss": 0.0565, + "epoch": 7.25976533491757, + "grad_norm": 0.8578696250915527, + "learning_rate": 2.74023466508243e-06, + "loss": 0.0471, "step": 48880 }, { - "epoch": 3.6306252784791324, - "grad_norm": 1.1703078746795654, - "learning_rate": 8.216248329125205e-06, - "loss": 0.0696, + "epoch": 7.261250556958265, + "grad_norm": 0.3909326195716858, + "learning_rate": 2.738749443041735e-06, + "loss": 0.0516, "step": 48890 }, { - "epoch": 3.6313678894994803, - "grad_norm": 0.3696410059928894, - "learning_rate": 8.211792663003118e-06, - "loss": 0.0693, + "epoch": 7.262735778998961, + "grad_norm": 0.4995374083518982, + "learning_rate": 2.73726422100104e-06, + "loss": 0.0494, "step": 48900 }, { - "epoch": 3.6321105005198278, - "grad_norm": 2.525710105895996, - "learning_rate": 8.207336996881035e-06, - "loss": 0.0648, + "epoch": 7.2642210010396555, + "grad_norm": 1.06190824508667, + "learning_rate": 2.735778998960345e-06, + "loss": 0.0531, "step": 48910 }, { - "epoch": 3.632853111540175, - "grad_norm": 1.2038122415542603, - "learning_rate": 8.20288133075895e-06, - "loss": 0.0612, + "epoch": 7.26570622308035, + "grad_norm": 0.5144338607788086, + "learning_rate": 2.7342937769196496e-06, + "loss": 0.0656, "step": 48920 }, { - "epoch": 3.6335957225605227, - "grad_norm": 2.1405258178710938, - "learning_rate": 8.198425664636863e-06, - "loss": 0.0607, + "epoch": 7.267191445121045, + "grad_norm": 0.649617612361908, + "learning_rate": 2.7328085548789545e-06, + "loss": 0.066, "step": 48930 }, { - "epoch": 3.63433833358087, - "grad_norm": 1.3878464698791504, - "learning_rate": 8.193969998514778e-06, - "loss": 0.0556, + "epoch": 7.26867666716174, + "grad_norm": 1.098474383354187, + "learning_rate": 2.7313233328382593e-06, + "loss": 0.045, "step": 48940 }, { - "epoch": 3.635080944601218, - "grad_norm": 0.8806987404823303, - "learning_rate": 8.189514332392693e-06, - "loss": 0.0496, + "epoch": 7.270161889202436, + "grad_norm": 0.9607246518135071, + "learning_rate": 2.7298381107975646e-06, + "loss": 0.049, "step": 48950 }, { - "epoch": 3.6358235556215655, - "grad_norm": 0.7001392841339111, - "learning_rate": 8.185058666270608e-06, - "loss": 0.0623, + "epoch": 7.271647111243131, + "grad_norm": 0.7913243770599365, + "learning_rate": 2.7283528887568695e-06, + "loss": 0.0401, "step": 48960 }, { - "epoch": 3.636566166641913, - "grad_norm": 1.2697521448135376, - "learning_rate": 8.180603000148523e-06, - "loss": 0.0804, + "epoch": 7.273132333283826, + "grad_norm": 1.0159389972686768, + "learning_rate": 2.7268676667161743e-06, + "loss": 0.0648, "step": 48970 }, { - "epoch": 3.6373087776622604, - "grad_norm": 0.7916688323020935, - "learning_rate": 8.176147334026436e-06, - "loss": 0.0562, + "epoch": 7.274617555324521, + "grad_norm": 0.6008278131484985, + "learning_rate": 2.725382444675479e-06, + "loss": 0.0445, "step": 48980 }, { - "epoch": 3.638051388682608, - "grad_norm": 1.4177652597427368, - "learning_rate": 8.171691667904353e-06, - "loss": 0.0454, + "epoch": 7.2761027773652165, + "grad_norm": 0.5866870880126953, + "learning_rate": 2.7238972226347845e-06, + "loss": 0.063, "step": 48990 }, { - "epoch": 3.6387939997029557, - "grad_norm": 2.105250120162964, - "learning_rate": 8.167236001782266e-06, - "loss": 0.0614, + "epoch": 7.277587999405911, + "grad_norm": 0.5861631035804749, + "learning_rate": 2.722412000594089e-06, + "loss": 0.0534, "step": 49000 }, { - "epoch": 3.639536610723303, - "grad_norm": 1.3366777896881104, - "learning_rate": 8.162780335660181e-06, - "loss": 0.0743, + "epoch": 7.279073221446606, + "grad_norm": 2.045478582382202, + "learning_rate": 2.7209267785533937e-06, + "loss": 0.0855, "step": 49010 }, { - "epoch": 3.6402792217436506, - "grad_norm": 0.6253573298454285, - "learning_rate": 8.158324669538096e-06, - "loss": 0.0649, + "epoch": 7.280558443487301, + "grad_norm": 0.6939730644226074, + "learning_rate": 2.719441556512699e-06, + "loss": 0.0627, "step": 49020 }, { - "epoch": 3.6410218327639985, - "grad_norm": 0.4928571879863739, - "learning_rate": 8.153869003416011e-06, - "loss": 0.069, + "epoch": 7.282043665527996, + "grad_norm": 0.5861126780509949, + "learning_rate": 2.717956334472004e-06, + "loss": 0.049, "step": 49030 }, { - "epoch": 3.6417644437843455, - "grad_norm": 0.9008927345275879, - "learning_rate": 8.149413337293926e-06, - "loss": 0.0508, + "epoch": 7.283528887568692, + "grad_norm": 1.1887887716293335, + "learning_rate": 2.7164711124313087e-06, + "loss": 0.0506, "step": 49040 }, { - "epoch": 3.6425070548046934, - "grad_norm": 0.6431163549423218, - "learning_rate": 8.14495767117184e-06, - "loss": 0.0758, + "epoch": 7.285014109609387, + "grad_norm": 0.3314265310764313, + "learning_rate": 2.7149858903906136e-06, + "loss": 0.0403, "step": 49050 }, { - "epoch": 3.643249665825041, - "grad_norm": 3.3478305339813232, - "learning_rate": 8.140502005049756e-06, - "loss": 0.0692, + "epoch": 7.286499331650082, + "grad_norm": 1.404929518699646, + "learning_rate": 2.713500668349919e-06, + "loss": 0.0569, "step": 49060 }, { - "epoch": 3.6439922768453883, - "grad_norm": 1.4182404279708862, - "learning_rate": 8.13604633892767e-06, - "loss": 0.0813, + "epoch": 7.287984553690777, + "grad_norm": 0.8105013370513916, + "learning_rate": 2.7120154463092237e-06, + "loss": 0.0618, "step": 49070 }, { - "epoch": 3.644734887865736, - "grad_norm": 0.8223309516906738, - "learning_rate": 8.131590672805584e-06, - "loss": 0.0589, + "epoch": 7.2894697757314715, + "grad_norm": 0.7986222505569458, + "learning_rate": 2.710530224268528e-06, + "loss": 0.0477, "step": 49080 }, { - "epoch": 3.6454774988860836, - "grad_norm": 3.398197889328003, - "learning_rate": 8.1271350066835e-06, - "loss": 0.049, + "epoch": 7.290954997772167, + "grad_norm": 1.259995460510254, + "learning_rate": 2.709045002227833e-06, + "loss": 0.0533, "step": 49090 }, { - "epoch": 3.646220109906431, - "grad_norm": 0.7745069861412048, - "learning_rate": 8.122679340561414e-06, - "loss": 0.0341, + "epoch": 7.292440219812862, + "grad_norm": 0.9592974781990051, + "learning_rate": 2.7075597801871383e-06, + "loss": 0.0497, "step": 49100 }, { - "epoch": 3.6469627209267785, - "grad_norm": 1.4048975706100464, - "learning_rate": 8.11822367443933e-06, - "loss": 0.0626, + "epoch": 7.293925441853557, + "grad_norm": 0.5274348258972168, + "learning_rate": 2.706074558146443e-06, + "loss": 0.0425, "step": 49110 }, { - "epoch": 3.647705331947126, - "grad_norm": 1.0643982887268066, - "learning_rate": 8.113768008317243e-06, - "loss": 0.0543, + "epoch": 7.295410663894252, + "grad_norm": 0.26715418696403503, + "learning_rate": 2.704589336105748e-06, + "loss": 0.0707, "step": 49120 }, { - "epoch": 3.648447942967474, - "grad_norm": 2.2764272689819336, - "learning_rate": 8.109312342195158e-06, - "loss": 0.0827, + "epoch": 7.296895885934947, + "grad_norm": 1.076434850692749, + "learning_rate": 2.703104114065053e-06, + "loss": 0.0803, "step": 49130 }, { - "epoch": 3.6491905539878213, - "grad_norm": 1.6373050212860107, - "learning_rate": 8.104856676073073e-06, - "loss": 0.0593, + "epoch": 7.298381107975643, + "grad_norm": 0.31761565804481506, + "learning_rate": 2.701618892024358e-06, + "loss": 0.0465, "step": 49140 }, { - "epoch": 3.649933165008169, - "grad_norm": 0.9760125875473022, - "learning_rate": 8.100401009950988e-06, - "loss": 0.0482, + "epoch": 7.299866330016338, + "grad_norm": 0.6720213294029236, + "learning_rate": 2.7001336699836626e-06, + "loss": 0.0476, "step": 49150 }, { - "epoch": 3.6506757760285162, - "grad_norm": 0.3532949388027191, - "learning_rate": 8.095945343828903e-06, - "loss": 0.0667, + "epoch": 7.3013515520570325, + "grad_norm": 1.105984091758728, + "learning_rate": 2.6986484479429674e-06, + "loss": 0.0452, "step": 49160 }, { - "epoch": 3.6514183870488637, - "grad_norm": 1.1069635152816772, - "learning_rate": 8.091489677706818e-06, - "loss": 0.054, + "epoch": 7.302836774097727, + "grad_norm": 1.1170014142990112, + "learning_rate": 2.6971632259022727e-06, + "loss": 0.057, "step": 49170 }, { - "epoch": 3.6521609980692116, - "grad_norm": 2.3868649005889893, - "learning_rate": 8.087034011584733e-06, - "loss": 0.0543, + "epoch": 7.304321996138423, + "grad_norm": 1.4183934926986694, + "learning_rate": 2.6956780038615776e-06, + "loss": 0.0737, "step": 49180 }, { - "epoch": 3.652903609089559, - "grad_norm": 2.724820375442505, - "learning_rate": 8.082578345462646e-06, - "loss": 0.0669, + "epoch": 7.305807218179118, + "grad_norm": 0.9773399829864502, + "learning_rate": 2.6941927818208824e-06, + "loss": 0.0402, "step": 49190 }, { - "epoch": 3.6536462201099065, - "grad_norm": 1.579540491104126, - "learning_rate": 8.078122679340561e-06, - "loss": 0.0574, + "epoch": 7.307292440219813, + "grad_norm": 0.5806409120559692, + "learning_rate": 2.6927075597801873e-06, + "loss": 0.04, "step": 49200 }, { - "epoch": 3.654388831130254, - "grad_norm": 1.0479410886764526, - "learning_rate": 8.073667013218478e-06, - "loss": 0.0563, + "epoch": 7.308777662260508, + "grad_norm": 0.7879276275634766, + "learning_rate": 2.6912223377394926e-06, + "loss": 0.0489, "step": 49210 }, { - "epoch": 3.6551314421506014, - "grad_norm": 0.6656275391578674, - "learning_rate": 8.069211347096391e-06, - "loss": 0.0449, + "epoch": 7.310262884301203, + "grad_norm": 0.8717116713523865, + "learning_rate": 2.6897371156987974e-06, + "loss": 0.0683, "step": 49220 }, { - "epoch": 3.6558740531709493, - "grad_norm": 0.6518800258636475, - "learning_rate": 8.064755680974306e-06, - "loss": 0.0884, + "epoch": 7.3117481063418985, + "grad_norm": 0.9644998908042908, + "learning_rate": 2.688251893658102e-06, + "loss": 0.0598, "step": 49230 }, { - "epoch": 3.6566166641912967, - "grad_norm": 0.2461584061384201, - "learning_rate": 8.06030001485222e-06, - "loss": 0.0426, + "epoch": 7.313233328382593, + "grad_norm": 0.4911693334579468, + "learning_rate": 2.6867666716174067e-06, + "loss": 0.0743, "step": 49240 }, { - "epoch": 3.657359275211644, - "grad_norm": 1.2033755779266357, - "learning_rate": 8.055844348730136e-06, - "loss": 0.052, + "epoch": 7.314718550423288, + "grad_norm": 1.0501363277435303, + "learning_rate": 2.685281449576712e-06, + "loss": 0.0755, "step": 49250 }, { - "epoch": 3.6581018862319916, - "grad_norm": 2.62395977973938, - "learning_rate": 8.051388682608051e-06, - "loss": 0.1079, + "epoch": 7.316203772463983, + "grad_norm": 0.6555094718933105, + "learning_rate": 2.683796227536017e-06, + "loss": 0.0685, "step": 49260 }, { - "epoch": 3.658844497252339, - "grad_norm": 0.4178759455680847, - "learning_rate": 8.046933016485964e-06, - "loss": 0.0778, + "epoch": 7.317688994504678, + "grad_norm": 1.0857270956039429, + "learning_rate": 2.6823110054953217e-06, + "loss": 0.0617, "step": 49270 }, { - "epoch": 3.659587108272687, - "grad_norm": 1.5172581672668457, - "learning_rate": 8.042477350363881e-06, - "loss": 0.1025, + "epoch": 7.319174216545374, + "grad_norm": 1.108014702796936, + "learning_rate": 2.680825783454627e-06, + "loss": 0.0573, "step": 49280 }, { - "epoch": 3.6603297192930344, - "grad_norm": 2.6945109367370605, - "learning_rate": 8.038021684241794e-06, - "loss": 0.0959, + "epoch": 7.320659438586069, + "grad_norm": 0.713036835193634, + "learning_rate": 2.679340561413932e-06, + "loss": 0.0479, "step": 49290 }, { - "epoch": 3.661072330313382, - "grad_norm": 1.2761311531066895, - "learning_rate": 8.03356601811971e-06, - "loss": 0.0477, + "epoch": 7.322144660626764, + "grad_norm": 0.8778752088546753, + "learning_rate": 2.6778553393732363e-06, + "loss": 0.0429, "step": 49300 }, { - "epoch": 3.6618149413337293, - "grad_norm": 0.8883626461029053, - "learning_rate": 8.029110351997623e-06, - "loss": 0.0615, + "epoch": 7.323629882667459, + "grad_norm": 0.5056967735290527, + "learning_rate": 2.676370117332541e-06, + "loss": 0.0466, "step": 49310 }, { - "epoch": 3.6625575523540768, - "grad_norm": 0.7210109829902649, - "learning_rate": 8.02465468587554e-06, - "loss": 0.0618, + "epoch": 7.3251151047081535, + "grad_norm": 1.1081126928329468, + "learning_rate": 2.6748848952918464e-06, + "loss": 0.0545, "step": 49320 }, { - "epoch": 3.6633001633744247, - "grad_norm": 1.4874513149261475, - "learning_rate": 8.020199019753454e-06, - "loss": 0.0522, + "epoch": 7.326600326748849, + "grad_norm": 0.40196236968040466, + "learning_rate": 2.6733996732511513e-06, + "loss": 0.0563, "step": 49330 }, { - "epoch": 3.664042774394772, - "grad_norm": 0.9390859603881836, - "learning_rate": 8.015743353631368e-06, - "loss": 0.0567, + "epoch": 7.328085548789544, + "grad_norm": 1.0329406261444092, + "learning_rate": 2.671914451210456e-06, + "loss": 0.0597, "step": 49340 }, { - "epoch": 3.6647853854151196, - "grad_norm": 0.896518349647522, - "learning_rate": 8.011287687509283e-06, - "loss": 0.0589, + "epoch": 7.329570770830239, + "grad_norm": 1.0409470796585083, + "learning_rate": 2.670429229169761e-06, + "loss": 0.056, "step": 49350 }, { - "epoch": 3.665527996435467, - "grad_norm": 2.0531513690948486, - "learning_rate": 8.006832021387198e-06, - "loss": 0.0871, + "epoch": 7.331055992870934, + "grad_norm": 1.3227382898330688, + "learning_rate": 2.6689440071290663e-06, + "loss": 0.0516, "step": 49360 }, { - "epoch": 3.6662706074558145, - "grad_norm": 1.1506352424621582, - "learning_rate": 8.002376355265113e-06, - "loss": 0.068, + "epoch": 7.332541214911629, + "grad_norm": 0.9763649702072144, + "learning_rate": 2.667458785088371e-06, + "loss": 0.0756, "step": 49370 }, { - "epoch": 3.6670132184761624, - "grad_norm": 0.7232264280319214, - "learning_rate": 7.997920689143028e-06, - "loss": 0.0859, + "epoch": 7.334026436952325, + "grad_norm": 0.8181225657463074, + "learning_rate": 2.6659735630476756e-06, + "loss": 0.0639, "step": 49380 }, { - "epoch": 3.66775582949651, - "grad_norm": 2.220487594604492, - "learning_rate": 7.99346502302094e-06, - "loss": 0.0333, + "epoch": 7.33551165899302, + "grad_norm": 1.3974295854568481, + "learning_rate": 2.6644883410069804e-06, + "loss": 0.0633, "step": 49390 }, { - "epoch": 3.6684984405168573, - "grad_norm": 0.4155381917953491, - "learning_rate": 7.989009356898858e-06, - "loss": 0.0488, + "epoch": 7.3369968810337145, + "grad_norm": 0.905113935470581, + "learning_rate": 2.6630031189662857e-06, + "loss": 0.0689, "step": 49400 }, { - "epoch": 3.6692410515372047, - "grad_norm": 0.726426362991333, - "learning_rate": 7.98455369077677e-06, - "loss": 0.0712, + "epoch": 7.338482103074409, + "grad_norm": 0.6330129504203796, + "learning_rate": 2.6615178969255906e-06, + "loss": 0.0512, "step": 49410 }, { - "epoch": 3.669983662557552, - "grad_norm": 1.152137041091919, - "learning_rate": 7.980098024654686e-06, - "loss": 0.0479, + "epoch": 7.339967325115104, + "grad_norm": 0.45538121461868286, + "learning_rate": 2.6600326748848954e-06, + "loss": 0.0585, "step": 49420 }, { - "epoch": 3.6707262735779, - "grad_norm": 0.6122065782546997, - "learning_rate": 7.9756423585326e-06, - "loss": 0.0538, + "epoch": 7.3414525471558, + "grad_norm": 1.3823957443237305, + "learning_rate": 2.6585474528442007e-06, + "loss": 0.0502, "step": 49430 }, { - "epoch": 3.6714688845982475, - "grad_norm": 1.2292253971099854, - "learning_rate": 7.971186692410516e-06, - "loss": 0.046, + "epoch": 7.342937769196495, + "grad_norm": 1.288536548614502, + "learning_rate": 2.6570622308035056e-06, + "loss": 0.0524, "step": 49440 }, { - "epoch": 3.672211495618595, - "grad_norm": 0.447689950466156, - "learning_rate": 7.96673102628843e-06, - "loss": 0.0483, + "epoch": 7.34442299123719, + "grad_norm": 0.4768843948841095, + "learning_rate": 2.6555770087628104e-06, + "loss": 0.0462, "step": 49450 }, { - "epoch": 3.6729541066389424, - "grad_norm": 0.4693281352519989, - "learning_rate": 7.962275360166344e-06, - "loss": 0.0252, + "epoch": 7.345908213277885, + "grad_norm": 0.8534879088401794, + "learning_rate": 2.654091786722115e-06, + "loss": 0.0392, "step": 49460 }, { - "epoch": 3.67369671765929, - "grad_norm": 1.050255298614502, - "learning_rate": 7.95781969404426e-06, - "loss": 0.0934, + "epoch": 7.34739343531858, + "grad_norm": 1.0308243036270142, + "learning_rate": 2.65260656468142e-06, + "loss": 0.046, "step": 49470 }, { - "epoch": 3.6744393286796377, - "grad_norm": 1.7105246782302856, - "learning_rate": 7.953364027922174e-06, - "loss": 0.0491, + "epoch": 7.3488786573592755, + "grad_norm": 0.5795043110847473, + "learning_rate": 2.651121342640725e-06, + "loss": 0.0486, "step": 49480 }, { - "epoch": 3.675181939699985, - "grad_norm": 1.1339107751846313, - "learning_rate": 7.948908361800089e-06, - "loss": 0.0531, + "epoch": 7.35036387939997, + "grad_norm": 0.5937952995300293, + "learning_rate": 2.64963612060003e-06, + "loss": 0.0654, "step": 49490 }, { - "epoch": 3.6759245507203326, - "grad_norm": 2.340886116027832, - "learning_rate": 7.944452695678004e-06, - "loss": 0.0547, + "epoch": 7.351849101440665, + "grad_norm": 0.793193519115448, + "learning_rate": 2.6481508985593347e-06, + "loss": 0.0508, "step": 49500 }, { - "epoch": 3.67666716174068, - "grad_norm": 0.6771245002746582, - "learning_rate": 7.939997029555919e-06, - "loss": 0.0726, + "epoch": 7.35333432348136, + "grad_norm": 1.429418921470642, + "learning_rate": 2.64666567651864e-06, + "loss": 0.0566, "step": 49510 }, { - "epoch": 3.6774097727610275, - "grad_norm": 1.6597684621810913, - "learning_rate": 7.935541363433834e-06, - "loss": 0.0894, + "epoch": 7.354819545522055, + "grad_norm": 0.6977571845054626, + "learning_rate": 2.645180454477945e-06, + "loss": 0.0517, "step": 49520 }, { - "epoch": 3.6781523837813754, - "grad_norm": 2.375394821166992, - "learning_rate": 7.931085697311747e-06, - "loss": 0.0645, + "epoch": 7.356304767562751, + "grad_norm": 0.6724829077720642, + "learning_rate": 2.6436952324372493e-06, + "loss": 0.0688, "step": 49530 }, { - "epoch": 3.678894994801723, - "grad_norm": 1.4455935955047607, - "learning_rate": 7.926630031189662e-06, - "loss": 0.06, + "epoch": 7.357789989603446, + "grad_norm": 0.5872288346290588, + "learning_rate": 2.642210010396554e-06, + "loss": 0.0661, "step": 49540 }, { - "epoch": 3.6796376058220703, - "grad_norm": 0.4095980226993561, - "learning_rate": 7.922174365067577e-06, - "loss": 0.0667, + "epoch": 7.359275211644141, + "grad_norm": 0.7089404463768005, + "learning_rate": 2.6407247883558594e-06, + "loss": 0.0604, "step": 49550 }, { - "epoch": 3.680380216842418, - "grad_norm": 0.5983967781066895, - "learning_rate": 7.917718698945492e-06, - "loss": 0.0564, + "epoch": 7.360760433684836, + "grad_norm": 1.2058686017990112, + "learning_rate": 2.6392395663151643e-06, + "loss": 0.0555, "step": 49560 }, { - "epoch": 3.6811228278627652, - "grad_norm": 0.8008362054824829, - "learning_rate": 7.913263032823407e-06, - "loss": 0.0642, + "epoch": 7.362245655725531, + "grad_norm": 0.7654306888580322, + "learning_rate": 2.637754344274469e-06, + "loss": 0.0664, "step": 49570 }, { - "epoch": 3.681865438883113, - "grad_norm": 4.587621688842773, - "learning_rate": 7.908807366701322e-06, - "loss": 0.0828, + "epoch": 7.363730877766226, + "grad_norm": 0.548048734664917, + "learning_rate": 2.6362691222337744e-06, + "loss": 0.0561, "step": 49580 }, { - "epoch": 3.6826080499034606, - "grad_norm": 2.4655842781066895, - "learning_rate": 7.904351700579237e-06, - "loss": 0.0508, + "epoch": 7.365216099806921, + "grad_norm": 0.9300860166549683, + "learning_rate": 2.6347839001930793e-06, + "loss": 0.0604, "step": 49590 }, { - "epoch": 3.683350660923808, - "grad_norm": 3.4618539810180664, - "learning_rate": 7.89989603445715e-06, - "loss": 0.0668, + "epoch": 7.366701321847616, + "grad_norm": 0.4907565116882324, + "learning_rate": 2.633298678152384e-06, + "loss": 0.0565, "step": 49600 }, { - "epoch": 3.684093271944156, - "grad_norm": 0.5393896698951721, - "learning_rate": 7.895440368335066e-06, - "loss": 0.0231, + "epoch": 7.368186543888311, + "grad_norm": 0.4754343032836914, + "learning_rate": 2.6318134561116885e-06, + "loss": 0.0431, "step": 49610 }, { - "epoch": 3.684835882964503, - "grad_norm": 2.3107566833496094, - "learning_rate": 7.890984702212982e-06, - "loss": 0.0643, + "epoch": 7.369671765929007, + "grad_norm": 0.9544875621795654, + "learning_rate": 2.630328234070994e-06, + "loss": 0.0464, "step": 49620 }, { - "epoch": 3.685578493984851, - "grad_norm": 0.5587324500083923, - "learning_rate": 7.886529036090896e-06, - "loss": 0.0612, + "epoch": 7.371156987969702, + "grad_norm": 1.2123874425888062, + "learning_rate": 2.6288430120302987e-06, + "loss": 0.0611, "step": 49630 }, - { - "epoch": 3.6863211050051983, - "grad_norm": 3.5032103061676025, - "learning_rate": 7.88207336996881e-06, - "loss": 0.0796, + { + "epoch": 7.3726422100103965, + "grad_norm": 0.28933414816856384, + "learning_rate": 2.6273577899896035e-06, + "loss": 0.0633, "step": 49640 }, { - "epoch": 3.6870637160255457, - "grad_norm": 1.4119980335235596, - "learning_rate": 7.877617703846724e-06, - "loss": 0.0507, + "epoch": 7.3741274320510914, + "grad_norm": 1.4906394481658936, + "learning_rate": 2.6258725679489084e-06, + "loss": 0.0613, "step": 49650 }, { - "epoch": 3.6878063270458936, - "grad_norm": 2.8842852115631104, - "learning_rate": 7.87316203772464e-06, - "loss": 0.0847, + "epoch": 7.375612654091786, + "grad_norm": 0.9483470320701599, + "learning_rate": 2.6243873459082137e-06, + "loss": 0.0454, "step": 49660 }, { - "epoch": 3.688548938066241, - "grad_norm": 2.1859562397003174, - "learning_rate": 7.868706371602556e-06, - "loss": 0.0462, + "epoch": 7.377097876132482, + "grad_norm": 1.8152319192886353, + "learning_rate": 2.6229021238675185e-06, + "loss": 0.0736, "step": 49670 }, { - "epoch": 3.6892915490865885, - "grad_norm": 1.2712724208831787, - "learning_rate": 7.864250705480469e-06, - "loss": 0.0603, + "epoch": 7.378583098173177, + "grad_norm": 0.5155038833618164, + "learning_rate": 2.6214169018268234e-06, + "loss": 0.05, "step": 49680 }, { - "epoch": 3.690034160106936, - "grad_norm": 0.29489457607269287, - "learning_rate": 7.859795039358386e-06, - "loss": 0.0504, + "epoch": 7.380068320213872, + "grad_norm": 0.7419806718826294, + "learning_rate": 2.6199316797861287e-06, + "loss": 0.0566, "step": 49690 }, { - "epoch": 3.6907767711272834, - "grad_norm": 1.2111194133758545, - "learning_rate": 7.855339373236299e-06, - "loss": 0.0466, + "epoch": 7.381553542254567, + "grad_norm": 1.56878662109375, + "learning_rate": 2.618446457745433e-06, + "loss": 0.0645, "step": 49700 }, { - "epoch": 3.6915193821476313, - "grad_norm": 0.5714041590690613, - "learning_rate": 7.850883707114214e-06, - "loss": 0.0915, + "epoch": 7.383038764295262, + "grad_norm": 0.6593931913375854, + "learning_rate": 2.616961235704738e-06, + "loss": 0.0588, "step": 49710 }, { - "epoch": 3.6922619931679788, - "grad_norm": 1.720264196395874, - "learning_rate": 7.846428040992127e-06, - "loss": 0.0974, + "epoch": 7.3845239863359575, + "grad_norm": 1.0526626110076904, + "learning_rate": 2.615476013664043e-06, + "loss": 0.0613, "step": 49720 }, { - "epoch": 3.693004604188326, - "grad_norm": 1.2143886089324951, - "learning_rate": 7.841972374870044e-06, - "loss": 0.047, + "epoch": 7.386009208376652, + "grad_norm": 1.0721087455749512, + "learning_rate": 2.613990791623348e-06, + "loss": 0.0665, "step": 49730 }, { - "epoch": 3.6937472152086737, - "grad_norm": 1.8155580759048462, - "learning_rate": 7.837516708747959e-06, - "loss": 0.0645, + "epoch": 7.387494430417347, + "grad_norm": 0.5129325985908508, + "learning_rate": 2.612505569582653e-06, + "loss": 0.0565, "step": 49740 }, { - "epoch": 3.694489826229021, - "grad_norm": 1.720137596130371, - "learning_rate": 7.833061042625872e-06, - "loss": 0.0603, + "epoch": 7.388979652458042, + "grad_norm": 0.7529094219207764, + "learning_rate": 2.611020347541958e-06, + "loss": 0.0632, "step": 49750 }, { - "epoch": 3.695232437249369, - "grad_norm": 1.3623203039169312, - "learning_rate": 7.828605376503787e-06, - "loss": 0.0717, + "epoch": 7.390464874498738, + "grad_norm": 0.9041786789894104, + "learning_rate": 2.6095351255012622e-06, + "loss": 0.0664, "step": 49760 }, { - "epoch": 3.6959750482697165, - "grad_norm": 0.7780874967575073, - "learning_rate": 7.824149710381702e-06, - "loss": 0.0413, + "epoch": 7.391950096539433, + "grad_norm": 0.5812846422195435, + "learning_rate": 2.608049903460568e-06, + "loss": 0.0495, "step": 49770 }, { - "epoch": 3.696717659290064, - "grad_norm": 1.2925565242767334, - "learning_rate": 7.819694044259617e-06, - "loss": 0.0699, + "epoch": 7.393435318580128, + "grad_norm": 1.0239791870117188, + "learning_rate": 2.6065646814198724e-06, + "loss": 0.0541, "step": 49780 }, { - "epoch": 3.6974602703104114, - "grad_norm": 1.491782546043396, - "learning_rate": 7.815238378137532e-06, - "loss": 0.0693, + "epoch": 7.394920540620823, + "grad_norm": 0.5622916221618652, + "learning_rate": 2.6050794593791772e-06, + "loss": 0.0532, "step": 49790 }, { - "epoch": 3.698202881330759, - "grad_norm": 1.258483648300171, - "learning_rate": 7.810782712015445e-06, - "loss": 0.0523, + "epoch": 7.396405762661518, + "grad_norm": 1.1932148933410645, + "learning_rate": 2.603594237338482e-06, + "loss": 0.0496, "step": 49800 }, { - "epoch": 3.6989454923511067, - "grad_norm": 0.5992489457130432, - "learning_rate": 7.806327045893362e-06, - "loss": 0.0645, + "epoch": 7.397890984702213, + "grad_norm": 1.2361640930175781, + "learning_rate": 2.6021090152977874e-06, + "loss": 0.0689, "step": 49810 }, { - "epoch": 3.699688103371454, - "grad_norm": 2.4607632160186768, - "learning_rate": 7.801871379771275e-06, - "loss": 0.0682, + "epoch": 7.399376206742908, + "grad_norm": 1.5666189193725586, + "learning_rate": 2.6006237932570922e-06, + "loss": 0.0653, "step": 49820 }, { - "epoch": 3.7004307143918016, - "grad_norm": 1.1010299921035767, - "learning_rate": 7.79741571364919e-06, - "loss": 0.0794, + "epoch": 7.400861428783603, + "grad_norm": 0.3307894766330719, + "learning_rate": 2.599138571216397e-06, + "loss": 0.0508, "step": 49830 }, { - "epoch": 3.701173325412149, - "grad_norm": 3.313181161880493, - "learning_rate": 7.792960047527105e-06, - "loss": 0.0543, + "epoch": 7.402346650824298, + "grad_norm": 1.0216509103775024, + "learning_rate": 2.5976533491757024e-06, + "loss": 0.0706, "step": 49840 }, { - "epoch": 3.7019159364324965, - "grad_norm": 2.4023070335388184, - "learning_rate": 7.78850438140502e-06, - "loss": 0.0673, + "epoch": 7.403831872864993, + "grad_norm": 0.5628541111946106, + "learning_rate": 2.596168127135007e-06, + "loss": 0.0453, "step": 49850 }, { - "epoch": 3.7026585474528444, - "grad_norm": 1.3617714643478394, - "learning_rate": 7.784048715282935e-06, - "loss": 0.0477, + "epoch": 7.405317094905689, + "grad_norm": 0.6356275677680969, + "learning_rate": 2.5946829050943117e-06, + "loss": 0.0502, "step": 49860 }, { - "epoch": 3.703401158473192, - "grad_norm": 1.4105480909347534, - "learning_rate": 7.779593049160849e-06, - "loss": 0.0659, + "epoch": 7.406802316946384, + "grad_norm": 0.8141928911209106, + "learning_rate": 2.5931976830536165e-06, + "loss": 0.0712, "step": 49870 }, { - "epoch": 3.7041437694935393, - "grad_norm": 2.565361261367798, - "learning_rate": 7.775137383038765e-06, - "loss": 0.0741, + "epoch": 7.408287538987079, + "grad_norm": 0.5898461937904358, + "learning_rate": 2.591712461012922e-06, + "loss": 0.0595, "step": 49880 }, { - "epoch": 3.7048863805138867, - "grad_norm": 1.3081820011138916, - "learning_rate": 7.770681716916679e-06, - "loss": 0.0527, + "epoch": 7.4097727610277735, + "grad_norm": 1.0788843631744385, + "learning_rate": 2.5902272389722267e-06, + "loss": 0.0512, "step": 49890 }, { - "epoch": 3.705628991534234, - "grad_norm": 0.5387427806854248, - "learning_rate": 7.766226050794594e-06, - "loss": 0.0848, + "epoch": 7.411257983068468, + "grad_norm": 0.790094792842865, + "learning_rate": 2.5887420169315315e-06, + "loss": 0.0583, "step": 49900 }, { - "epoch": 3.706371602554582, - "grad_norm": 1.5753438472747803, - "learning_rate": 7.761770384672509e-06, - "loss": 0.0789, + "epoch": 7.412743205109164, + "grad_norm": 1.4508802890777588, + "learning_rate": 2.587256794890836e-06, + "loss": 0.0608, "step": 49910 }, { - "epoch": 3.7071142135749295, - "grad_norm": 1.9173258543014526, - "learning_rate": 7.757314718550424e-06, - "loss": 0.0863, + "epoch": 7.414228427149859, + "grad_norm": 0.36482828855514526, + "learning_rate": 2.5857715728501416e-06, + "loss": 0.0665, "step": 49920 }, { - "epoch": 3.707856824595277, - "grad_norm": 0.6267051696777344, - "learning_rate": 7.752859052428339e-06, - "loss": 0.0511, + "epoch": 7.415713649190554, + "grad_norm": 0.8149128556251526, + "learning_rate": 2.584286350809446e-06, + "loss": 0.0533, "step": 49930 }, { - "epoch": 3.7085994356156244, - "grad_norm": 0.7196197509765625, - "learning_rate": 7.748403386306252e-06, - "loss": 0.07, + "epoch": 7.417198871231249, + "grad_norm": 1.4138531684875488, + "learning_rate": 2.582801128768751e-06, + "loss": 0.0476, "step": 49940 }, { - "epoch": 3.709342046635972, - "grad_norm": 1.6515315771102905, - "learning_rate": 7.743947720184169e-06, - "loss": 0.0634, + "epoch": 7.418684093271944, + "grad_norm": 1.1289254426956177, + "learning_rate": 2.5813159067280562e-06, + "loss": 0.0567, "step": 49950 }, { - "epoch": 3.71008465765632, - "grad_norm": 1.1136648654937744, - "learning_rate": 7.739492054062084e-06, - "loss": 0.0372, + "epoch": 7.42016931531264, + "grad_norm": 0.5501538515090942, + "learning_rate": 2.579830684687361e-06, + "loss": 0.0384, "step": 49960 }, { - "epoch": 3.7108272686766672, - "grad_norm": 1.6340572834014893, - "learning_rate": 7.735036387939997e-06, - "loss": 0.0599, + "epoch": 7.4216545373533345, + "grad_norm": 0.5963647961616516, + "learning_rate": 2.578345462646666e-06, + "loss": 0.0418, "step": 49970 }, { - "epoch": 3.7115698796970147, - "grad_norm": 1.4177989959716797, - "learning_rate": 7.730580721817912e-06, - "loss": 0.077, + "epoch": 7.423139759394029, + "grad_norm": 0.9482192993164062, + "learning_rate": 2.576860240605971e-06, + "loss": 0.0471, "step": 49980 }, { - "epoch": 3.712312490717362, - "grad_norm": 1.4736772775650024, - "learning_rate": 7.726125055695827e-06, - "loss": 0.0872, + "epoch": 7.424624981434724, + "grad_norm": 0.9947309494018555, + "learning_rate": 2.575375018565276e-06, + "loss": 0.0615, "step": 49990 }, { - "epoch": 3.7130551017377096, - "grad_norm": 1.0160552263259888, - "learning_rate": 7.721669389573742e-06, - "loss": 0.0432, + "epoch": 7.426110203475419, + "grad_norm": 0.7360773682594299, + "learning_rate": 2.5738897965245805e-06, + "loss": 0.0619, "step": 50000 }, { - "epoch": 3.7137977127580575, - "grad_norm": 2.048893451690674, - "learning_rate": 7.717213723451655e-06, - "loss": 0.0606, + "epoch": 7.427595425516115, + "grad_norm": 1.2429487705230713, + "learning_rate": 2.5724045744838854e-06, + "loss": 0.0536, "step": 50010 }, { - "epoch": 3.714540323778405, - "grad_norm": 0.3452848494052887, - "learning_rate": 7.71275805732957e-06, - "loss": 0.0257, + "epoch": 7.42908064755681, + "grad_norm": 0.48328685760498047, + "learning_rate": 2.5709193524431902e-06, + "loss": 0.0524, "step": 50020 }, { - "epoch": 3.7152829347987524, - "grad_norm": 2.9031543731689453, - "learning_rate": 7.708302391207487e-06, - "loss": 0.0525, + "epoch": 7.430565869597505, + "grad_norm": 0.6668089628219604, + "learning_rate": 2.5694341304024955e-06, + "loss": 0.0378, "step": 50030 }, { - "epoch": 3.7160255458191, - "grad_norm": 1.9653599262237549, - "learning_rate": 7.7038467250854e-06, - "loss": 0.0811, + "epoch": 7.4320510916382, + "grad_norm": 0.7315511703491211, + "learning_rate": 2.5679489083618004e-06, + "loss": 0.0507, "step": 50040 }, { - "epoch": 3.7167681568394473, - "grad_norm": 0.6306089758872986, - "learning_rate": 7.699391058963315e-06, - "loss": 0.0619, + "epoch": 7.433536313678895, + "grad_norm": 0.9499955773353577, + "learning_rate": 2.566463686321105e-06, + "loss": 0.0583, "step": 50050 }, { - "epoch": 3.717510767859795, - "grad_norm": 2.5835585594177246, - "learning_rate": 7.694935392841229e-06, - "loss": 0.0772, + "epoch": 7.43502153571959, + "grad_norm": 1.1920688152313232, + "learning_rate": 2.56497846428041e-06, + "loss": 0.0585, "step": 50060 }, { - "epoch": 3.7182533788801426, - "grad_norm": 1.6114825010299683, - "learning_rate": 7.690479726719145e-06, - "loss": 0.0758, + "epoch": 7.436506757760285, + "grad_norm": 0.6322932243347168, + "learning_rate": 2.5634932422397153e-06, + "loss": 0.0559, "step": 50070 }, { - "epoch": 3.71899598990049, - "grad_norm": 2.363929271697998, - "learning_rate": 7.68602406059706e-06, - "loss": 0.0746, + "epoch": 7.43799197980098, + "grad_norm": 0.2330750972032547, + "learning_rate": 2.5620080201990198e-06, + "loss": 0.0438, "step": 50080 }, { - "epoch": 3.7197386009208375, - "grad_norm": 0.5508888959884644, - "learning_rate": 7.681568394474973e-06, - "loss": 0.0874, + "epoch": 7.439477201841675, + "grad_norm": 1.0158408880233765, + "learning_rate": 2.5605227981583246e-06, + "loss": 0.0465, "step": 50090 }, { - "epoch": 3.720481211941185, - "grad_norm": 0.6778455972671509, - "learning_rate": 7.67711272835289e-06, - "loss": 0.0444, + "epoch": 7.44096242388237, + "grad_norm": 0.9743531942367554, + "learning_rate": 2.55903757611763e-06, + "loss": 0.0577, "step": 50100 }, { - "epoch": 3.721223822961533, - "grad_norm": 0.6257083415985107, - "learning_rate": 7.672657062230803e-06, - "loss": 0.0501, + "epoch": 7.442447645923066, + "grad_norm": 1.198718786239624, + "learning_rate": 2.5575523540769348e-06, + "loss": 0.0546, "step": 50110 }, { - "epoch": 3.7219664339818803, - "grad_norm": 0.7296163439750671, - "learning_rate": 7.668201396108718e-06, - "loss": 0.0719, + "epoch": 7.443932867963761, + "grad_norm": 0.6434746384620667, + "learning_rate": 2.5560671320362396e-06, + "loss": 0.0647, "step": 50120 }, { - "epoch": 3.7227090450022278, - "grad_norm": 1.6046028137207031, - "learning_rate": 7.663745729986632e-06, - "loss": 0.0848, + "epoch": 7.4454180900044555, + "grad_norm": 2.427293539047241, + "learning_rate": 2.5545819099955445e-06, + "loss": 0.0567, "step": 50130 }, { - "epoch": 3.723451656022575, - "grad_norm": 2.0736193656921387, - "learning_rate": 7.659290063864548e-06, - "loss": 0.0585, + "epoch": 7.44690331204515, + "grad_norm": 0.7034928202629089, + "learning_rate": 2.5530966879548498e-06, + "loss": 0.0615, "step": 50140 }, { - "epoch": 3.7241942670429227, - "grad_norm": 3.5360021591186523, - "learning_rate": 7.654834397742463e-06, - "loss": 0.0601, + "epoch": 7.448388534085846, + "grad_norm": 0.41088375449180603, + "learning_rate": 2.5516114659141546e-06, + "loss": 0.044, "step": 50150 }, { - "epoch": 3.7249368780632706, - "grad_norm": 1.6731319427490234, - "learning_rate": 7.650378731620377e-06, - "loss": 0.0783, + "epoch": 7.449873756126541, + "grad_norm": 0.7987599968910217, + "learning_rate": 2.550126243873459e-06, + "loss": 0.0553, "step": 50160 }, { - "epoch": 3.725679489083618, - "grad_norm": 1.434248685836792, - "learning_rate": 7.645923065498292e-06, - "loss": 0.0688, + "epoch": 7.451358978167236, + "grad_norm": 1.0732996463775635, + "learning_rate": 2.548641021832764e-06, + "loss": 0.0567, "step": 50170 }, { - "epoch": 3.7264221001039655, - "grad_norm": 2.1045992374420166, - "learning_rate": 7.641467399376207e-06, - "loss": 0.0679, + "epoch": 7.452844200207931, + "grad_norm": 0.3790551722049713, + "learning_rate": 2.547155799792069e-06, + "loss": 0.0524, "step": 50180 }, { - "epoch": 3.7271647111243134, - "grad_norm": 2.572453022003174, - "learning_rate": 7.637011733254122e-06, - "loss": 0.0378, + "epoch": 7.454329422248626, + "grad_norm": 1.484919548034668, + "learning_rate": 2.545670577751374e-06, + "loss": 0.055, "step": 50190 }, { - "epoch": 3.7279073221446604, - "grad_norm": 2.0661511421203613, - "learning_rate": 7.632556067132037e-06, - "loss": 0.0657, + "epoch": 7.455814644289322, + "grad_norm": 0.797602117061615, + "learning_rate": 2.544185355710679e-06, + "loss": 0.0697, "step": 50200 }, { - "epoch": 3.7286499331650083, - "grad_norm": 4.1037278175354, - "learning_rate": 7.628100401009952e-06, - "loss": 0.0909, + "epoch": 7.4572998663300165, + "grad_norm": 0.8872159123420715, + "learning_rate": 2.542700133669984e-06, + "loss": 0.0582, "step": 50210 }, { - "epoch": 3.7293925441853557, - "grad_norm": 2.651695728302002, - "learning_rate": 7.623644734887866e-06, - "loss": 0.07, + "epoch": 7.458785088370711, + "grad_norm": 1.2550395727157593, + "learning_rate": 2.541214911629289e-06, + "loss": 0.0584, "step": 50220 }, { - "epoch": 3.730135155205703, - "grad_norm": 1.7881807088851929, - "learning_rate": 7.619189068765781e-06, - "loss": 0.0622, + "epoch": 7.460270310411406, + "grad_norm": 0.709187388420105, + "learning_rate": 2.5397296895885935e-06, + "loss": 0.0453, "step": 50230 }, { - "epoch": 3.730877766226051, - "grad_norm": 0.9953715801239014, - "learning_rate": 7.614733402643695e-06, - "loss": 0.0471, + "epoch": 7.461755532452101, + "grad_norm": 1.2298972606658936, + "learning_rate": 2.5382444675478983e-06, + "loss": 0.0515, "step": 50240 }, { - "epoch": 3.7316203772463985, - "grad_norm": 1.1658798456192017, - "learning_rate": 7.610277736521611e-06, - "loss": 0.0636, + "epoch": 7.463240754492797, + "grad_norm": 0.9435555934906006, + "learning_rate": 2.5367592455072036e-06, + "loss": 0.0594, "step": 50250 }, { - "epoch": 3.732362988266746, - "grad_norm": 1.1256842613220215, - "learning_rate": 7.605822070399525e-06, - "loss": 0.0665, + "epoch": 7.464725976533492, + "grad_norm": 1.29453706741333, + "learning_rate": 2.5352740234665085e-06, + "loss": 0.0544, "step": 50260 }, { - "epoch": 3.7331055992870934, - "grad_norm": 0.6367254853248596, - "learning_rate": 7.601366404277439e-06, - "loss": 0.052, + "epoch": 7.466211198574187, + "grad_norm": 0.9184277653694153, + "learning_rate": 2.5337888014258133e-06, + "loss": 0.0627, "step": 50270 }, { - "epoch": 3.733848210307441, - "grad_norm": 2.0500311851501465, - "learning_rate": 7.596910738155354e-06, - "loss": 0.0545, + "epoch": 7.467696420614882, + "grad_norm": 0.1898077428340912, + "learning_rate": 2.532303579385118e-06, + "loss": 0.0449, "step": 50280 }, { - "epoch": 3.7345908213277887, - "grad_norm": 0.8507987856864929, - "learning_rate": 7.59245507203327e-06, - "loss": 0.0459, + "epoch": 7.469181642655577, + "grad_norm": 0.5328799486160278, + "learning_rate": 2.5308183573444235e-06, + "loss": 0.0458, "step": 50290 }, { - "epoch": 3.735333432348136, - "grad_norm": 1.231722354888916, - "learning_rate": 7.587999405911184e-06, - "loss": 0.0689, + "epoch": 7.470666864696272, + "grad_norm": 0.43151047825813293, + "learning_rate": 2.5293331353037283e-06, + "loss": 0.0567, "step": 50300 }, { - "epoch": 3.7360760433684836, - "grad_norm": 1.071260929107666, - "learning_rate": 7.583543739789098e-06, - "loss": 0.0589, + "epoch": 7.472152086736967, + "grad_norm": 1.438880205154419, + "learning_rate": 2.5278479132630328e-06, + "loss": 0.0638, "step": 50310 }, { - "epoch": 3.736818654388831, - "grad_norm": 0.9924083948135376, - "learning_rate": 7.5790880736670124e-06, - "loss": 0.0662, + "epoch": 7.473637308777662, + "grad_norm": 1.543481469154358, + "learning_rate": 2.5263626912223376e-06, + "loss": 0.0687, "step": 50320 }, { - "epoch": 3.7375612654091785, - "grad_norm": 3.8950726985931396, - "learning_rate": 7.574632407544928e-06, - "loss": 0.0801, + "epoch": 7.475122530818357, + "grad_norm": 0.7402935028076172, + "learning_rate": 2.524877469181643e-06, + "loss": 0.0657, "step": 50330 }, { - "epoch": 3.7383038764295264, - "grad_norm": 1.2569303512573242, - "learning_rate": 7.570176741422843e-06, - "loss": 0.0549, + "epoch": 7.476607752859053, + "grad_norm": 1.9598997831344604, + "learning_rate": 2.5233922471409478e-06, + "loss": 0.0535, "step": 50340 }, { - "epoch": 3.739046487449874, - "grad_norm": 2.5183095932006836, - "learning_rate": 7.565721075300757e-06, - "loss": 0.071, + "epoch": 7.478092974899748, + "grad_norm": 0.6482336521148682, + "learning_rate": 2.5219070251002526e-06, + "loss": 0.0571, "step": 50350 }, { - "epoch": 3.7397890984702213, - "grad_norm": 0.9033598303794861, - "learning_rate": 7.561265409178673e-06, - "loss": 0.0671, + "epoch": 7.479578196940443, + "grad_norm": 0.7986770272254944, + "learning_rate": 2.520421803059558e-06, + "loss": 0.049, "step": 50360 }, { - "epoch": 3.740531709490569, - "grad_norm": 3.6152873039245605, - "learning_rate": 7.556809743056587e-06, - "loss": 0.0498, + "epoch": 7.481063418981138, + "grad_norm": 0.5971524715423584, + "learning_rate": 2.5189365810188627e-06, + "loss": 0.0464, "step": 50370 }, { - "epoch": 3.7412743205109162, - "grad_norm": 0.6265145540237427, - "learning_rate": 7.5523540769345015e-06, - "loss": 0.0791, + "epoch": 7.4825486410218325, + "grad_norm": 0.5334088802337646, + "learning_rate": 2.517451358978167e-06, + "loss": 0.0379, "step": 50380 }, { - "epoch": 3.742016931531264, - "grad_norm": 0.47644293308258057, - "learning_rate": 7.547898410812416e-06, - "loss": 0.0719, + "epoch": 7.484033863062528, + "grad_norm": 0.7337051630020142, + "learning_rate": 2.515966136937472e-06, + "loss": 0.0442, "step": 50390 }, { - "epoch": 3.7427595425516116, - "grad_norm": 1.28927743434906, - "learning_rate": 7.543442744690332e-06, - "loss": 0.0471, + "epoch": 7.485519085103223, + "grad_norm": 0.6580100655555725, + "learning_rate": 2.5144809148967773e-06, + "loss": 0.0558, "step": 50400 }, { - "epoch": 3.743502153571959, - "grad_norm": 0.5300150513648987, - "learning_rate": 7.5389870785682465e-06, - "loss": 0.0565, + "epoch": 7.487004307143918, + "grad_norm": 2.0155866146087646, + "learning_rate": 2.512995692856082e-06, + "loss": 0.0524, "step": 50410 }, { - "epoch": 3.7442447645923065, - "grad_norm": 0.5767967104911804, - "learning_rate": 7.534531412446161e-06, - "loss": 0.0767, + "epoch": 7.488489529184613, + "grad_norm": 1.34869384765625, + "learning_rate": 2.511510470815387e-06, + "loss": 0.0498, "step": 50420 }, { - "epoch": 3.744987375612654, - "grad_norm": 1.1901088953018188, - "learning_rate": 7.530075746324075e-06, - "loss": 0.0639, + "epoch": 7.489974751225308, + "grad_norm": 0.701053261756897, + "learning_rate": 2.510025248774692e-06, + "loss": 0.0629, "step": 50430 }, { - "epoch": 3.745729986633002, - "grad_norm": 0.5107831954956055, - "learning_rate": 7.525620080201991e-06, - "loss": 0.0423, + "epoch": 7.491459973266004, + "grad_norm": 0.890023410320282, + "learning_rate": 2.508540026733997e-06, + "loss": 0.044, "step": 50440 }, { - "epoch": 3.7464725976533493, - "grad_norm": 0.25454196333885193, - "learning_rate": 7.521164414079905e-06, - "loss": 0.0639, + "epoch": 7.492945195306699, + "grad_norm": 1.885815978050232, + "learning_rate": 2.507054804693302e-06, + "loss": 0.0562, "step": 50450 }, { - "epoch": 3.7472152086736967, - "grad_norm": 0.2913890480995178, - "learning_rate": 7.51670874795782e-06, - "loss": 0.0553, + "epoch": 7.4944304173473935, + "grad_norm": 0.8531511425971985, + "learning_rate": 2.5055695826526065e-06, + "loss": 0.0661, "step": 50460 }, { - "epoch": 3.747957819694044, - "grad_norm": 1.8346703052520752, - "learning_rate": 7.512253081835736e-06, - "loss": 0.0751, + "epoch": 7.495915639388088, + "grad_norm": 1.0763678550720215, + "learning_rate": 2.504084360611912e-06, + "loss": 0.0709, "step": 50470 }, { - "epoch": 3.7487004307143916, - "grad_norm": 1.856366753578186, - "learning_rate": 7.50779741571365e-06, - "loss": 0.0852, + "epoch": 7.497400861428783, + "grad_norm": 1.0584913492202759, + "learning_rate": 2.5025991385712166e-06, + "loss": 0.0465, "step": 50480 }, { - "epoch": 3.7494430417347395, - "grad_norm": 1.0152894258499146, - "learning_rate": 7.503341749591564e-06, - "loss": 0.0439, + "epoch": 7.498886083469479, + "grad_norm": 0.806760311126709, + "learning_rate": 2.5011139165305215e-06, + "loss": 0.046, "step": 50490 }, { - "epoch": 3.750185652755087, - "grad_norm": 1.121036171913147, - "learning_rate": 7.498886083469479e-06, - "loss": 0.0781, + "epoch": 7.500371305510174, + "grad_norm": 0.5623704791069031, + "learning_rate": 2.4996286944898267e-06, + "loss": 0.066, "step": 50500 }, { - "epoch": 3.7509282637754344, - "grad_norm": 3.682908535003662, - "learning_rate": 7.494430417347393e-06, - "loss": 0.0624, + "epoch": 7.501856527550869, + "grad_norm": 1.3250830173492432, + "learning_rate": 2.498143472449131e-06, + "loss": 0.0697, "step": 50510 }, { - "epoch": 3.751670874795782, - "grad_norm": 1.5728288888931274, - "learning_rate": 7.489974751225309e-06, - "loss": 0.0812, + "epoch": 7.503341749591564, + "grad_norm": 1.0660487413406372, + "learning_rate": 2.4966582504084365e-06, + "loss": 0.0576, "step": 50520 }, { - "epoch": 3.7524134858161293, - "grad_norm": 1.3011523485183716, - "learning_rate": 7.485519085103223e-06, - "loss": 0.1094, + "epoch": 7.504826971632259, + "grad_norm": 1.622944951057434, + "learning_rate": 2.4951730283677413e-06, + "loss": 0.0606, "step": 50530 }, { - "epoch": 3.753156096836477, - "grad_norm": 1.683307409286499, - "learning_rate": 7.481063418981138e-06, - "loss": 0.0715, + "epoch": 7.506312193672954, + "grad_norm": 1.4378278255462646, + "learning_rate": 2.493687806327046e-06, + "loss": 0.0601, "step": 50540 }, { - "epoch": 3.7538987078568247, - "grad_norm": 0.9677258729934692, - "learning_rate": 7.476607752859052e-06, - "loss": 0.0619, + "epoch": 7.507797415713649, + "grad_norm": 0.5110106468200684, + "learning_rate": 2.492202584286351e-06, + "loss": 0.0478, "step": 50550 }, { - "epoch": 3.754641318877172, - "grad_norm": 1.786702036857605, - "learning_rate": 7.472152086736967e-06, - "loss": 0.0721, + "epoch": 7.509282637754344, + "grad_norm": 1.3880940675735474, + "learning_rate": 2.490717362245656e-06, + "loss": 0.0669, "step": 50560 }, { - "epoch": 3.7553839298975196, - "grad_norm": 4.346724987030029, - "learning_rate": 7.467696420614882e-06, - "loss": 0.0477, + "epoch": 7.510767859795039, + "grad_norm": 0.3109069764614105, + "learning_rate": 2.4892321402049607e-06, + "loss": 0.0534, "step": 50570 }, { - "epoch": 3.756126540917867, - "grad_norm": 0.49064943194389343, - "learning_rate": 7.463240754492797e-06, - "loss": 0.073, + "epoch": 7.512253081835734, + "grad_norm": 1.1899399757385254, + "learning_rate": 2.4877469181642656e-06, + "loss": 0.0574, "step": 50580 }, { - "epoch": 3.756869151938215, - "grad_norm": 0.6625070571899414, - "learning_rate": 7.458785088370711e-06, - "loss": 0.0626, + "epoch": 7.51373830387643, + "grad_norm": 0.43168699741363525, + "learning_rate": 2.4862616961235704e-06, + "loss": 0.0587, "step": 50590 }, { - "epoch": 3.7576117629585624, - "grad_norm": 3.1530327796936035, - "learning_rate": 7.454329422248626e-06, - "loss": 0.0642, + "epoch": 7.515223525917125, + "grad_norm": 1.5160905122756958, + "learning_rate": 2.4847764740828757e-06, + "loss": 0.0699, "step": 50600 }, { - "epoch": 3.75835437397891, - "grad_norm": 0.38084813952445984, - "learning_rate": 7.449873756126541e-06, - "loss": 0.0687, + "epoch": 7.51670874795782, + "grad_norm": 0.7211483120918274, + "learning_rate": 2.4832912520421806e-06, + "loss": 0.0549, "step": 50610 }, { - "epoch": 3.7590969849992573, - "grad_norm": 0.8041538000106812, - "learning_rate": 7.4454180900044555e-06, + "epoch": 7.5181939699985145, + "grad_norm": 1.1276590824127197, + "learning_rate": 2.4818060300014854e-06, "loss": 0.0677, "step": 50620 }, { - "epoch": 3.7598395960196047, - "grad_norm": 1.4357954263687134, - "learning_rate": 7.440962423882371e-06, - "loss": 0.0461, + "epoch": 7.519679192039209, + "grad_norm": 0.8371122479438782, + "learning_rate": 2.4803208079607903e-06, + "loss": 0.05, "step": 50630 }, { - "epoch": 3.7605822070399526, - "grad_norm": 0.9471798539161682, - "learning_rate": 7.4365067577602855e-06, - "loss": 0.0871, + "epoch": 7.521164414079905, + "grad_norm": 0.7217128872871399, + "learning_rate": 2.478835585920095e-06, + "loss": 0.0599, "step": 50640 }, { - "epoch": 3.7613248180603, - "grad_norm": 1.356444001197815, - "learning_rate": 7.4320510916382005e-06, - "loss": 0.0514, + "epoch": 7.5226496361206, + "grad_norm": 1.5906058549880981, + "learning_rate": 2.4773503638794004e-06, + "loss": 0.0576, "step": 50650 }, { - "epoch": 3.7620674290806475, - "grad_norm": 0.9252959489822388, - "learning_rate": 7.427595425516115e-06, - "loss": 0.0675, + "epoch": 7.524134858161295, + "grad_norm": 0.8109161257743835, + "learning_rate": 2.475865141838705e-06, + "loss": 0.0629, "step": 50660 }, { - "epoch": 3.762810040100995, - "grad_norm": 1.9478005170822144, - "learning_rate": 7.42313975939403e-06, - "loss": 0.0473, + "epoch": 7.52562008020199, + "grad_norm": 0.9763447642326355, + "learning_rate": 2.47437991979801e-06, + "loss": 0.0378, "step": 50670 }, { - "epoch": 3.7635526511213424, - "grad_norm": 0.6486461162567139, - "learning_rate": 7.418684093271944e-06, - "loss": 0.0632, + "epoch": 7.527105302242685, + "grad_norm": 1.2344034910202026, + "learning_rate": 2.472894697757315e-06, + "loss": 0.0531, "step": 50680 }, { - "epoch": 3.7642952621416903, - "grad_norm": 1.0158225297927856, - "learning_rate": 7.41422842714986e-06, - "loss": 0.0945, + "epoch": 7.528590524283381, + "grad_norm": 1.221603512763977, + "learning_rate": 2.47140947571662e-06, + "loss": 0.053, "step": 50690 }, { - "epoch": 3.7650378731620378, - "grad_norm": 1.4446204900741577, - "learning_rate": 7.409772761027774e-06, - "loss": 0.0441, + "epoch": 7.5300757463240755, + "grad_norm": 1.376077651977539, + "learning_rate": 2.4699242536759247e-06, + "loss": 0.049, "step": 50700 }, { - "epoch": 3.765780484182385, - "grad_norm": 1.2535960674285889, - "learning_rate": 7.405317094905689e-06, - "loss": 0.0414, + "epoch": 7.53156096836477, + "grad_norm": 1.7430099248886108, + "learning_rate": 2.4684390316352296e-06, + "loss": 0.0603, "step": 50710 }, { - "epoch": 3.7665230952027327, - "grad_norm": 0.49216005206108093, - "learning_rate": 7.400861428783603e-06, - "loss": 0.0505, + "epoch": 7.533046190405465, + "grad_norm": 0.40242937207221985, + "learning_rate": 2.4669538095945344e-06, + "loss": 0.0584, "step": 50720 }, { - "epoch": 3.76726570622308, - "grad_norm": 1.1775668859481812, - "learning_rate": 7.396405762661518e-06, - "loss": 0.0713, + "epoch": 7.53453141244616, + "grad_norm": 0.4440084993839264, + "learning_rate": 2.4654685875538393e-06, + "loss": 0.0441, "step": 50730 }, { - "epoch": 3.768008317243428, - "grad_norm": 1.1207523345947266, - "learning_rate": 7.391950096539433e-06, - "loss": 0.0645, + "epoch": 7.536016634486856, + "grad_norm": 1.1307166814804077, + "learning_rate": 2.4639833655131446e-06, + "loss": 0.0449, "step": 50740 }, { - "epoch": 3.7687509282637754, - "grad_norm": 0.7334256172180176, - "learning_rate": 7.387494430417348e-06, - "loss": 0.0407, + "epoch": 7.537501856527551, + "grad_norm": 1.3973692655563354, + "learning_rate": 2.4624981434724494e-06, + "loss": 0.0549, "step": 50750 }, { - "epoch": 3.769493539284123, - "grad_norm": 0.34325599670410156, - "learning_rate": 7.383038764295263e-06, - "loss": 0.0618, + "epoch": 7.538987078568246, + "grad_norm": 1.0603303909301758, + "learning_rate": 2.4610129214317543e-06, + "loss": 0.0487, "step": 50760 }, { - "epoch": 3.770236150304471, - "grad_norm": 1.9858282804489136, - "learning_rate": 7.378583098173177e-06, - "loss": 0.0502, + "epoch": 7.540472300608941, + "grad_norm": 0.7205509543418884, + "learning_rate": 2.459527699391059e-06, + "loss": 0.0433, "step": 50770 }, { - "epoch": 3.770978761324818, - "grad_norm": 0.8460551500320435, - "learning_rate": 7.374127432051092e-06, - "loss": 0.0662, + "epoch": 7.5419575226496365, + "grad_norm": 0.5332754254341125, + "learning_rate": 2.458042477350364e-06, + "loss": 0.0485, "step": 50780 }, { - "epoch": 3.7717213723451657, - "grad_norm": 1.374432921409607, - "learning_rate": 7.369671765929006e-06, - "loss": 0.0294, + "epoch": 7.543442744690331, + "grad_norm": 1.2866615056991577, + "learning_rate": 2.456557255309669e-06, + "loss": 0.0558, "step": 50790 }, { - "epoch": 3.772463983365513, - "grad_norm": 0.547275722026825, - "learning_rate": 7.365216099806921e-06, - "loss": 0.0512, + "epoch": 7.544927966731026, + "grad_norm": 1.0548276901245117, + "learning_rate": 2.455072033268974e-06, + "loss": 0.0554, "step": 50800 }, { - "epoch": 3.7732065943858606, - "grad_norm": 2.171943426132202, - "learning_rate": 7.360760433684836e-06, - "loss": 0.074, + "epoch": 7.546413188771721, + "grad_norm": 1.173340916633606, + "learning_rate": 2.4535868112282786e-06, + "loss": 0.0651, "step": 50810 }, { - "epoch": 3.7739492054062085, - "grad_norm": 5.715026378631592, - "learning_rate": 7.356304767562751e-06, - "loss": 0.0664, + "epoch": 7.547898410812416, + "grad_norm": 1.104522466659546, + "learning_rate": 2.452101589187584e-06, + "loss": 0.0596, "step": 50820 }, { - "epoch": 3.774691816426556, - "grad_norm": 0.8222048282623291, - "learning_rate": 7.351849101440665e-06, - "loss": 0.0573, + "epoch": 7.549383632853112, + "grad_norm": 0.3949863314628601, + "learning_rate": 2.4506163671468887e-06, + "loss": 0.0492, "step": 50830 }, { - "epoch": 3.7754344274469034, - "grad_norm": 0.7894913554191589, - "learning_rate": 7.34739343531858e-06, - "loss": 0.0447, + "epoch": 7.550868854893807, + "grad_norm": 1.375347375869751, + "learning_rate": 2.4491311451061936e-06, + "loss": 0.0569, "step": 50840 }, { - "epoch": 3.776177038467251, - "grad_norm": 0.9093634486198425, - "learning_rate": 7.342937769196494e-06, - "loss": 0.0685, + "epoch": 7.552354076934502, + "grad_norm": 1.8367573022842407, + "learning_rate": 2.4476459230654984e-06, + "loss": 0.0443, "step": 50850 }, { - "epoch": 3.7769196494875983, - "grad_norm": 0.49111273884773254, - "learning_rate": 7.338482103074409e-06, - "loss": 0.0421, + "epoch": 7.553839298975197, + "grad_norm": 1.2803938388824463, + "learning_rate": 2.4461607010248033e-06, + "loss": 0.0462, "step": 50860 }, { - "epoch": 3.777662260507946, - "grad_norm": 2.105043411254883, - "learning_rate": 7.334026436952325e-06, - "loss": 0.0791, + "epoch": 7.555324521015892, + "grad_norm": 0.6907781958580017, + "learning_rate": 2.4446754789841086e-06, + "loss": 0.0558, "step": 50870 }, { - "epoch": 3.7784048715282936, - "grad_norm": 0.9441326856613159, - "learning_rate": 7.329570770830239e-06, - "loss": 0.0506, + "epoch": 7.556809743056587, + "grad_norm": 0.6908086538314819, + "learning_rate": 2.4431902569434134e-06, + "loss": 0.0519, "step": 50880 }, { - "epoch": 3.779147482548641, - "grad_norm": 0.6536591053009033, - "learning_rate": 7.325115104708154e-06, - "loss": 0.0574, + "epoch": 7.558294965097282, + "grad_norm": 0.7062651515007019, + "learning_rate": 2.4417050349027183e-06, + "loss": 0.0699, "step": 50890 }, { - "epoch": 3.7798900935689885, - "grad_norm": 0.7419950366020203, - "learning_rate": 7.3206594385860685e-06, - "loss": 0.0576, + "epoch": 7.559780187137977, + "grad_norm": 0.5621799826622009, + "learning_rate": 2.440219812862023e-06, + "loss": 0.0482, "step": 50900 }, { - "epoch": 3.780632704589336, - "grad_norm": 0.9035283923149109, - "learning_rate": 7.3162037724639835e-06, - "loss": 0.0522, + "epoch": 7.561265409178672, + "grad_norm": 1.0808478593826294, + "learning_rate": 2.438734590821328e-06, + "loss": 0.053, "step": 50910 }, { - "epoch": 3.781375315609684, - "grad_norm": 1.0308187007904053, - "learning_rate": 7.3117481063418985e-06, - "loss": 0.0575, + "epoch": 7.562750631219368, + "grad_norm": 0.8863072991371155, + "learning_rate": 2.437249368780633e-06, + "loss": 0.0522, "step": 50920 }, { - "epoch": 3.7821179266300313, - "grad_norm": 1.2720097303390503, - "learning_rate": 7.3072924402198135e-06, - "loss": 0.0663, + "epoch": 7.564235853260063, + "grad_norm": 0.433900386095047, + "learning_rate": 2.4357641467399377e-06, + "loss": 0.0557, "step": 50930 }, { - "epoch": 3.7828605376503788, - "grad_norm": 2.122695207595825, - "learning_rate": 7.302836774097728e-06, - "loss": 0.0483, + "epoch": 7.5657210753007575, + "grad_norm": 0.46487393975257874, + "learning_rate": 2.4342789246992426e-06, + "loss": 0.0559, "step": 50940 }, { - "epoch": 3.7836031486707262, - "grad_norm": 1.060192346572876, - "learning_rate": 7.298381107975643e-06, - "loss": 0.0663, + "epoch": 7.5672062973414524, + "grad_norm": 1.7626889944076538, + "learning_rate": 2.432793702658548e-06, + "loss": 0.048, "step": 50950 }, { - "epoch": 3.7843457596910737, - "grad_norm": 2.148590087890625, - "learning_rate": 7.293925441853557e-06, - "loss": 0.068, + "epoch": 7.568691519382147, + "grad_norm": 1.2852072715759277, + "learning_rate": 2.4313084806178523e-06, + "loss": 0.0463, "step": 50960 }, { - "epoch": 3.7850883707114216, - "grad_norm": 3.7318813800811768, - "learning_rate": 7.289469775731472e-06, - "loss": 0.0743, + "epoch": 7.570176741422843, + "grad_norm": 1.1062737703323364, + "learning_rate": 2.4298232585771576e-06, + "loss": 0.0449, "step": 50970 }, { - "epoch": 3.785830981731769, - "grad_norm": 0.6228942275047302, - "learning_rate": 7.285014109609387e-06, - "loss": 0.0545, + "epoch": 7.571661963463538, + "grad_norm": 0.741830587387085, + "learning_rate": 2.4283380365364624e-06, + "loss": 0.0538, "step": 50980 }, { - "epoch": 3.7865735927521165, - "grad_norm": 0.46231064200401306, - "learning_rate": 7.280558443487302e-06, - "loss": 0.0374, + "epoch": 7.573147185504233, + "grad_norm": 1.578948736190796, + "learning_rate": 2.4268528144957673e-06, + "loss": 0.0631, "step": 50990 }, { - "epoch": 3.787316203772464, - "grad_norm": 1.6403611898422241, - "learning_rate": 7.276102777365217e-06, - "loss": 0.0674, + "epoch": 7.574632407544928, + "grad_norm": 0.8625466227531433, + "learning_rate": 2.4253675924550725e-06, + "loss": 0.0437, "step": 51000 }, { - "epoch": 3.7880588147928114, - "grad_norm": 0.5807299613952637, - "learning_rate": 7.271647111243131e-06, - "loss": 0.0653, + "epoch": 7.576117629585623, + "grad_norm": 1.6692250967025757, + "learning_rate": 2.423882370414377e-06, + "loss": 0.0381, "step": 51010 }, { - "epoch": 3.7888014258131593, - "grad_norm": 2.2317705154418945, - "learning_rate": 7.267191445121046e-06, - "loss": 0.0464, + "epoch": 7.5776028516263185, + "grad_norm": 0.9216070771217346, + "learning_rate": 2.4223971483736823e-06, + "loss": 0.0446, "step": 51020 }, { - "epoch": 3.7895440368335067, - "grad_norm": 1.0599946975708008, - "learning_rate": 7.26273577899896e-06, - "loss": 0.0787, + "epoch": 7.579088073667013, + "grad_norm": 1.6464141607284546, + "learning_rate": 2.420911926332987e-06, + "loss": 0.0638, "step": 51030 }, { - "epoch": 3.790286647853854, - "grad_norm": 0.7271379232406616, - "learning_rate": 7.258280112876876e-06, - "loss": 0.0682, + "epoch": 7.580573295707708, + "grad_norm": 0.4722020626068115, + "learning_rate": 2.419426704292292e-06, + "loss": 0.0501, "step": 51040 }, { - "epoch": 3.7910292588742016, - "grad_norm": 1.6912349462509155, - "learning_rate": 7.25382444675479e-06, - "loss": 0.0537, + "epoch": 7.582058517748403, + "grad_norm": 0.9962632060050964, + "learning_rate": 2.417941482251597e-06, + "loss": 0.0555, "step": 51050 }, { - "epoch": 3.791771869894549, - "grad_norm": 1.233393907546997, - "learning_rate": 7.249368780632705e-06, - "loss": 0.0698, + "epoch": 7.583543739789098, + "grad_norm": 0.5598209500312805, + "learning_rate": 2.4164562602109017e-06, + "loss": 0.0474, "step": 51060 }, { - "epoch": 3.792514480914897, - "grad_norm": 1.0007754564285278, - "learning_rate": 7.244913114510619e-06, - "loss": 0.0515, + "epoch": 7.585028961829794, + "grad_norm": 1.4611873626708984, + "learning_rate": 2.4149710381702065e-06, + "loss": 0.0639, "step": 51070 }, { - "epoch": 3.7932570919352444, - "grad_norm": 1.4426878690719604, - "learning_rate": 7.240457448388534e-06, - "loss": 0.0731, + "epoch": 7.586514183870489, + "grad_norm": 0.9028664231300354, + "learning_rate": 2.4134858161295114e-06, + "loss": 0.0561, "step": 51080 }, { - "epoch": 3.793999702955592, - "grad_norm": 0.7018789649009705, - "learning_rate": 7.236001782266448e-06, - "loss": 0.0422, + "epoch": 7.587999405911184, + "grad_norm": 0.5143422484397888, + "learning_rate": 2.4120005940888163e-06, + "loss": 0.0571, "step": 51090 }, { - "epoch": 3.7947423139759393, - "grad_norm": 0.8804644346237183, - "learning_rate": 7.231546116144364e-06, - "loss": 0.0557, + "epoch": 7.589484627951879, + "grad_norm": 1.009613275527954, + "learning_rate": 2.4105153720481215e-06, + "loss": 0.048, "step": 51100 }, { - "epoch": 3.7954849249962868, - "grad_norm": 0.588465690612793, - "learning_rate": 7.227090450022278e-06, - "loss": 0.0795, + "epoch": 7.5909698499925735, + "grad_norm": 0.5486442446708679, + "learning_rate": 2.409030150007426e-06, + "loss": 0.0601, "step": 51110 }, { - "epoch": 3.7962275360166347, - "grad_norm": 1.5997480154037476, - "learning_rate": 7.222634783900193e-06, - "loss": 0.0358, + "epoch": 7.592455072033269, + "grad_norm": 0.6895651817321777, + "learning_rate": 2.4075449279667313e-06, + "loss": 0.0704, "step": 51120 }, { - "epoch": 3.796970147036982, - "grad_norm": 0.4901600182056427, - "learning_rate": 7.218179117778108e-06, - "loss": 0.0743, + "epoch": 7.593940294073964, + "grad_norm": 1.1183245182037354, + "learning_rate": 2.406059705926036e-06, + "loss": 0.0511, "step": 51130 }, { - "epoch": 3.7977127580573296, - "grad_norm": 1.299644947052002, - "learning_rate": 7.2137234516560225e-06, - "loss": 0.0445, + "epoch": 7.595425516114659, + "grad_norm": 0.41055673360824585, + "learning_rate": 2.404574483885341e-06, + "loss": 0.0451, "step": 51140 }, { - "epoch": 3.798455369077677, - "grad_norm": 2.474388599395752, - "learning_rate": 7.2092677855339375e-06, - "loss": 0.0676, + "epoch": 7.596910738155354, + "grad_norm": 0.33190709352493286, + "learning_rate": 2.4030892618446462e-06, + "loss": 0.0369, "step": 51150 }, { - "epoch": 3.7991979800980245, - "grad_norm": 0.26905447244644165, - "learning_rate": 7.2048121194118525e-06, - "loss": 0.0655, + "epoch": 7.598395960196049, + "grad_norm": 0.44257375597953796, + "learning_rate": 2.4016040398039507e-06, + "loss": 0.0492, "step": 51160 }, { - "epoch": 3.7999405911183723, - "grad_norm": 1.1446030139923096, - "learning_rate": 7.2003564532897675e-06, - "loss": 0.056, + "epoch": 7.599881182236745, + "grad_norm": 1.0603954792022705, + "learning_rate": 2.400118817763256e-06, + "loss": 0.0397, "step": 51170 }, { - "epoch": 3.80068320213872, - "grad_norm": 0.9644284844398499, - "learning_rate": 7.195900787167682e-06, - "loss": 0.0443, + "epoch": 7.60136640427744, + "grad_norm": 1.061376929283142, + "learning_rate": 2.398633595722561e-06, + "loss": 0.0646, "step": 51180 }, { - "epoch": 3.8014258131590672, - "grad_norm": 1.8860034942626953, - "learning_rate": 7.191445121045597e-06, - "loss": 0.0591, + "epoch": 7.6028516263181345, + "grad_norm": 0.30382776260375977, + "learning_rate": 2.3971483736818657e-06, + "loss": 0.0471, "step": 51190 }, { - "epoch": 3.8021684241794147, - "grad_norm": 1.3141664266586304, - "learning_rate": 7.186989454923511e-06, - "loss": 0.0319, + "epoch": 7.604336848358829, + "grad_norm": 0.6130696535110474, + "learning_rate": 2.3956631516411705e-06, + "loss": 0.059, "step": 51200 }, { - "epoch": 3.802911035199762, - "grad_norm": 1.2933363914489746, - "learning_rate": 7.182533788801426e-06, - "loss": 0.0596, + "epoch": 7.605822070399524, + "grad_norm": 0.900826096534729, + "learning_rate": 2.3941779296004754e-06, + "loss": 0.0568, "step": 51210 }, { - "epoch": 3.80365364622011, - "grad_norm": 1.3324415683746338, - "learning_rate": 7.178078122679341e-06, - "loss": 0.0632, + "epoch": 7.60730729244022, + "grad_norm": 1.1771337985992432, + "learning_rate": 2.3926927075597802e-06, + "loss": 0.0648, "step": 51220 }, { - "epoch": 3.8043962572404575, - "grad_norm": 1.7264912128448486, - "learning_rate": 7.173622456557256e-06, - "loss": 0.0858, + "epoch": 7.608792514480915, + "grad_norm": 1.5176537036895752, + "learning_rate": 2.3912074855190855e-06, + "loss": 0.0504, "step": 51230 }, { - "epoch": 3.805138868260805, - "grad_norm": 1.394806146621704, - "learning_rate": 7.16916679043517e-06, - "loss": 0.0673, + "epoch": 7.61027773652161, + "grad_norm": 0.759335994720459, + "learning_rate": 2.38972226347839e-06, + "loss": 0.0348, "step": 51240 }, - { - "epoch": 3.8058814792811524, - "grad_norm": 0.5998439788818359, - "learning_rate": 7.164711124313085e-06, - "loss": 0.0434, + { + "epoch": 7.611762958562305, + "grad_norm": 1.2962226867675781, + "learning_rate": 2.3882370414376952e-06, + "loss": 0.0683, "step": 51250 }, { - "epoch": 3.8066240903015, - "grad_norm": 1.282382607460022, - "learning_rate": 7.160255458191e-06, - "loss": 0.0696, + "epoch": 7.613248180603, + "grad_norm": 0.6675172448158264, + "learning_rate": 2.386751819397e-06, + "loss": 0.0649, "step": 51260 }, { - "epoch": 3.8073667013218477, - "grad_norm": 1.0203378200531006, - "learning_rate": 7.155799792068915e-06, - "loss": 0.0709, + "epoch": 7.6147334026436955, + "grad_norm": 0.7499662637710571, + "learning_rate": 2.385266597356305e-06, + "loss": 0.0605, "step": 51270 }, { - "epoch": 3.808109312342195, - "grad_norm": 1.8482102155685425, - "learning_rate": 7.15134412594683e-06, - "loss": 0.0499, + "epoch": 7.61621862468439, + "grad_norm": 0.8165285587310791, + "learning_rate": 2.38378137531561e-06, + "loss": 0.0493, "step": 51280 }, { - "epoch": 3.8088519233625426, - "grad_norm": 1.2708896398544312, - "learning_rate": 7.146888459824744e-06, - "loss": 0.0467, + "epoch": 7.617703846725085, + "grad_norm": 1.2159758806228638, + "learning_rate": 2.3822961532749147e-06, + "loss": 0.0489, "step": 51290 }, { - "epoch": 3.80959453438289, - "grad_norm": 2.240234375, - "learning_rate": 7.142432793702659e-06, - "loss": 0.0742, + "epoch": 7.61918906876578, + "grad_norm": 1.0896236896514893, + "learning_rate": 2.38081093123422e-06, + "loss": 0.0632, "step": 51300 }, { - "epoch": 3.8103371454032375, - "grad_norm": 1.0520896911621094, - "learning_rate": 7.137977127580573e-06, - "loss": 0.0631, + "epoch": 7.620674290806475, + "grad_norm": 1.3957602977752686, + "learning_rate": 2.3793257091935244e-06, + "loss": 0.0464, "step": 51310 }, { - "epoch": 3.8110797564235854, - "grad_norm": 0.6658138036727905, - "learning_rate": 7.133521461458488e-06, - "loss": 0.0522, + "epoch": 7.622159512847171, + "grad_norm": 0.7095270156860352, + "learning_rate": 2.3778404871528297e-06, + "loss": 0.0583, "step": 51320 }, { - "epoch": 3.811822367443933, - "grad_norm": 1.1442534923553467, - "learning_rate": 7.129065795336403e-06, - "loss": 0.0711, + "epoch": 7.623644734887866, + "grad_norm": 0.8131939768791199, + "learning_rate": 2.3763552651121345e-06, + "loss": 0.0651, "step": 51330 }, { - "epoch": 3.8125649784642803, - "grad_norm": 2.0070974826812744, - "learning_rate": 7.124610129214318e-06, - "loss": 0.0479, + "epoch": 7.625129956928561, + "grad_norm": 0.3545960783958435, + "learning_rate": 2.3748700430714394e-06, + "loss": 0.0441, "step": 51340 }, { - "epoch": 3.8133075894846282, - "grad_norm": 1.5337635278701782, - "learning_rate": 7.120154463092232e-06, - "loss": 0.0552, + "epoch": 7.626615178969256, + "grad_norm": 1.155347466468811, + "learning_rate": 2.3733848210307442e-06, + "loss": 0.0453, "step": 51350 }, { - "epoch": 3.8140502005049752, - "grad_norm": 2.758072853088379, - "learning_rate": 7.115698796970147e-06, - "loss": 0.0632, + "epoch": 7.628100401009951, + "grad_norm": 1.5265934467315674, + "learning_rate": 2.371899598990049e-06, + "loss": 0.051, "step": 51360 }, { - "epoch": 3.814792811525323, - "grad_norm": 1.1585407257080078, - "learning_rate": 7.111243130848061e-06, - "loss": 0.0559, + "epoch": 7.629585623050646, + "grad_norm": 0.5227043032646179, + "learning_rate": 2.370414376949354e-06, + "loss": 0.039, "step": 51370 }, { - "epoch": 3.8155354225456706, - "grad_norm": 0.7389517426490784, - "learning_rate": 7.106787464725976e-06, - "loss": 0.0523, + "epoch": 7.631070845091341, + "grad_norm": 0.9405038356781006, + "learning_rate": 2.3689291549086592e-06, + "loss": 0.0394, "step": 51380 }, { - "epoch": 3.816278033566018, - "grad_norm": 1.0852411985397339, - "learning_rate": 7.102331798603892e-06, - "loss": 0.0459, + "epoch": 7.632556067132036, + "grad_norm": 0.7868645787239075, + "learning_rate": 2.367443932867964e-06, + "loss": 0.0526, "step": 51390 }, { - "epoch": 3.817020644586366, - "grad_norm": 1.4274524450302124, - "learning_rate": 7.097876132481806e-06, - "loss": 0.0707, + "epoch": 7.634041289172731, + "grad_norm": 0.6418449878692627, + "learning_rate": 2.365958710827269e-06, + "loss": 0.0591, "step": 51400 }, { - "epoch": 3.8177632556067134, - "grad_norm": 2.2135133743286133, - "learning_rate": 7.093420466359721e-06, - "loss": 0.0488, + "epoch": 7.635526511213427, + "grad_norm": 0.6941629648208618, + "learning_rate": 2.364473488786574e-06, + "loss": 0.0659, "step": 51410 }, { - "epoch": 3.818505866627061, - "grad_norm": 2.0411906242370605, - "learning_rate": 7.0889648002376355e-06, - "loss": 0.0572, + "epoch": 7.637011733254122, + "grad_norm": 1.0778722763061523, + "learning_rate": 2.3629882667458787e-06, + "loss": 0.0574, "step": 51420 }, { - "epoch": 3.8192484776474083, - "grad_norm": 1.3621501922607422, - "learning_rate": 7.0845091341155505e-06, - "loss": 0.0438, + "epoch": 7.6384969552948165, + "grad_norm": 0.9663993716239929, + "learning_rate": 2.3615030447051835e-06, + "loss": 0.0558, "step": 51430 }, { - "epoch": 3.8199910886677557, - "grad_norm": 2.786686658859253, - "learning_rate": 7.080053467993465e-06, - "loss": 0.0702, + "epoch": 7.639982177335511, + "grad_norm": 0.7751511335372925, + "learning_rate": 2.3600178226644884e-06, + "loss": 0.0574, "step": 51440 }, { - "epoch": 3.8207336996881036, - "grad_norm": 0.6535211801528931, - "learning_rate": 7.0755978018713805e-06, - "loss": 0.0564, + "epoch": 7.641467399376207, + "grad_norm": 0.7096276879310608, + "learning_rate": 2.3585326006237936e-06, + "loss": 0.0555, "step": 51450 }, { - "epoch": 3.821476310708451, - "grad_norm": 0.9478582143783569, - "learning_rate": 7.071142135749295e-06, - "loss": 0.0522, + "epoch": 7.642952621416902, + "grad_norm": 0.23418889939785004, + "learning_rate": 2.357047378583098e-06, + "loss": 0.0493, "step": 51460 }, { - "epoch": 3.8222189217287985, - "grad_norm": 1.7308098077774048, - "learning_rate": 7.06668646962721e-06, - "loss": 0.0388, + "epoch": 7.644437843457597, + "grad_norm": 1.6592222452163696, + "learning_rate": 2.3555621565424034e-06, + "loss": 0.0617, "step": 51470 }, { - "epoch": 3.822961532749146, - "grad_norm": 1.9273875951766968, - "learning_rate": 7.062230803505124e-06, - "loss": 0.0684, + "epoch": 7.645923065498292, + "grad_norm": 1.0019330978393555, + "learning_rate": 2.3540769345017082e-06, + "loss": 0.0755, "step": 51480 }, { - "epoch": 3.8237041437694934, - "grad_norm": 0.6402938961982727, - "learning_rate": 7.057775137383039e-06, - "loss": 0.0933, + "epoch": 7.647408287538987, + "grad_norm": 0.5399784445762634, + "learning_rate": 2.352591712461013e-06, + "loss": 0.0481, "step": 51490 }, { - "epoch": 3.8244467547898413, - "grad_norm": 1.3616961240768433, - "learning_rate": 7.053319471260953e-06, - "loss": 0.0663, + "epoch": 7.648893509579683, + "grad_norm": 0.8970789909362793, + "learning_rate": 2.351106490420318e-06, + "loss": 0.0656, "step": 51500 }, { - "epoch": 3.8251893658101888, - "grad_norm": 1.957161784172058, - "learning_rate": 7.048863805138869e-06, - "loss": 0.0756, + "epoch": 7.6503787316203775, + "grad_norm": 0.7304027080535889, + "learning_rate": 2.349621268379623e-06, + "loss": 0.0467, "step": 51510 }, { - "epoch": 3.825931976830536, - "grad_norm": 2.335097074508667, - "learning_rate": 7.044408139016784e-06, - "loss": 0.0619, + "epoch": 7.651863953661072, + "grad_norm": 0.49368733167648315, + "learning_rate": 2.348136046338928e-06, + "loss": 0.0558, "step": 51520 }, { - "epoch": 3.8266745878508837, - "grad_norm": 0.9050447940826416, - "learning_rate": 7.039952472894698e-06, - "loss": 0.0537, + "epoch": 7.653349175701767, + "grad_norm": 0.6555716395378113, + "learning_rate": 2.346650824298233e-06, + "loss": 0.0563, "step": 51530 }, { - "epoch": 3.827417198871231, - "grad_norm": 0.2950853705406189, - "learning_rate": 7.035496806772613e-06, - "loss": 0.0598, + "epoch": 7.654834397742462, + "grad_norm": 1.2767904996871948, + "learning_rate": 2.3451656022575378e-06, + "loss": 0.0536, "step": 51540 }, { - "epoch": 3.828159809891579, - "grad_norm": 2.684269428253174, - "learning_rate": 7.031041140650527e-06, - "loss": 0.0896, + "epoch": 7.656319619783158, + "grad_norm": 0.8550059199333191, + "learning_rate": 2.3436803802168426e-06, + "loss": 0.0714, "step": 51550 }, { - "epoch": 3.8289024209119265, - "grad_norm": 2.0332887172698975, - "learning_rate": 7.026585474528442e-06, - "loss": 0.0801, + "epoch": 7.657804841823853, + "grad_norm": 0.6468340754508972, + "learning_rate": 2.3421951581761475e-06, + "loss": 0.0665, "step": 51560 }, { - "epoch": 3.829645031932274, - "grad_norm": 2.3198342323303223, - "learning_rate": 7.022129808406357e-06, - "loss": 0.0645, + "epoch": 7.659290063864548, + "grad_norm": 0.9511982798576355, + "learning_rate": 2.3407099361354524e-06, + "loss": 0.0522, "step": 51570 }, { - "epoch": 3.8303876429526214, - "grad_norm": 1.091840386390686, - "learning_rate": 7.017674142284272e-06, - "loss": 0.0553, + "epoch": 7.660775285905243, + "grad_norm": 1.087459921836853, + "learning_rate": 2.3392247140947576e-06, + "loss": 0.055, "step": 51580 }, { - "epoch": 3.831130253972969, - "grad_norm": 1.6161314249038696, - "learning_rate": 7.013218476162186e-06, - "loss": 0.0612, + "epoch": 7.662260507945938, + "grad_norm": 1.2931932210922241, + "learning_rate": 2.337739492054062e-06, + "loss": 0.0548, "step": 51590 }, { - "epoch": 3.8318728649933167, - "grad_norm": 0.7637621164321899, - "learning_rate": 7.008762810040101e-06, - "loss": 0.0349, + "epoch": 7.663745729986633, + "grad_norm": 1.1031076908111572, + "learning_rate": 2.3362542700133673e-06, + "loss": 0.0493, "step": 51600 }, { - "epoch": 3.832615476013664, - "grad_norm": 0.9852764010429382, - "learning_rate": 7.004307143918015e-06, - "loss": 0.0476, + "epoch": 7.665230952027328, + "grad_norm": 0.9600025415420532, + "learning_rate": 2.334769047972672e-06, + "loss": 0.0462, "step": 51610 }, { - "epoch": 3.8333580870340116, - "grad_norm": 2.236307144165039, - "learning_rate": 6.999851477795931e-06, - "loss": 0.0687, + "epoch": 7.666716174068023, + "grad_norm": 0.45130455493927, + "learning_rate": 2.333283825931977e-06, + "loss": 0.0672, "step": 51620 }, { - "epoch": 3.834100698054359, - "grad_norm": 0.9051099419593811, - "learning_rate": 6.995395811673845e-06, - "loss": 0.0861, + "epoch": 7.668201396108718, + "grad_norm": 1.0152677297592163, + "learning_rate": 2.331798603891282e-06, + "loss": 0.064, "step": 51630 }, { - "epoch": 3.8348433090747065, - "grad_norm": 0.784583568572998, - "learning_rate": 6.99094014555176e-06, - "loss": 0.0594, + "epoch": 7.669686618149413, + "grad_norm": 0.9798538684844971, + "learning_rate": 2.3303133818505868e-06, + "loss": 0.045, "step": 51640 }, { - "epoch": 3.8355859200950544, - "grad_norm": 1.5118728876113892, - "learning_rate": 6.9864844794296745e-06, - "loss": 0.0769, + "epoch": 7.671171840190109, + "grad_norm": 1.3446485996246338, + "learning_rate": 2.3288281598098916e-06, + "loss": 0.0712, "step": 51650 }, { - "epoch": 3.836328531115402, - "grad_norm": 1.1480764150619507, - "learning_rate": 6.9820288133075895e-06, - "loss": 0.0678, + "epoch": 7.672657062230804, + "grad_norm": 0.8345891237258911, + "learning_rate": 2.3273429377691965e-06, + "loss": 0.0673, "step": 51660 }, { - "epoch": 3.8370711421357493, - "grad_norm": 1.8852975368499756, - "learning_rate": 6.9775731471855045e-06, - "loss": 0.0714, + "epoch": 7.674142284271499, + "grad_norm": 0.708493173122406, + "learning_rate": 2.3258577157285018e-06, + "loss": 0.0695, "step": 51670 }, { - "epoch": 3.8378137531560967, - "grad_norm": 0.9265196323394775, - "learning_rate": 6.9731174810634195e-06, - "loss": 0.0424, + "epoch": 7.6756275063121935, + "grad_norm": 0.6146459579467773, + "learning_rate": 2.3243724936878066e-06, + "loss": 0.0478, "step": 51680 }, { - "epoch": 3.838556364176444, - "grad_norm": 0.8514029383659363, - "learning_rate": 6.9686618149413345e-06, - "loss": 0.0565, + "epoch": 7.677112728352888, + "grad_norm": 0.636083722114563, + "learning_rate": 2.3228872716471115e-06, + "loss": 0.0622, "step": 51690 }, { - "epoch": 3.839298975196792, - "grad_norm": 3.038891077041626, - "learning_rate": 6.964206148819249e-06, - "loss": 0.0627, + "epoch": 7.678597950393584, + "grad_norm": 0.8670116066932678, + "learning_rate": 2.3214020496064163e-06, + "loss": 0.0694, "step": 51700 }, { - "epoch": 3.8400415862171395, - "grad_norm": 2.795001268386841, - "learning_rate": 6.959750482697164e-06, - "loss": 0.0808, + "epoch": 7.680083172434279, + "grad_norm": 1.6330854892730713, + "learning_rate": 2.319916827565721e-06, + "loss": 0.0374, "step": 51710 }, { - "epoch": 3.840784197237487, - "grad_norm": 0.8033546805381775, - "learning_rate": 6.955294816575078e-06, - "loss": 0.0667, + "epoch": 7.681568394474974, + "grad_norm": 0.9551437497138977, + "learning_rate": 2.318431605525026e-06, + "loss": 0.0531, "step": 51720 }, { - "epoch": 3.8415268082578344, - "grad_norm": 3.8095524311065674, - "learning_rate": 6.950839150452993e-06, - "loss": 0.0525, + "epoch": 7.683053616515669, + "grad_norm": 1.1932165622711182, + "learning_rate": 2.3169463834843313e-06, + "loss": 0.0606, "step": 51730 }, { - "epoch": 3.842269419278182, - "grad_norm": 1.2857472896575928, - "learning_rate": 6.946383484330908e-06, - "loss": 0.0707, + "epoch": 7.684538838556364, + "grad_norm": 0.9700741767883301, + "learning_rate": 2.3154611614436358e-06, + "loss": 0.0709, "step": 51740 }, { - "epoch": 3.84301203029853, - "grad_norm": 0.5481483340263367, - "learning_rate": 6.941927818208823e-06, - "loss": 0.0624, + "epoch": 7.68602406059706, + "grad_norm": 0.6711783409118652, + "learning_rate": 2.313975939402941e-06, + "loss": 0.0566, "step": 51750 }, { - "epoch": 3.8437546413188772, - "grad_norm": 2.4594411849975586, - "learning_rate": 6.937472152086737e-06, - "loss": 0.0699, + "epoch": 7.6875092826377545, + "grad_norm": 1.296689748764038, + "learning_rate": 2.312490717362246e-06, + "loss": 0.0814, "step": 51760 }, { - "epoch": 3.8444972523392247, - "grad_norm": 2.338852882385254, - "learning_rate": 6.933016485964652e-06, - "loss": 0.0485, + "epoch": 7.688994504678449, + "grad_norm": 1.6995421648025513, + "learning_rate": 2.3110054953215508e-06, + "loss": 0.0648, "step": 51770 }, { - "epoch": 3.845239863359572, - "grad_norm": 0.8134142160415649, - "learning_rate": 6.928560819842566e-06, - "loss": 0.0581, + "epoch": 7.690479726719144, + "grad_norm": 1.0179144144058228, + "learning_rate": 2.3095202732808556e-06, + "loss": 0.0684, "step": 51780 }, { - "epoch": 3.8459824743799196, - "grad_norm": 2.4749584197998047, - "learning_rate": 6.924105153720481e-06, - "loss": 0.077, + "epoch": 7.691964948759839, + "grad_norm": 0.3230994641780853, + "learning_rate": 2.3080350512401605e-06, + "loss": 0.063, "step": 51790 }, { - "epoch": 3.8467250854002675, - "grad_norm": 0.5151814222335815, - "learning_rate": 6.919649487598397e-06, - "loss": 0.0554, + "epoch": 7.693450170800535, + "grad_norm": 0.6015810966491699, + "learning_rate": 2.3065498291994658e-06, + "loss": 0.0556, "step": 51800 }, { - "epoch": 3.847467696420615, - "grad_norm": 2.5601933002471924, - "learning_rate": 6.915193821476311e-06, - "loss": 0.0677, + "epoch": 7.69493539284123, + "grad_norm": 0.5402663946151733, + "learning_rate": 2.30506460715877e-06, + "loss": 0.0477, "step": 51810 }, { - "epoch": 3.8482103074409624, - "grad_norm": 1.637519121170044, - "learning_rate": 6.910738155354226e-06, - "loss": 0.1058, + "epoch": 7.696420614881925, + "grad_norm": 0.5034302473068237, + "learning_rate": 2.3035793851180755e-06, + "loss": 0.0502, "step": 51820 }, { - "epoch": 3.84895291846131, - "grad_norm": 0.6686950325965881, - "learning_rate": 6.90628248923214e-06, - "loss": 0.0781, + "epoch": 7.69790583692262, + "grad_norm": 1.468308448791504, + "learning_rate": 2.3020941630773803e-06, + "loss": 0.0629, "step": 51830 }, { - "epoch": 3.8496955294816573, - "grad_norm": 1.9050847291946411, - "learning_rate": 6.901826823110055e-06, - "loss": 0.0675, + "epoch": 7.6993910589633145, + "grad_norm": 0.7070731520652771, + "learning_rate": 2.300608941036685e-06, + "loss": 0.0487, "step": 51840 }, { - "epoch": 3.850438140502005, - "grad_norm": 1.7424448728561401, - "learning_rate": 6.897371156987969e-06, - "loss": 0.0585, + "epoch": 7.70087628100401, + "grad_norm": 0.8511458039283752, + "learning_rate": 2.29912371899599e-06, + "loss": 0.0618, "step": 51850 }, { - "epoch": 3.8511807515223526, - "grad_norm": 0.6480499505996704, - "learning_rate": 6.892915490865885e-06, - "loss": 0.0579, + "epoch": 7.702361503044705, + "grad_norm": 1.0228487253189087, + "learning_rate": 2.297638496955295e-06, + "loss": 0.0545, "step": 51860 }, { - "epoch": 3.8519233625427, - "grad_norm": 0.8286868929862976, - "learning_rate": 6.888459824743799e-06, - "loss": 0.0416, + "epoch": 7.7038467250854, + "grad_norm": 0.9535961747169495, + "learning_rate": 2.2961532749145998e-06, + "loss": 0.054, "step": 51870 }, { - "epoch": 3.8526659735630475, - "grad_norm": 0.9048423171043396, - "learning_rate": 6.884004158621714e-06, - "loss": 0.0521, + "epoch": 7.705331947126095, + "grad_norm": 0.6639396548271179, + "learning_rate": 2.294668052873905e-06, + "loss": 0.0702, "step": 51880 }, { - "epoch": 3.853408584583395, - "grad_norm": 0.8243032097816467, - "learning_rate": 6.879548492499628e-06, - "loss": 0.0405, + "epoch": 7.70681716916679, + "grad_norm": 0.8463499546051025, + "learning_rate": 2.2931828308332095e-06, + "loss": 0.0521, "step": 51890 }, { - "epoch": 3.854151195603743, - "grad_norm": 0.618739128112793, - "learning_rate": 6.875092826377543e-06, - "loss": 0.0427, + "epoch": 7.708302391207486, + "grad_norm": 0.9757125973701477, + "learning_rate": 2.2916976087925147e-06, + "loss": 0.0603, "step": 51900 }, { - "epoch": 3.8548938066240903, - "grad_norm": 0.4855763912200928, - "learning_rate": 6.8706371602554575e-06, - "loss": 0.054, + "epoch": 7.709787613248181, + "grad_norm": 0.6511328816413879, + "learning_rate": 2.2902123867518196e-06, + "loss": 0.053, "step": 51910 }, { - "epoch": 3.8556364176444378, - "grad_norm": 0.3211023211479187, - "learning_rate": 6.866181494133373e-06, - "loss": 0.0877, + "epoch": 7.7112728352888755, + "grad_norm": 0.5482888221740723, + "learning_rate": 2.2887271647111245e-06, + "loss": 0.0582, "step": 51920 }, { - "epoch": 3.8563790286647857, - "grad_norm": 0.4299314320087433, - "learning_rate": 6.861725828011288e-06, - "loss": 0.0633, + "epoch": 7.71275805732957, + "grad_norm": 0.8813942670822144, + "learning_rate": 2.2872419426704297e-06, + "loss": 0.0563, "step": 51930 }, { - "epoch": 3.8571216396851327, - "grad_norm": 1.2965507507324219, - "learning_rate": 6.8572701618892025e-06, - "loss": 0.0749, + "epoch": 7.714243279370266, + "grad_norm": 0.5689736604690552, + "learning_rate": 2.285756720629734e-06, + "loss": 0.0564, "step": 51940 }, { - "epoch": 3.8578642507054806, - "grad_norm": 0.9099006056785583, - "learning_rate": 6.8528144957671175e-06, - "loss": 0.0495, + "epoch": 7.715728501410961, + "grad_norm": 0.744314432144165, + "learning_rate": 2.2842714985890395e-06, + "loss": 0.0476, "step": 51950 }, { - "epoch": 3.858606861725828, - "grad_norm": 2.0867326259613037, - "learning_rate": 6.848358829645032e-06, - "loss": 0.0509, + "epoch": 7.717213723451656, + "grad_norm": 0.7114430665969849, + "learning_rate": 2.2827862765483443e-06, + "loss": 0.0517, "step": 51960 }, { - "epoch": 3.8593494727461755, - "grad_norm": 3.871561050415039, - "learning_rate": 6.8439031635229475e-06, - "loss": 0.0922, + "epoch": 7.718698945492351, + "grad_norm": 0.8304001688957214, + "learning_rate": 2.281301054507649e-06, + "loss": 0.0509, "step": 51970 }, { - "epoch": 3.8600920837665234, - "grad_norm": 0.6323529481887817, - "learning_rate": 6.839447497400862e-06, - "loss": 0.0609, + "epoch": 7.720184167533046, + "grad_norm": 0.5770793557167053, + "learning_rate": 2.279815832466954e-06, + "loss": 0.0548, "step": 51980 }, { - "epoch": 3.860834694786871, - "grad_norm": 2.84472393989563, - "learning_rate": 6.834991831278777e-06, - "loss": 0.0602, + "epoch": 7.721669389573742, + "grad_norm": 0.4372476041316986, + "learning_rate": 2.278330610426259e-06, + "loss": 0.0495, "step": 51990 }, { - "epoch": 3.8615773058072183, - "grad_norm": 0.9858630895614624, - "learning_rate": 6.830536165156691e-06, - "loss": 0.035, + "epoch": 7.7231546116144365, + "grad_norm": 1.424376368522644, + "learning_rate": 2.2768453883855637e-06, + "loss": 0.0598, "step": 52000 }, { - "epoch": 3.8623199168275657, - "grad_norm": 1.1265889406204224, - "learning_rate": 6.826080499034606e-06, - "loss": 0.0581, + "epoch": 7.724639833655131, + "grad_norm": 0.3935073912143707, + "learning_rate": 2.2753601663448686e-06, + "loss": 0.0638, "step": 52010 }, { - "epoch": 3.863062527847913, - "grad_norm": 1.8120416402816772, - "learning_rate": 6.82162483291252e-06, - "loss": 0.0919, + "epoch": 7.726125055695826, + "grad_norm": 0.4132566750049591, + "learning_rate": 2.2738749443041735e-06, + "loss": 0.0546, "step": 52020 }, { - "epoch": 3.863805138868261, - "grad_norm": 0.340191513299942, - "learning_rate": 6.817169166790436e-06, - "loss": 0.0482, + "epoch": 7.727610277736522, + "grad_norm": 0.8765859007835388, + "learning_rate": 2.2723897222634787e-06, + "loss": 0.0548, "step": 52030 }, - { - "epoch": 3.8645477498886085, - "grad_norm": 2.8395814895629883, - "learning_rate": 6.81271350066835e-06, - "loss": 0.0593, + { + "epoch": 7.729095499777217, + "grad_norm": 0.8252843022346497, + "learning_rate": 2.270904500222783e-06, + "loss": 0.0569, "step": 52040 }, { - "epoch": 3.865290360908956, - "grad_norm": 1.4814605712890625, - "learning_rate": 6.808257834546265e-06, - "loss": 0.0911, + "epoch": 7.730580721817912, + "grad_norm": 1.28359854221344, + "learning_rate": 2.2694192781820884e-06, + "loss": 0.0654, "step": 52050 }, { - "epoch": 3.8660329719293034, - "grad_norm": 1.4447940587997437, - "learning_rate": 6.80380216842418e-06, - "loss": 0.0592, + "epoch": 7.732065943858607, + "grad_norm": 0.8124073147773743, + "learning_rate": 2.2679340561413933e-06, + "loss": 0.0544, "step": 52060 }, { - "epoch": 3.866775582949651, - "grad_norm": 1.7888092994689941, - "learning_rate": 6.799346502302094e-06, - "loss": 0.0895, + "epoch": 7.733551165899302, + "grad_norm": 0.8168864250183105, + "learning_rate": 2.266448834100698e-06, + "loss": 0.061, "step": 52070 }, { - "epoch": 3.8675181939699987, - "grad_norm": 1.2364249229431152, - "learning_rate": 6.794890836180009e-06, - "loss": 0.0805, + "epoch": 7.7350363879399975, + "grad_norm": 1.260461449623108, + "learning_rate": 2.2649636120600034e-06, + "loss": 0.057, "step": 52080 }, { - "epoch": 3.868260804990346, - "grad_norm": 1.0468952655792236, - "learning_rate": 6.790435170057924e-06, - "loss": 0.0674, + "epoch": 7.736521609980692, + "grad_norm": 0.668998122215271, + "learning_rate": 2.263478390019308e-06, + "loss": 0.0472, "step": 52090 }, { - "epoch": 3.8690034160106936, - "grad_norm": 1.610219120979309, - "learning_rate": 6.785979503935839e-06, - "loss": 0.0458, + "epoch": 7.738006832021387, + "grad_norm": 1.221310019493103, + "learning_rate": 2.261993167978613e-06, + "loss": 0.0428, "step": 52100 }, { - "epoch": 3.869746027031041, - "grad_norm": 0.4736784100532532, - "learning_rate": 6.781523837813753e-06, - "loss": 0.0512, + "epoch": 7.739492054062082, + "grad_norm": 0.6181626319885254, + "learning_rate": 2.260507945937918e-06, + "loss": 0.067, "step": 52110 }, { - "epoch": 3.8704886380513885, - "grad_norm": 1.5783486366271973, - "learning_rate": 6.777068171691668e-06, - "loss": 0.089, + "epoch": 7.740977276102777, + "grad_norm": 0.6286818981170654, + "learning_rate": 2.259022723897223e-06, + "loss": 0.0441, "step": 52120 }, { - "epoch": 3.8712312490717364, - "grad_norm": 1.8247394561767578, - "learning_rate": 6.772612505569582e-06, - "loss": 0.0809, + "epoch": 7.742462498143473, + "grad_norm": 0.8674864172935486, + "learning_rate": 2.2575375018565277e-06, + "loss": 0.0601, "step": 52130 }, { - "epoch": 3.871973860092084, - "grad_norm": 1.473613977432251, - "learning_rate": 6.768156839447497e-06, - "loss": 0.0699, + "epoch": 7.743947720184168, + "grad_norm": 0.7923988103866577, + "learning_rate": 2.2560522798158326e-06, + "loss": 0.0443, "step": 52140 }, { - "epoch": 3.8727164711124313, - "grad_norm": 0.9467633962631226, - "learning_rate": 6.763701173325412e-06, - "loss": 0.0398, + "epoch": 7.745432942224863, + "grad_norm": 1.3793833255767822, + "learning_rate": 2.2545670577751374e-06, + "loss": 0.0513, "step": 52150 }, { - "epoch": 3.873459082132779, - "grad_norm": 0.7337872982025146, - "learning_rate": 6.759245507203327e-06, - "loss": 0.0428, + "epoch": 7.746918164265558, + "grad_norm": 1.1016931533813477, + "learning_rate": 2.2530818357344423e-06, + "loss": 0.0556, "step": 52160 }, { - "epoch": 3.8742016931531262, - "grad_norm": 1.1722044944763184, - "learning_rate": 6.7547898410812415e-06, - "loss": 0.0658, + "epoch": 7.7484033863062525, + "grad_norm": 0.9921779632568359, + "learning_rate": 2.251596613693747e-06, + "loss": 0.0636, "step": 52170 }, { - "epoch": 3.874944304173474, - "grad_norm": 1.2315232753753662, - "learning_rate": 6.7503341749591565e-06, - "loss": 0.0714, + "epoch": 7.749888608346948, + "grad_norm": 0.7895703911781311, + "learning_rate": 2.2501113916530524e-06, + "loss": 0.0809, "step": 52180 }, { - "epoch": 3.8756869151938216, - "grad_norm": 1.0262686014175415, - "learning_rate": 6.7458785088370715e-06, - "loss": 0.0493, + "epoch": 7.751373830387643, + "grad_norm": 0.9937105774879456, + "learning_rate": 2.2486261696123573e-06, + "loss": 0.0633, "step": 52190 }, { - "epoch": 3.876429526214169, - "grad_norm": 2.3308000564575195, - "learning_rate": 6.741422842714986e-06, - "loss": 0.0469, + "epoch": 7.752859052428338, + "grad_norm": 0.8154398202896118, + "learning_rate": 2.247140947571662e-06, + "loss": 0.0518, "step": 52200 }, { - "epoch": 3.8771721372345165, - "grad_norm": 0.9160858392715454, - "learning_rate": 6.7369671765929014e-06, - "loss": 0.0738, + "epoch": 7.754344274469033, + "grad_norm": 0.7080826163291931, + "learning_rate": 2.245655725530967e-06, + "loss": 0.0548, "step": 52210 }, { - "epoch": 3.877914748254864, - "grad_norm": 0.8617852926254272, - "learning_rate": 6.732511510470816e-06, - "loss": 0.0674, + "epoch": 7.755829496509728, + "grad_norm": 0.2519676089286804, + "learning_rate": 2.244170503490272e-06, + "loss": 0.058, "step": 52220 }, { - "epoch": 3.878657359275212, - "grad_norm": 0.7218712568283081, - "learning_rate": 6.728055844348731e-06, - "loss": 0.0494, + "epoch": 7.757314718550424, + "grad_norm": 0.39050978422164917, + "learning_rate": 2.242685281449577e-06, + "loss": 0.0608, "step": 52230 }, { - "epoch": 3.8793999702955593, - "grad_norm": 2.578873872756958, - "learning_rate": 6.723600178226645e-06, + "epoch": 7.7587999405911185, + "grad_norm": 0.906412661075592, + "learning_rate": 2.2412000594088816e-06, "loss": 0.0685, "step": 52240 }, { - "epoch": 3.8801425813159067, - "grad_norm": 0.5948718786239624, - "learning_rate": 6.71914451210456e-06, - "loss": 0.0504, + "epoch": 7.7602851626318134, + "grad_norm": 0.9142884612083435, + "learning_rate": 2.239714837368187e-06, + "loss": 0.0591, "step": 52250 }, { - "epoch": 3.880885192336254, - "grad_norm": 3.3981211185455322, - "learning_rate": 6.714688845982474e-06, - "loss": 0.0623, + "epoch": 7.761770384672508, + "grad_norm": 0.45599719882011414, + "learning_rate": 2.2382296153274917e-06, + "loss": 0.0588, "step": 52260 }, { - "epoch": 3.8816278033566016, - "grad_norm": 1.991584300994873, - "learning_rate": 6.71023317986039e-06, - "loss": 0.0566, + "epoch": 7.763255606713203, + "grad_norm": 0.7237184643745422, + "learning_rate": 2.2367443932867966e-06, + "loss": 0.0518, "step": 52270 }, { - "epoch": 3.8823704143769495, - "grad_norm": 0.5625025629997253, - "learning_rate": 6.705777513738304e-06, - "loss": 0.043, + "epoch": 7.764740828753899, + "grad_norm": 1.3857414722442627, + "learning_rate": 2.2352591712461014e-06, + "loss": 0.0556, "step": 52280 }, { - "epoch": 3.883113025397297, - "grad_norm": 0.8824495673179626, - "learning_rate": 6.701321847616219e-06, - "loss": 0.0552, + "epoch": 7.766226050794594, + "grad_norm": 0.861685037612915, + "learning_rate": 2.2337739492054063e-06, + "loss": 0.0654, "step": 52290 }, { - "epoch": 3.8838556364176444, - "grad_norm": 0.5333496928215027, - "learning_rate": 6.696866181494133e-06, - "loss": 0.061, + "epoch": 7.767711272835289, + "grad_norm": 0.43121787905693054, + "learning_rate": 2.232288727164711e-06, + "loss": 0.0532, "step": 52300 }, { - "epoch": 3.884598247437992, - "grad_norm": 0.932508647441864, - "learning_rate": 6.692410515372048e-06, - "loss": 0.044, + "epoch": 7.769196494875984, + "grad_norm": 0.9009015560150146, + "learning_rate": 2.2308035051240164e-06, + "loss": 0.046, "step": 52310 }, { - "epoch": 3.8853408584583393, - "grad_norm": 0.28375789523124695, - "learning_rate": 6.687954849249964e-06, - "loss": 0.0445, + "epoch": 7.770681716916679, + "grad_norm": 0.5191366672515869, + "learning_rate": 2.2293182830833213e-06, + "loss": 0.0521, "step": 52320 }, { - "epoch": 3.886083469478687, - "grad_norm": 0.8071836829185486, - "learning_rate": 6.683499183127878e-06, - "loss": 0.1011, + "epoch": 7.772166938957374, + "grad_norm": 1.2074165344238281, + "learning_rate": 2.227833061042626e-06, + "loss": 0.0428, "step": 52330 }, { - "epoch": 3.8868260804990347, - "grad_norm": 1.178012490272522, - "learning_rate": 6.679043517005793e-06, - "loss": 0.0465, + "epoch": 7.773652160998069, + "grad_norm": 0.3799234926700592, + "learning_rate": 2.226347839001931e-06, + "loss": 0.0617, "step": 52340 }, { - "epoch": 3.887568691519382, - "grad_norm": 0.8823719620704651, - "learning_rate": 6.674587850883707e-06, - "loss": 0.045, + "epoch": 7.775137383038764, + "grad_norm": 0.7373781800270081, + "learning_rate": 2.224862616961236e-06, + "loss": 0.0699, "step": 52350 }, { - "epoch": 3.8883113025397296, - "grad_norm": 1.4806567430496216, - "learning_rate": 6.670132184761622e-06, - "loss": 0.0486, + "epoch": 7.776622605079459, + "grad_norm": 0.8077521920204163, + "learning_rate": 2.2233773949205407e-06, + "loss": 0.0534, "step": 52360 }, { - "epoch": 3.889053913560077, - "grad_norm": 1.835952877998352, - "learning_rate": 6.665676518639536e-06, - "loss": 0.0594, + "epoch": 7.778107827120154, + "grad_norm": 0.6925342082977295, + "learning_rate": 2.2218921728798456e-06, + "loss": 0.0478, "step": 52370 }, { - "epoch": 3.889796524580425, - "grad_norm": 1.964036226272583, - "learning_rate": 6.661220852517452e-06, - "loss": 0.0783, + "epoch": 7.77959304916085, + "grad_norm": 0.9443783760070801, + "learning_rate": 2.220406950839151e-06, + "loss": 0.0657, "step": 52380 }, { - "epoch": 3.8905391356007724, - "grad_norm": 0.7686440944671631, - "learning_rate": 6.656765186395366e-06, - "loss": 0.0448, + "epoch": 7.781078271201545, + "grad_norm": 0.4062022268772125, + "learning_rate": 2.2189217287984553e-06, + "loss": 0.0429, "step": 52390 }, { - "epoch": 3.89128174662112, - "grad_norm": 0.5452464818954468, - "learning_rate": 6.652309520273281e-06, - "loss": 0.0475, + "epoch": 7.78256349324224, + "grad_norm": 0.4872238337993622, + "learning_rate": 2.2174365067577606e-06, + "loss": 0.0584, "step": 52400 }, { - "epoch": 3.8920243576414673, - "grad_norm": 1.948075532913208, - "learning_rate": 6.647853854151195e-06, - "loss": 0.0631, + "epoch": 7.7840487152829345, + "grad_norm": 0.7286458611488342, + "learning_rate": 2.2159512847170654e-06, + "loss": 0.0433, "step": 52410 }, { - "epoch": 3.8927669686618147, - "grad_norm": 1.347221851348877, - "learning_rate": 6.64339818802911e-06, - "loss": 0.0502, + "epoch": 7.785533937323629, + "grad_norm": 1.2079678773880005, + "learning_rate": 2.2144660626763703e-06, + "loss": 0.0717, "step": 52420 }, { - "epoch": 3.8935095796821626, - "grad_norm": 0.37357455492019653, - "learning_rate": 6.6389425219070245e-06, - "loss": 0.0583, + "epoch": 7.787019159364325, + "grad_norm": 0.7324601411819458, + "learning_rate": 2.212980840635675e-06, + "loss": 0.0735, "step": 52430 }, { - "epoch": 3.89425219070251, - "grad_norm": 1.4739619493484497, - "learning_rate": 6.63448685578494e-06, - "loss": 0.0433, + "epoch": 7.78850438140502, + "grad_norm": 0.6645302176475525, + "learning_rate": 2.21149561859498e-06, + "loss": 0.0494, "step": 52440 }, { - "epoch": 3.8949948017228575, - "grad_norm": 0.9696072936058044, - "learning_rate": 6.630031189662855e-06, - "loss": 0.0642, + "epoch": 7.789989603445715, + "grad_norm": 0.8328620195388794, + "learning_rate": 2.2100103965542853e-06, + "loss": 0.051, "step": 52450 }, { - "epoch": 3.895737412743205, - "grad_norm": 2.1969690322875977, - "learning_rate": 6.6255755235407695e-06, - "loss": 0.042, + "epoch": 7.79147482548641, + "grad_norm": 0.7536907196044922, + "learning_rate": 2.20852517451359e-06, + "loss": 0.0669, "step": 52460 }, { - "epoch": 3.8964800237635524, - "grad_norm": 1.8756181001663208, - "learning_rate": 6.6211198574186845e-06, - "loss": 0.0537, + "epoch": 7.792960047527105, + "grad_norm": 0.7514217495918274, + "learning_rate": 2.207039952472895e-06, + "loss": 0.0474, "step": 52470 }, { - "epoch": 3.8972226347839003, - "grad_norm": 0.8761207461357117, - "learning_rate": 6.616664191296599e-06, - "loss": 0.0534, + "epoch": 7.794445269567801, + "grad_norm": 0.9645404815673828, + "learning_rate": 2.2055547304322e-06, + "loss": 0.0658, "step": 52480 }, { - "epoch": 3.8979652458042477, - "grad_norm": 1.1076685190200806, - "learning_rate": 6.612208525174514e-06, - "loss": 0.0536, + "epoch": 7.7959304916084955, + "grad_norm": 0.726373016834259, + "learning_rate": 2.2040695083915047e-06, + "loss": 0.0662, "step": 52490 }, { - "epoch": 3.898707856824595, - "grad_norm": 0.5765098333358765, - "learning_rate": 6.607752859052429e-06, - "loss": 0.0318, + "epoch": 7.79741571364919, + "grad_norm": 0.6940188407897949, + "learning_rate": 2.2025842863508096e-06, + "loss": 0.0476, "step": 52500 }, { - "epoch": 3.899450467844943, - "grad_norm": 1.015160322189331, - "learning_rate": 6.603297192930344e-06, - "loss": 0.0931, + "epoch": 7.798900935689885, + "grad_norm": 0.8864266276359558, + "learning_rate": 2.2010990643101144e-06, + "loss": 0.0607, "step": 52510 }, { - "epoch": 3.90019307886529, - "grad_norm": 1.242473840713501, - "learning_rate": 6.598841526808258e-06, - "loss": 0.0466, + "epoch": 7.800386157730581, + "grad_norm": 0.7016078233718872, + "learning_rate": 2.1996138422694193e-06, + "loss": 0.0354, "step": 52520 }, { - "epoch": 3.900935689885638, - "grad_norm": 0.7415289878845215, - "learning_rate": 6.594385860686173e-06, - "loss": 0.0643, + "epoch": 7.801871379771276, + "grad_norm": 0.42285242676734924, + "learning_rate": 2.1981286202287245e-06, + "loss": 0.0515, "step": 52530 }, { - "epoch": 3.9016783009059854, - "grad_norm": 1.0019993782043457, - "learning_rate": 6.589930194564087e-06, + "epoch": 7.803356601811971, + "grad_norm": 1.1683651208877563, + "learning_rate": 2.196643398188029e-06, "loss": 0.0633, "step": 52540 }, { - "epoch": 3.902420911926333, - "grad_norm": 3.3935482501983643, - "learning_rate": 6.585474528442002e-06, - "loss": 0.071, + "epoch": 7.804841823852666, + "grad_norm": 1.3887553215026855, + "learning_rate": 2.1951581761473343e-06, + "loss": 0.0682, "step": 52550 }, { - "epoch": 3.903163522946681, - "grad_norm": 2.050471067428589, - "learning_rate": 6.581018862319917e-06, - "loss": 0.0605, + "epoch": 7.806327045893361, + "grad_norm": 0.7513377070426941, + "learning_rate": 2.193672954106639e-06, + "loss": 0.0351, "step": 52560 }, { - "epoch": 3.9039061339670282, - "grad_norm": 1.7919012308120728, - "learning_rate": 6.576563196197832e-06, - "loss": 0.0388, + "epoch": 7.8078122679340565, + "grad_norm": 0.6102052330970764, + "learning_rate": 2.192187732065944e-06, + "loss": 0.0512, "step": 52570 }, { - "epoch": 3.9046487449873757, - "grad_norm": 2.5567381381988525, - "learning_rate": 6.572107530075747e-06, - "loss": 0.072, + "epoch": 7.809297489974751, + "grad_norm": 0.46565932035446167, + "learning_rate": 2.1907025100252493e-06, + "loss": 0.067, "step": 52580 }, { - "epoch": 3.905391356007723, - "grad_norm": 1.0397082567214966, - "learning_rate": 6.567651863953661e-06, - "loss": 0.0877, + "epoch": 7.810782712015446, + "grad_norm": 0.8263485431671143, + "learning_rate": 2.1892172879845537e-06, + "loss": 0.0484, "step": 52590 }, { - "epoch": 3.9061339670280706, - "grad_norm": 0.6068091988563538, - "learning_rate": 6.563196197831576e-06, - "loss": 0.0714, + "epoch": 7.812267934056141, + "grad_norm": 0.8608595132827759, + "learning_rate": 2.187732065943859e-06, + "loss": 0.0549, "step": 52600 }, { - "epoch": 3.9068765780484185, - "grad_norm": 1.3227723836898804, - "learning_rate": 6.55874053170949e-06, - "loss": 0.0669, + "epoch": 7.813753156096837, + "grad_norm": 1.4060004949569702, + "learning_rate": 2.186246843903164e-06, + "loss": 0.0529, "step": 52610 }, { - "epoch": 3.907619189068766, - "grad_norm": 1.6749495267868042, - "learning_rate": 6.554284865587406e-06, - "loss": 0.0689, + "epoch": 7.815238378137532, + "grad_norm": 0.8970702886581421, + "learning_rate": 2.1847616218624687e-06, + "loss": 0.0636, "step": 52620 }, { - "epoch": 3.9083618000891134, - "grad_norm": 1.0209654569625854, - "learning_rate": 6.54982919946532e-06, - "loss": 0.0847, + "epoch": 7.816723600178227, + "grad_norm": 0.3874322175979614, + "learning_rate": 2.1832763998217735e-06, + "loss": 0.0545, "step": 52630 }, { - "epoch": 3.909104411109461, - "grad_norm": 1.9074591398239136, - "learning_rate": 6.545373533343235e-06, - "loss": 0.0379, + "epoch": 7.818208822218922, + "grad_norm": 2.098299264907837, + "learning_rate": 2.1817911777810784e-06, + "loss": 0.0362, "step": 52640 }, { - "epoch": 3.9098470221298083, - "grad_norm": 0.3264058828353882, - "learning_rate": 6.540917867221149e-06, - "loss": 0.0683, + "epoch": 7.819694044259617, + "grad_norm": 0.3663277328014374, + "learning_rate": 2.1803059557403833e-06, + "loss": 0.0573, "step": 52650 }, { - "epoch": 3.910589633150156, - "grad_norm": 1.0117005109786987, - "learning_rate": 6.536462201099064e-06, - "loss": 0.0578, + "epoch": 7.821179266300312, + "grad_norm": 0.8399795889854431, + "learning_rate": 2.1788207336996885e-06, + "loss": 0.0484, "step": 52660 }, { - "epoch": 3.9113322441705036, - "grad_norm": 2.336249589920044, - "learning_rate": 6.532006534976979e-06, - "loss": 0.0476, + "epoch": 7.822664488341007, + "grad_norm": 1.1129577159881592, + "learning_rate": 2.177335511658993e-06, + "loss": 0.0467, "step": 52670 }, { - "epoch": 3.912074855190851, - "grad_norm": 0.9599561095237732, - "learning_rate": 6.527550868854894e-06, - "loss": 0.0638, + "epoch": 7.824149710381702, + "grad_norm": 0.6233702898025513, + "learning_rate": 2.1758502896182982e-06, + "loss": 0.0825, "step": 52680 }, { - "epoch": 3.9128174662111985, - "grad_norm": 1.9813097715377808, - "learning_rate": 6.5230952027328085e-06, - "loss": 0.0477, + "epoch": 7.825634932422397, + "grad_norm": 1.300802230834961, + "learning_rate": 2.174365067577603e-06, + "loss": 0.0493, "step": 52690 }, { - "epoch": 3.913560077231546, - "grad_norm": 1.9416104555130005, - "learning_rate": 6.5186395366107235e-06, - "loss": 0.1075, + "epoch": 7.827120154463092, + "grad_norm": 0.338610976934433, + "learning_rate": 2.172879845536908e-06, + "loss": 0.0549, "step": 52700 }, { - "epoch": 3.914302688251894, - "grad_norm": 2.747821569442749, - "learning_rate": 6.5141838704886384e-06, - "loss": 0.0997, + "epoch": 7.828605376503788, + "grad_norm": 0.6561576128005981, + "learning_rate": 2.171394623496213e-06, + "loss": 0.0544, "step": 52710 }, { - "epoch": 3.9150452992722413, - "grad_norm": 0.8139704465866089, - "learning_rate": 6.509728204366553e-06, - "loss": 0.0475, + "epoch": 7.830090598544483, + "grad_norm": 0.48244038224220276, + "learning_rate": 2.1699094014555177e-06, + "loss": 0.0555, "step": 52720 }, { - "epoch": 3.9157879102925888, - "grad_norm": 3.535797357559204, - "learning_rate": 6.5052725382444684e-06, - "loss": 0.074, + "epoch": 7.8315758205851775, + "grad_norm": 0.2857292890548706, + "learning_rate": 2.168424179414823e-06, + "loss": 0.0516, "step": 52730 }, { - "epoch": 3.916530521312936, - "grad_norm": 1.303336501121521, - "learning_rate": 6.500816872122383e-06, - "loss": 0.0383, + "epoch": 7.833061042625872, + "grad_norm": 0.9775968194007874, + "learning_rate": 2.1669389573741274e-06, + "loss": 0.0556, "step": 52740 }, { - "epoch": 3.9172731323332837, - "grad_norm": 1.1751950979232788, - "learning_rate": 6.496361206000298e-06, - "loss": 0.0735, + "epoch": 7.834546264666567, + "grad_norm": 0.8055846691131592, + "learning_rate": 2.1654537353334327e-06, + "loss": 0.0591, "step": 52750 }, { - "epoch": 3.9180157433536316, - "grad_norm": 0.4327644407749176, - "learning_rate": 6.491905539878212e-06, - "loss": 0.0379, + "epoch": 7.836031486707263, + "grad_norm": 1.5750608444213867, + "learning_rate": 2.1639685132927375e-06, + "loss": 0.0636, "step": 52760 }, { - "epoch": 3.918758354373979, - "grad_norm": 1.1452745199203491, - "learning_rate": 6.487449873756127e-06, - "loss": 0.0403, + "epoch": 7.837516708747958, + "grad_norm": 0.9055830240249634, + "learning_rate": 2.1624832912520424e-06, + "loss": 0.0478, "step": 52770 }, { - "epoch": 3.9195009653943265, - "grad_norm": 0.25140857696533203, - "learning_rate": 6.482994207634041e-06, - "loss": 0.0589, + "epoch": 7.839001930788653, + "grad_norm": 0.4669037461280823, + "learning_rate": 2.1609980692113472e-06, + "loss": 0.0656, "step": 52780 }, { - "epoch": 3.920243576414674, - "grad_norm": 2.8998043537139893, - "learning_rate": 6.478538541511957e-06, - "loss": 0.0906, + "epoch": 7.840487152829348, + "grad_norm": 1.7319399118423462, + "learning_rate": 2.159512847170652e-06, + "loss": 0.0762, "step": 52790 }, { - "epoch": 3.9209861874350214, - "grad_norm": 0.8305198550224304, - "learning_rate": 6.474082875389871e-06, - "loss": 0.0389, + "epoch": 7.841972374870043, + "grad_norm": 0.9436827898025513, + "learning_rate": 2.158027625129957e-06, + "loss": 0.0523, "step": 52800 }, { - "epoch": 3.9217287984553693, - "grad_norm": 1.9127947092056274, - "learning_rate": 6.469627209267786e-06, - "loss": 0.0678, + "epoch": 7.8434575969107385, + "grad_norm": 1.1199607849121094, + "learning_rate": 2.1565424030892622e-06, + "loss": 0.0527, "step": 52810 }, { - "epoch": 3.9224714094757167, - "grad_norm": 3.7456071376800537, - "learning_rate": 6.4651715431457e-06, - "loss": 0.0809, + "epoch": 7.844942818951433, + "grad_norm": 0.680494487285614, + "learning_rate": 2.1550571810485667e-06, + "loss": 0.049, "step": 52820 }, { - "epoch": 3.923214020496064, - "grad_norm": 2.8850257396698, - "learning_rate": 6.460715877023615e-06, - "loss": 0.0541, + "epoch": 7.846428040992128, + "grad_norm": 1.027863621711731, + "learning_rate": 2.153571959007872e-06, + "loss": 0.0434, "step": 52830 }, { - "epoch": 3.9239566315164116, - "grad_norm": 0.6639874577522278, - "learning_rate": 6.45626021090153e-06, - "loss": 0.0512, + "epoch": 7.847913263032823, + "grad_norm": 0.5206725001335144, + "learning_rate": 2.152086736967177e-06, + "loss": 0.0404, "step": 52840 }, { - "epoch": 3.924699242536759, - "grad_norm": 1.1795817613601685, - "learning_rate": 6.451804544779445e-06, - "loss": 0.0508, + "epoch": 7.849398485073518, + "grad_norm": 0.6419275403022766, + "learning_rate": 2.1506015149264817e-06, + "loss": 0.0598, "step": 52850 }, { - "epoch": 3.925441853557107, - "grad_norm": 0.710468590259552, - "learning_rate": 6.44734887865736e-06, - "loss": 0.0463, + "epoch": 7.850883707114214, + "grad_norm": 1.9312747716903687, + "learning_rate": 2.1491162928857865e-06, + "loss": 0.0482, "step": 52860 }, { - "epoch": 3.9261844645774544, - "grad_norm": 1.7391959428787231, - "learning_rate": 6.442893212535274e-06, - "loss": 0.0873, + "epoch": 7.852368929154909, + "grad_norm": 0.9515159130096436, + "learning_rate": 2.1476310708450914e-06, + "loss": 0.0689, "step": 52870 }, { - "epoch": 3.926927075597802, - "grad_norm": 0.5861713290214539, - "learning_rate": 6.438437546413189e-06, - "loss": 0.0452, + "epoch": 7.853854151195604, + "grad_norm": 0.5467528700828552, + "learning_rate": 2.1461458488043967e-06, + "loss": 0.07, "step": 52880 }, { - "epoch": 3.9276696866181493, - "grad_norm": 0.8153517842292786, - "learning_rate": 6.433981880291103e-06, - "loss": 0.0403, + "epoch": 7.855339373236299, + "grad_norm": 0.9535314440727234, + "learning_rate": 2.144660626763701e-06, + "loss": 0.0678, "step": 52890 }, { - "epoch": 3.9284122976384968, - "grad_norm": 3.0007896423339844, - "learning_rate": 6.429526214169018e-06, - "loss": 0.0681, + "epoch": 7.8568245952769935, + "grad_norm": 0.6750079989433289, + "learning_rate": 2.1431754047230064e-06, + "loss": 0.0744, "step": 52900 }, { - "epoch": 3.9291549086588446, - "grad_norm": 2.487295627593994, - "learning_rate": 6.425070548046933e-06, - "loss": 0.0734, + "epoch": 7.858309817317689, + "grad_norm": 1.2106472253799438, + "learning_rate": 2.1416901826823112e-06, + "loss": 0.059, "step": 52910 }, { - "epoch": 3.929897519679192, - "grad_norm": 1.7912089824676514, - "learning_rate": 6.420614881924848e-06, - "loss": 0.0914, + "epoch": 7.859795039358384, + "grad_norm": 1.0857863426208496, + "learning_rate": 2.140204960641616e-06, + "loss": 0.0547, "step": 52920 }, { - "epoch": 3.9306401306995395, - "grad_norm": 3.3400092124938965, - "learning_rate": 6.416159215802762e-06, - "loss": 0.0571, + "epoch": 7.861280261399079, + "grad_norm": 1.1065659523010254, + "learning_rate": 2.138719738600921e-06, + "loss": 0.0556, "step": 52930 }, { - "epoch": 3.931382741719887, - "grad_norm": 2.8643431663513184, - "learning_rate": 6.411703549680677e-06, - "loss": 0.0645, + "epoch": 7.862765483439774, + "grad_norm": 1.1433991193771362, + "learning_rate": 2.137234516560226e-06, + "loss": 0.0623, "step": 52940 }, { - "epoch": 3.9321253527402344, - "grad_norm": 0.3890113830566406, - "learning_rate": 6.4072478835585915e-06, - "loss": 0.0606, + "epoch": 7.864250705480469, + "grad_norm": 0.4376066327095032, + "learning_rate": 2.1357492945195307e-06, + "loss": 0.0546, "step": 52950 }, { - "epoch": 3.9328679637605823, - "grad_norm": 0.5684772729873657, - "learning_rate": 6.4027922174365065e-06, - "loss": 0.0604, + "epoch": 7.865735927521165, + "grad_norm": 0.9114493727684021, + "learning_rate": 2.134264072478836e-06, + "loss": 0.0547, "step": 52960 }, { - "epoch": 3.93361057478093, - "grad_norm": 2.360518217086792, - "learning_rate": 6.398336551314422e-06, - "loss": 0.063, + "epoch": 7.86722114956186, + "grad_norm": 0.5998684167861938, + "learning_rate": 2.132778850438141e-06, + "loss": 0.045, "step": 52970 }, { - "epoch": 3.9343531858012772, - "grad_norm": 1.0846983194351196, - "learning_rate": 6.3938808851923365e-06, - "loss": 0.0421, + "epoch": 7.8687063716025545, + "grad_norm": 0.5542938113212585, + "learning_rate": 2.1312936283974456e-06, + "loss": 0.0506, "step": 52980 }, { - "epoch": 3.9350957968216247, - "grad_norm": 0.8970616459846497, - "learning_rate": 6.3894252190702515e-06, - "loss": 0.0558, + "epoch": 7.870191593643249, + "grad_norm": 0.6076810956001282, + "learning_rate": 2.1298084063567505e-06, + "loss": 0.0437, "step": 52990 }, { - "epoch": 3.935838407841972, - "grad_norm": 1.57839834690094, - "learning_rate": 6.384969552948166e-06, - "loss": 0.0796, + "epoch": 7.871676815683944, + "grad_norm": 0.691862165927887, + "learning_rate": 2.1283231843160554e-06, + "loss": 0.0686, "step": 53000 }, { - "epoch": 3.93658101886232, - "grad_norm": 1.5346392393112183, - "learning_rate": 6.380513886826081e-06, - "loss": 0.0477, + "epoch": 7.87316203772464, + "grad_norm": 1.2608790397644043, + "learning_rate": 2.1268379622753606e-06, + "loss": 0.0612, "step": 53010 }, { - "epoch": 3.9373236298826675, - "grad_norm": 1.0439153909683228, - "learning_rate": 6.376058220703996e-06, - "loss": 0.0641, + "epoch": 7.874647259765335, + "grad_norm": 0.6385157704353333, + "learning_rate": 2.125352740234665e-06, + "loss": 0.0643, "step": 53020 }, { - "epoch": 3.938066240903015, - "grad_norm": 0.8135676383972168, - "learning_rate": 6.371602554581911e-06, - "loss": 0.0671, + "epoch": 7.87613248180603, + "grad_norm": 1.4922287464141846, + "learning_rate": 2.1238675181939704e-06, + "loss": 0.0688, "step": 53030 }, { - "epoch": 3.9388088519233624, - "grad_norm": 2.3363993167877197, - "learning_rate": 6.367146888459825e-06, - "loss": 0.0387, + "epoch": 7.877617703846725, + "grad_norm": 0.8557190299034119, + "learning_rate": 2.122382296153275e-06, + "loss": 0.0517, "step": 53040 }, { - "epoch": 3.93955146294371, - "grad_norm": 1.2489509582519531, - "learning_rate": 6.36269122233774e-06, - "loss": 0.0375, + "epoch": 7.87910292588742, + "grad_norm": 0.8984439373016357, + "learning_rate": 2.12089707411258e-06, + "loss": 0.0448, "step": 53050 }, { - "epoch": 3.9402940739640577, - "grad_norm": 1.8481662273406982, - "learning_rate": 6.358235556215654e-06, - "loss": 0.0492, + "epoch": 7.8805881479281155, + "grad_norm": 0.6511837244033813, + "learning_rate": 2.119411852071885e-06, + "loss": 0.0536, "step": 53060 }, { - "epoch": 3.941036684984405, - "grad_norm": 2.6093268394470215, - "learning_rate": 6.353779890093569e-06, - "loss": 0.0814, + "epoch": 7.88207336996881, + "grad_norm": 0.7384523153305054, + "learning_rate": 2.1179266300311898e-06, + "loss": 0.0609, "step": 53070 }, { - "epoch": 3.9417792960047526, - "grad_norm": 1.122709035873413, - "learning_rate": 6.349324223971484e-06, - "loss": 0.0497, + "epoch": 7.883558592009505, + "grad_norm": 0.6342982649803162, + "learning_rate": 2.1164414079904946e-06, + "loss": 0.0501, "step": 53080 }, { - "epoch": 3.9425219070251005, - "grad_norm": 0.5797234177589417, - "learning_rate": 6.344868557849399e-06, - "loss": 0.0727, + "epoch": 7.8850438140502, + "grad_norm": 1.1879791021347046, + "learning_rate": 2.1149561859497995e-06, + "loss": 0.051, "step": 53090 }, { - "epoch": 3.9432645180454475, - "grad_norm": 0.6840558648109436, - "learning_rate": 6.340412891727314e-06, - "loss": 0.0617, + "epoch": 7.886529036090896, + "grad_norm": 1.1492778062820435, + "learning_rate": 2.1134709639091048e-06, + "loss": 0.0661, "step": 53100 }, { - "epoch": 3.9440071290657954, - "grad_norm": 0.6512880325317383, - "learning_rate": 6.335957225605228e-06, - "loss": 0.0461, + "epoch": 7.888014258131591, + "grad_norm": 1.4980379343032837, + "learning_rate": 2.1119857418684096e-06, + "loss": 0.0498, "step": 53110 }, { - "epoch": 3.944749740086143, - "grad_norm": 0.3409847319126129, - "learning_rate": 6.331501559483143e-06, - "loss": 0.0355, + "epoch": 7.889499480172286, + "grad_norm": 1.5005109310150146, + "learning_rate": 2.1105005198277145e-06, + "loss": 0.046, "step": 53120 }, { - "epoch": 3.9454923511064903, - "grad_norm": 1.1444979906082153, - "learning_rate": 6.327045893361057e-06, - "loss": 0.0465, + "epoch": 7.890984702212981, + "grad_norm": 0.8864477872848511, + "learning_rate": 2.1090152977870193e-06, + "loss": 0.0581, "step": 53130 }, { - "epoch": 3.946234962126838, - "grad_norm": 2.209327459335327, - "learning_rate": 6.322590227238973e-06, - "loss": 0.0566, + "epoch": 7.8924699242536756, + "grad_norm": 0.39365842938423157, + "learning_rate": 2.107530075746324e-06, + "loss": 0.0362, "step": 53140 }, { - "epoch": 3.9469775731471857, - "grad_norm": 0.7463454604148865, - "learning_rate": 6.318134561116887e-06, - "loss": 0.0632, + "epoch": 7.893955146294371, + "grad_norm": 0.6498399376869202, + "learning_rate": 2.106044853705629e-06, + "loss": 0.0612, "step": 53150 }, { - "epoch": 3.947720184167533, - "grad_norm": 0.4572658836841583, - "learning_rate": 6.313678894994802e-06, - "loss": 0.0592, + "epoch": 7.895440368335066, + "grad_norm": 1.087868094444275, + "learning_rate": 2.1045596316649343e-06, + "loss": 0.0404, "step": 53160 }, { - "epoch": 3.9484627951878806, - "grad_norm": 0.3454363942146301, - "learning_rate": 6.309223228872716e-06, - "loss": 0.0801, + "epoch": 7.896925590375761, + "grad_norm": 0.6875630617141724, + "learning_rate": 2.1030744096242388e-06, + "loss": 0.0685, "step": 53170 }, { - "epoch": 3.949205406208228, - "grad_norm": 1.0574936866760254, - "learning_rate": 6.304767562750631e-06, - "loss": 0.0735, + "epoch": 7.898410812416456, + "grad_norm": 1.0016955137252808, + "learning_rate": 2.101589187583544e-06, + "loss": 0.058, "step": 53180 }, { - "epoch": 3.949948017228576, - "grad_norm": 1.7936334609985352, - "learning_rate": 6.3003118966285455e-06, - "loss": 0.0525, + "epoch": 7.899896034457152, + "grad_norm": 0.6489076614379883, + "learning_rate": 2.100103965542849e-06, + "loss": 0.056, "step": 53190 }, { - "epoch": 3.9506906282489234, - "grad_norm": 1.0691609382629395, - "learning_rate": 6.295856230506461e-06, - "loss": 0.0766, + "epoch": 7.901381256497847, + "grad_norm": 0.7610294818878174, + "learning_rate": 2.0986187435021538e-06, + "loss": 0.0646, "step": 53200 }, { - "epoch": 3.951433239269271, - "grad_norm": 2.9121530055999756, - "learning_rate": 6.2914005643843755e-06, - "loss": 0.0436, + "epoch": 7.902866478538542, + "grad_norm": 0.771050214767456, + "learning_rate": 2.0971335214614586e-06, + "loss": 0.0504, "step": 53210 }, { - "epoch": 3.9521758502896183, - "grad_norm": 1.2134883403778076, - "learning_rate": 6.2869448982622904e-06, - "loss": 0.0619, + "epoch": 7.9043517005792365, + "grad_norm": 1.212473750114441, + "learning_rate": 2.0956482994207635e-06, + "loss": 0.0522, "step": 53220 }, { - "epoch": 3.9529184613099657, - "grad_norm": 3.7583858966827393, - "learning_rate": 6.282489232140205e-06, - "loss": 0.0716, + "epoch": 7.905836922619931, + "grad_norm": 0.8780522346496582, + "learning_rate": 2.0941630773800683e-06, + "loss": 0.0585, "step": 53230 }, { - "epoch": 3.9536610723303136, - "grad_norm": 0.6128131151199341, - "learning_rate": 6.27803356601812e-06, - "loss": 0.0474, + "epoch": 7.907322144660627, + "grad_norm": 1.4317315816879272, + "learning_rate": 2.092677855339373e-06, + "loss": 0.0556, "step": 53240 }, { - "epoch": 3.954403683350661, - "grad_norm": 2.04298734664917, - "learning_rate": 6.273577899896035e-06, - "loss": 0.0499, + "epoch": 7.908807366701322, + "grad_norm": 0.8244507312774658, + "learning_rate": 2.0911926332986785e-06, + "loss": 0.0405, "step": 53250 }, { - "epoch": 3.9551462943710085, - "grad_norm": 2.507197618484497, - "learning_rate": 6.26912223377395e-06, - "loss": 0.0556, + "epoch": 7.910292588742017, + "grad_norm": 1.3808245658874512, + "learning_rate": 2.0897074112579833e-06, + "loss": 0.0592, "step": 53260 }, { - "epoch": 3.955888905391356, - "grad_norm": 2.27921986579895, - "learning_rate": 6.2646665676518646e-06, - "loss": 0.0311, + "epoch": 7.911777810782712, + "grad_norm": 1.1323333978652954, + "learning_rate": 2.088222189217288e-06, + "loss": 0.0574, "step": 53270 }, { - "epoch": 3.9566315164117034, - "grad_norm": 1.2935408353805542, - "learning_rate": 6.260210901529779e-06, - "loss": 0.0436, + "epoch": 7.913263032823407, + "grad_norm": 0.7280920147895813, + "learning_rate": 2.086736967176593e-06, + "loss": 0.0584, "step": 53280 }, { - "epoch": 3.9573741274320513, - "grad_norm": 1.6734613180160522, - "learning_rate": 6.255755235407694e-06, - "loss": 0.0638, + "epoch": 7.914748254864103, + "grad_norm": 1.3350197076797485, + "learning_rate": 2.085251745135898e-06, + "loss": 0.0549, "step": 53290 }, { - "epoch": 3.9581167384523988, - "grad_norm": 2.51885724067688, - "learning_rate": 6.251299569285608e-06, - "loss": 0.063, + "epoch": 7.9162334769047975, + "grad_norm": 0.5780650973320007, + "learning_rate": 2.0837665230952028e-06, + "loss": 0.0705, "step": 53300 }, { - "epoch": 3.958859349472746, - "grad_norm": 1.2505862712860107, - "learning_rate": 6.246843903163523e-06, - "loss": 0.0401, + "epoch": 7.917718698945492, + "grad_norm": 0.8865066170692444, + "learning_rate": 2.082281301054508e-06, + "loss": 0.0623, "step": 53310 }, { - "epoch": 3.9596019604930937, - "grad_norm": 0.8314433097839355, - "learning_rate": 6.242388237041438e-06, - "loss": 0.0514, + "epoch": 7.919203920986187, + "grad_norm": 0.5735422372817993, + "learning_rate": 2.0807960790138125e-06, + "loss": 0.0536, "step": 53320 }, { - "epoch": 3.960344571513441, - "grad_norm": 1.3497314453125, - "learning_rate": 6.237932570919353e-06, - "loss": 0.0438, + "epoch": 7.920689143026882, + "grad_norm": 0.9639255404472351, + "learning_rate": 2.0793108569731178e-06, + "loss": 0.0496, "step": 53330 }, { - "epoch": 3.961087182533789, - "grad_norm": 0.9097846746444702, - "learning_rate": 6.233476904797267e-06, - "loss": 0.0561, + "epoch": 7.922174365067578, + "grad_norm": 1.7265294790267944, + "learning_rate": 2.0778256349324226e-06, + "loss": 0.055, "step": 53340 }, { - "epoch": 3.9618297935541364, - "grad_norm": 2.9179575443267822, - "learning_rate": 6.229021238675182e-06, - "loss": 0.0492, + "epoch": 7.923659587108273, + "grad_norm": 0.5997419953346252, + "learning_rate": 2.0763404128917275e-06, + "loss": 0.0474, "step": 53350 }, { - "epoch": 3.962572404574484, - "grad_norm": 1.4877798557281494, - "learning_rate": 6.224565572553096e-06, - "loss": 0.0712, + "epoch": 7.925144809148968, + "grad_norm": 0.8460346460342407, + "learning_rate": 2.0748551908510323e-06, + "loss": 0.0474, "step": 53360 }, { - "epoch": 3.9633150155948313, - "grad_norm": 0.9369436502456665, - "learning_rate": 6.220109906431012e-06, - "loss": 0.075, + "epoch": 7.926630031189663, + "grad_norm": 1.1648005247116089, + "learning_rate": 2.073369968810337e-06, + "loss": 0.073, "step": 53370 }, { - "epoch": 3.964057626615179, - "grad_norm": 1.126839280128479, - "learning_rate": 6.215654240308927e-06, - "loss": 0.0503, + "epoch": 7.928115253230358, + "grad_norm": 0.7186694145202637, + "learning_rate": 2.0718847467696425e-06, + "loss": 0.0489, "step": 53380 }, { - "epoch": 3.9648002376355267, - "grad_norm": 1.7786030769348145, - "learning_rate": 6.211198574186841e-06, - "loss": 0.0628, + "epoch": 7.929600475271053, + "grad_norm": 0.8861649632453918, + "learning_rate": 2.0703995247289473e-06, + "loss": 0.0594, "step": 53390 }, { - "epoch": 3.965542848655874, - "grad_norm": 1.3987665176391602, - "learning_rate": 6.206742908064756e-06, - "loss": 0.0942, + "epoch": 7.931085697311748, + "grad_norm": 0.42687320709228516, + "learning_rate": 2.068914302688252e-06, + "loss": 0.0817, "step": 53400 }, { - "epoch": 3.9662854596762216, - "grad_norm": 0.4520890712738037, - "learning_rate": 6.20228724194267e-06, - "loss": 0.0451, + "epoch": 7.932570919352443, + "grad_norm": 0.4874408543109894, + "learning_rate": 2.067429080647557e-06, + "loss": 0.0587, "step": 53410 }, { - "epoch": 3.967028070696569, - "grad_norm": 1.6644339561462402, - "learning_rate": 6.197831575820585e-06, - "loss": 0.0483, + "epoch": 7.934056141393138, + "grad_norm": 0.8394566774368286, + "learning_rate": 2.065943858606862e-06, + "loss": 0.0536, "step": 53420 }, { - "epoch": 3.9677706817169165, - "grad_norm": 1.37549889087677, - "learning_rate": 6.1933759096985e-06, - "loss": 0.0521, + "epoch": 7.935541363433833, + "grad_norm": 0.8180386424064636, + "learning_rate": 2.0644586365661667e-06, + "loss": 0.0621, "step": 53430 }, { - "epoch": 3.9685132927372644, - "grad_norm": 2.7035961151123047, - "learning_rate": 6.188920243576415e-06, - "loss": 0.0649, + "epoch": 7.937026585474529, + "grad_norm": 0.6095578670501709, + "learning_rate": 2.0629734145254716e-06, + "loss": 0.0527, "step": 53440 }, { - "epoch": 3.969255903757612, - "grad_norm": 1.5665085315704346, - "learning_rate": 6.184464577454329e-06, - "loss": 0.0313, + "epoch": 7.938511807515224, + "grad_norm": 1.1461389064788818, + "learning_rate": 2.0614881924847765e-06, + "loss": 0.0654, "step": 53450 }, { - "epoch": 3.9699985147779593, - "grad_norm": 1.6840803623199463, - "learning_rate": 6.180008911332244e-06, - "loss": 0.0966, + "epoch": 7.939997029555919, + "grad_norm": 1.306644082069397, + "learning_rate": 2.0600029704440817e-06, + "loss": 0.077, "step": 53460 }, { - "epoch": 3.9707411257983067, - "grad_norm": 2.5111865997314453, - "learning_rate": 6.1755532452101585e-06, - "loss": 0.0306, + "epoch": 7.9414822515966135, + "grad_norm": 0.6092720627784729, + "learning_rate": 2.058517748403386e-06, + "loss": 0.0553, "step": 53470 }, { - "epoch": 3.971483736818654, - "grad_norm": 1.9524160623550415, - "learning_rate": 6.1710975790880735e-06, - "loss": 0.0649, + "epoch": 7.942967473637308, + "grad_norm": 0.5493023991584778, + "learning_rate": 2.0570325263626915e-06, + "loss": 0.0502, "step": 53480 }, { - "epoch": 3.972226347839002, - "grad_norm": 1.0792032480239868, - "learning_rate": 6.1666419129659885e-06, - "loss": 0.07, + "epoch": 7.944452695678004, + "grad_norm": 1.4455081224441528, + "learning_rate": 2.0555473043219963e-06, + "loss": 0.0646, "step": 53490 }, { - "epoch": 3.9729689588593495, - "grad_norm": 1.4523978233337402, - "learning_rate": 6.1621862468439035e-06, - "loss": 0.0678, + "epoch": 7.945937917718699, + "grad_norm": 0.392264187335968, + "learning_rate": 2.054062082281301e-06, + "loss": 0.0629, "step": 53500 }, { - "epoch": 3.973711569879697, - "grad_norm": 2.432481527328491, - "learning_rate": 6.1577305807218185e-06, - "loss": 0.0852, + "epoch": 7.947423139759394, + "grad_norm": 0.8776362538337708, + "learning_rate": 2.0525768602406065e-06, + "loss": 0.0575, "step": 53510 }, { - "epoch": 3.9744541809000444, - "grad_norm": 1.813653588294983, - "learning_rate": 6.153274914599733e-06, - "loss": 0.0837, + "epoch": 7.948908361800089, + "grad_norm": 1.0791746377944946, + "learning_rate": 2.051091638199911e-06, + "loss": 0.047, "step": 53520 }, { - "epoch": 3.975196791920392, - "grad_norm": 0.7046365141868591, - "learning_rate": 6.148819248477648e-06, - "loss": 0.056, + "epoch": 7.950393583840784, + "grad_norm": 0.6361451148986816, + "learning_rate": 2.049606416159216e-06, + "loss": 0.0642, "step": 53530 }, { - "epoch": 3.9759394029407398, - "grad_norm": 2.6574409008026123, - "learning_rate": 6.144363582355562e-06, - "loss": 0.0592, + "epoch": 7.9518788058814796, + "grad_norm": 0.7630805373191833, + "learning_rate": 2.048121194118521e-06, + "loss": 0.0581, "step": 53540 }, { - "epoch": 3.9766820139610872, - "grad_norm": 2.38148832321167, - "learning_rate": 6.139907916233478e-06, - "loss": 0.0468, + "epoch": 7.9533640279221745, + "grad_norm": 1.1073448657989502, + "learning_rate": 2.046635972077826e-06, + "loss": 0.0448, "step": 53550 }, { - "epoch": 3.9774246249814347, - "grad_norm": 1.9013030529022217, - "learning_rate": 6.135452250111392e-06, - "loss": 0.0815, + "epoch": 7.954849249962869, + "grad_norm": 1.3098224401474, + "learning_rate": 2.0451507500371307e-06, + "loss": 0.0615, "step": 53560 }, { - "epoch": 3.978167236001782, - "grad_norm": 0.9605785608291626, - "learning_rate": 6.130996583989307e-06, - "loss": 0.0733, + "epoch": 7.956334472003564, + "grad_norm": 0.7846435308456421, + "learning_rate": 2.0436655279964356e-06, + "loss": 0.0545, "step": 53570 }, { - "epoch": 3.9789098470221296, - "grad_norm": 0.8357670903205872, - "learning_rate": 6.126540917867221e-06, - "loss": 0.0657, + "epoch": 7.957819694044259, + "grad_norm": 0.8411532044410706, + "learning_rate": 2.0421803059557404e-06, + "loss": 0.0473, "step": 53580 }, { - "epoch": 3.9796524580424775, - "grad_norm": 2.5137290954589844, - "learning_rate": 6.122085251745136e-06, - "loss": 0.0539, + "epoch": 7.959304916084955, + "grad_norm": 1.306350827217102, + "learning_rate": 2.0406950839150453e-06, + "loss": 0.0683, "step": 53590 }, { - "epoch": 3.980395069062825, - "grad_norm": 0.9419310688972473, - "learning_rate": 6.11762958562305e-06, - "loss": 0.0589, + "epoch": 7.96079013812565, + "grad_norm": 0.5132039189338684, + "learning_rate": 2.03920986187435e-06, + "loss": 0.0565, "step": 53600 }, { - "epoch": 3.9811376800831724, - "grad_norm": 1.140599012374878, - "learning_rate": 6.113173919500966e-06, - "loss": 0.0427, + "epoch": 7.962275360166345, + "grad_norm": 0.5860397815704346, + "learning_rate": 2.0377246398336554e-06, + "loss": 0.051, "step": 53610 }, { - "epoch": 3.98188029110352, - "grad_norm": 1.1350241899490356, - "learning_rate": 6.10871825337888e-06, - "loss": 0.0699, + "epoch": 7.96376058220704, + "grad_norm": 1.0573729276657104, + "learning_rate": 2.03623941779296e-06, + "loss": 0.0645, "step": 53620 }, { - "epoch": 3.9826229021238673, - "grad_norm": 1.3582016229629517, - "learning_rate": 6.104262587256795e-06, - "loss": 0.0442, + "epoch": 7.9652458042477345, + "grad_norm": 1.315981388092041, + "learning_rate": 2.034754195752265e-06, + "loss": 0.0467, "step": 53630 }, { - "epoch": 3.983365513144215, - "grad_norm": 1.5812486410140991, - "learning_rate": 6.09980692113471e-06, - "loss": 0.0749, + "epoch": 7.96673102628843, + "grad_norm": 0.9606397747993469, + "learning_rate": 2.03326897371157e-06, + "loss": 0.0536, "step": 53640 }, { - "epoch": 3.9841081241645626, - "grad_norm": 0.9100233912467957, - "learning_rate": 6.095351255012624e-06, - "loss": 0.0737, + "epoch": 7.968216248329125, + "grad_norm": 0.4387343227863312, + "learning_rate": 2.031783751670875e-06, + "loss": 0.0627, "step": 53650 }, { - "epoch": 3.98485073518491, - "grad_norm": 0.8678179383277893, - "learning_rate": 6.090895588890539e-06, - "loss": 0.0367, + "epoch": 7.96970147036982, + "grad_norm": 1.525460124015808, + "learning_rate": 2.03029852963018e-06, + "loss": 0.076, "step": 53660 }, { - "epoch": 3.985593346205258, - "grad_norm": 2.191160202026367, - "learning_rate": 6.086439922768454e-06, - "loss": 0.0931, + "epoch": 7.971186692410515, + "grad_norm": 0.7140664458274841, + "learning_rate": 2.0288133075894846e-06, + "loss": 0.0503, "step": 53670 }, { - "epoch": 3.986335957225605, - "grad_norm": 1.9324291944503784, - "learning_rate": 6.081984256646369e-06, - "loss": 0.0731, + "epoch": 7.972671914451211, + "grad_norm": 1.0060763359069824, + "learning_rate": 2.02732808554879e-06, + "loss": 0.0628, "step": 53680 }, { - "epoch": 3.987078568245953, - "grad_norm": 0.9478018879890442, - "learning_rate": 6.077528590524283e-06, - "loss": 0.0615, + "epoch": 7.974157136491906, + "grad_norm": 1.8016340732574463, + "learning_rate": 2.0258428635080947e-06, + "loss": 0.0574, "step": 53690 }, { - "epoch": 3.9878211792663003, - "grad_norm": 1.2666985988616943, - "learning_rate": 6.073072924402198e-06, - "loss": 0.0543, + "epoch": 7.975642358532601, + "grad_norm": 0.6302917003631592, + "learning_rate": 2.0243576414673996e-06, + "loss": 0.062, "step": 53700 }, { - "epoch": 3.9885637902866478, - "grad_norm": 1.0880041122436523, - "learning_rate": 6.0686172582801125e-06, - "loss": 0.0707, + "epoch": 7.9771275805732955, + "grad_norm": 0.5254745483398438, + "learning_rate": 2.0228724194267044e-06, + "loss": 0.0535, "step": 53710 }, { - "epoch": 3.9893064013069957, - "grad_norm": 0.5105621814727783, - "learning_rate": 6.064161592158028e-06, - "loss": 0.0686, + "epoch": 7.97861280261399, + "grad_norm": 0.3416903614997864, + "learning_rate": 2.0213871973860093e-06, + "loss": 0.0521, "step": 53720 }, { - "epoch": 3.990049012327343, - "grad_norm": 1.3909556865692139, - "learning_rate": 6.0597059260359424e-06, - "loss": 0.0491, + "epoch": 7.980098024654686, + "grad_norm": 0.40586283802986145, + "learning_rate": 2.019901975345314e-06, + "loss": 0.0724, "step": 53730 }, { - "epoch": 3.9907916233476906, - "grad_norm": 3.0516223907470703, - "learning_rate": 6.0552502599138574e-06, - "loss": 0.0681, + "epoch": 7.981583246695381, + "grad_norm": 1.3886269330978394, + "learning_rate": 2.0184167533046194e-06, + "loss": 0.0574, "step": 53740 }, { - "epoch": 3.991534234368038, - "grad_norm": 0.9059837460517883, - "learning_rate": 6.050794593791772e-06, - "loss": 0.034, + "epoch": 7.983068468736076, + "grad_norm": 1.389870524406433, + "learning_rate": 2.016931531263924e-06, + "loss": 0.0622, "step": 53750 }, { - "epoch": 3.9922768453883855, - "grad_norm": 2.0982439517974854, - "learning_rate": 6.046338927669687e-06, - "loss": 0.0533, + "epoch": 7.984553690776771, + "grad_norm": 0.6103036403656006, + "learning_rate": 2.015446309223229e-06, + "loss": 0.0473, "step": 53760 }, { - "epoch": 3.9930194564087333, - "grad_norm": 1.3619189262390137, - "learning_rate": 6.041883261547602e-06, - "loss": 0.053, + "epoch": 7.986038912817467, + "grad_norm": 0.42124733328819275, + "learning_rate": 2.013961087182534e-06, + "loss": 0.0668, "step": 53770 }, { - "epoch": 3.993762067429081, - "grad_norm": 0.7172215580940247, - "learning_rate": 6.0374275954255166e-06, - "loss": 0.0728, + "epoch": 7.987524134858162, + "grad_norm": 0.7079092264175415, + "learning_rate": 2.012475865141839e-06, + "loss": 0.052, "step": 53780 }, { - "epoch": 3.9945046784494282, - "grad_norm": 1.9413235187530518, - "learning_rate": 6.0329719293034316e-06, - "loss": 0.0779, + "epoch": 7.9890093568988565, + "grad_norm": 1.561933159828186, + "learning_rate": 2.0109906431011437e-06, + "loss": 0.0752, "step": 53790 }, { - "epoch": 3.9952472894697757, - "grad_norm": 2.7654523849487305, - "learning_rate": 6.028516263181346e-06, - "loss": 0.0537, + "epoch": 7.990494578939551, + "grad_norm": 0.95597904920578, + "learning_rate": 2.0095054210604486e-06, + "loss": 0.0502, "step": 53800 }, { - "epoch": 3.995989900490123, - "grad_norm": 1.1179438829421997, - "learning_rate": 6.024060597059261e-06, - "loss": 0.0586, + "epoch": 7.991979800980246, + "grad_norm": 0.5312618017196655, + "learning_rate": 2.008020199019754e-06, + "loss": 0.0446, "step": 53810 }, { - "epoch": 3.996732511510471, - "grad_norm": 2.428178071975708, - "learning_rate": 6.019604930937175e-06, - "loss": 0.0714, + "epoch": 7.993465023020942, + "grad_norm": 0.8646672368049622, + "learning_rate": 2.0065349769790583e-06, + "loss": 0.0554, "step": 53820 }, { - "epoch": 3.9974751225308185, - "grad_norm": 1.2658203840255737, - "learning_rate": 6.01514926481509e-06, - "loss": 0.0462, + "epoch": 7.994950245061637, + "grad_norm": 0.9731066823005676, + "learning_rate": 2.0050497549383636e-06, + "loss": 0.0663, "step": 53830 }, { - "epoch": 3.998217733551166, - "grad_norm": 0.42739203572273254, - "learning_rate": 6.010693598693005e-06, - "loss": 0.0538, + "epoch": 7.996435467102332, + "grad_norm": 0.6797077059745789, + "learning_rate": 2.0035645328976684e-06, + "loss": 0.05, "step": 53840 }, { - "epoch": 3.9989603445715134, - "grad_norm": 0.2762404978275299, - "learning_rate": 6.00623793257092e-06, - "loss": 0.0637, + "epoch": 7.997920689143027, + "grad_norm": 0.8420076966285706, + "learning_rate": 2.0020793108569733e-06, + "loss": 0.0582, "step": 53850 }, { - "epoch": 3.999702955591861, - "grad_norm": 0.41970357298851013, - "learning_rate": 6.001782266448834e-06, - "loss": 0.0443, + "epoch": 7.999405911183722, + "grad_norm": 0.8660672307014465, + "learning_rate": 2.000594088816278e-06, + "loss": 0.0575, "step": 53860 }, { - "epoch": 4.0, - "eval_f1": 0.0, - "eval_loss": 0.05353143438696861, - "eval_runtime": 798.3212, - "eval_samples_per_second": 47.624, - "eval_steps_per_second": 2.977, + "epoch": 8.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.05548640340566635, + "eval_runtime": 214.1959, + "eval_samples_per_second": 177.496, + "eval_steps_per_second": 5.551, "step": 53864 }, { - "epoch": 4.000445566612209, - "grad_norm": 1.3657422065734863, - "learning_rate": 5.997326600326749e-06, - "loss": 0.0483, + "epoch": 8.000891133224417, + "grad_norm": 1.6648470163345337, + "learning_rate": 1.999108866775583e-06, + "loss": 0.0673, "step": 53870 }, { - "epoch": 4.001188177632556, - "grad_norm": 1.7870548963546753, - "learning_rate": 5.992870934204663e-06, - "loss": 0.0755, + "epoch": 8.002376355265111, + "grad_norm": 0.6469563245773315, + "learning_rate": 1.997623644734888e-06, + "loss": 0.0668, "step": 53880 }, { - "epoch": 4.001930788652904, - "grad_norm": 1.3454580307006836, - "learning_rate": 5.988415268082578e-06, - "loss": 0.0706, + "epoch": 8.003861577305807, + "grad_norm": 1.2048108577728271, + "learning_rate": 1.996138422694193e-06, + "loss": 0.0616, "step": 53890 }, { - "epoch": 4.0026733996732515, - "grad_norm": 1.4166697263717651, - "learning_rate": 5.983959601960494e-06, - "loss": 0.0645, + "epoch": 8.005346799346503, + "grad_norm": 0.7064803838729858, + "learning_rate": 1.994653200653498e-06, + "loss": 0.0461, "step": 53900 }, { - "epoch": 4.0034160106935985, - "grad_norm": 1.9602590799331665, - "learning_rate": 5.979503935838408e-06, - "loss": 0.0707, + "epoch": 8.006832021387197, + "grad_norm": 1.7400462627410889, + "learning_rate": 1.993167978612803e-06, + "loss": 0.0562, "step": 53910 }, { - "epoch": 4.004158621713946, - "grad_norm": 3.5693228244781494, - "learning_rate": 5.975048269716323e-06, - "loss": 0.0585, + "epoch": 8.008317243427893, + "grad_norm": 0.8522458076477051, + "learning_rate": 1.9916827565721077e-06, + "loss": 0.0434, "step": 53920 }, { - "epoch": 4.004901232734293, - "grad_norm": 3.0163803100585938, - "learning_rate": 5.970592603594237e-06, - "loss": 0.0689, + "epoch": 8.009802465468587, + "grad_norm": 0.41414836049079895, + "learning_rate": 1.9901975345314126e-06, + "loss": 0.0457, "step": 53930 }, { - "epoch": 4.005643843754641, - "grad_norm": 0.6391358375549316, - "learning_rate": 5.966136937472152e-06, - "loss": 0.0442, + "epoch": 8.011287687509283, + "grad_norm": 1.1489344835281372, + "learning_rate": 1.9887123124907174e-06, + "loss": 0.0547, "step": 53940 }, { - "epoch": 4.006386454774989, - "grad_norm": 0.6875801086425781, - "learning_rate": 5.961681271350066e-06, - "loss": 0.0441, + "epoch": 8.012772909549978, + "grad_norm": 0.36201971769332886, + "learning_rate": 1.9872270904500223e-06, + "loss": 0.0544, "step": 53950 }, { - "epoch": 4.007129065795336, - "grad_norm": 2.397939443588257, - "learning_rate": 5.957225605227982e-06, - "loss": 0.067, + "epoch": 8.014258131590672, + "grad_norm": 0.940636157989502, + "learning_rate": 1.9857418684093276e-06, + "loss": 0.0553, "step": 53960 }, { - "epoch": 4.007871676815684, - "grad_norm": 1.6307581663131714, - "learning_rate": 5.952769939105896e-06, - "loss": 0.0316, + "epoch": 8.015743353631368, + "grad_norm": 0.7558504343032837, + "learning_rate": 1.984256646368632e-06, + "loss": 0.0613, "step": 53970 }, { - "epoch": 4.008614287836031, - "grad_norm": 1.195626974105835, - "learning_rate": 5.948314272983811e-06, - "loss": 0.0583, + "epoch": 8.017228575672062, + "grad_norm": 0.6963912844657898, + "learning_rate": 1.9827714243279373e-06, + "loss": 0.0521, "step": 53980 }, { - "epoch": 4.009356898856379, - "grad_norm": 2.5990028381347656, - "learning_rate": 5.9438586068617255e-06, - "loss": 0.079, + "epoch": 8.018713797712758, + "grad_norm": 1.4509975910186768, + "learning_rate": 1.981286202287242e-06, + "loss": 0.0622, "step": 53990 }, { - "epoch": 4.010099509876727, - "grad_norm": 1.327807068824768, - "learning_rate": 5.9394029407396405e-06, - "loss": 0.0702, + "epoch": 8.020199019753454, + "grad_norm": 1.1569229364395142, + "learning_rate": 1.979800980246547e-06, + "loss": 0.054, "step": 54000 }, { - "epoch": 4.010842120897074, - "grad_norm": 2.4777891635894775, - "learning_rate": 5.9349472746175555e-06, - "loss": 0.049, + "epoch": 8.021684241794148, + "grad_norm": 0.5657662749290466, + "learning_rate": 1.978315758205852e-06, + "loss": 0.0515, "step": 54010 }, { - "epoch": 4.011584731917422, - "grad_norm": 1.1965991258621216, - "learning_rate": 5.9304916084954705e-06, - "loss": 0.0718, + "epoch": 8.023169463834844, + "grad_norm": 0.42393553256988525, + "learning_rate": 1.9768305361651567e-06, + "loss": 0.0565, "step": 54020 }, { - "epoch": 4.012327342937769, - "grad_norm": 0.5554696321487427, - "learning_rate": 5.9260359423733855e-06, - "loss": 0.0433, + "epoch": 8.024654685875538, + "grad_norm": 0.5691762566566467, + "learning_rate": 1.975345314124462e-06, + "loss": 0.0533, "step": 54030 }, { - "epoch": 4.013069953958117, - "grad_norm": 1.6719565391540527, - "learning_rate": 5.9215802762513e-06, - "loss": 0.074, + "epoch": 8.026139907916233, + "grad_norm": 0.9995362758636475, + "learning_rate": 1.973860092083767e-06, + "loss": 0.0596, "step": 54040 }, { - "epoch": 4.013812564978465, - "grad_norm": 0.40224915742874146, - "learning_rate": 5.917124610129215e-06, - "loss": 0.0552, + "epoch": 8.02762512995693, + "grad_norm": 0.9841630458831787, + "learning_rate": 1.9723748700430717e-06, + "loss": 0.0577, "step": 54050 }, { - "epoch": 4.014555175998812, - "grad_norm": 0.9646987915039062, - "learning_rate": 5.912668944007129e-06, - "loss": 0.0513, + "epoch": 8.029110351997623, + "grad_norm": 0.9374364614486694, + "learning_rate": 1.9708896480023765e-06, + "loss": 0.0669, "step": 54060 }, { - "epoch": 4.0152977870191595, - "grad_norm": 2.081406593322754, - "learning_rate": 5.908213277885045e-06, - "loss": 0.0644, + "epoch": 8.030595574038319, + "grad_norm": 0.5411103367805481, + "learning_rate": 1.9694044259616814e-06, + "loss": 0.0609, "step": 54070 }, { - "epoch": 4.0160403980395065, - "grad_norm": 1.1674538850784302, - "learning_rate": 5.903757611762959e-06, - "loss": 0.048, + "epoch": 8.032080796079013, + "grad_norm": 1.2413806915283203, + "learning_rate": 1.9679192039209863e-06, + "loss": 0.0416, "step": 54080 }, { - "epoch": 4.016783009059854, - "grad_norm": 0.48358213901519775, - "learning_rate": 5.899301945640874e-06, - "loss": 0.0558, + "epoch": 8.033566018119709, + "grad_norm": 0.8985723257064819, + "learning_rate": 1.9664339818802915e-06, + "loss": 0.0606, "step": 54090 }, { - "epoch": 4.017525620080202, - "grad_norm": 0.6902279257774353, - "learning_rate": 5.894846279518788e-06, - "loss": 0.0671, + "epoch": 8.035051240160405, + "grad_norm": 0.7435330152511597, + "learning_rate": 1.964948759839596e-06, + "loss": 0.0462, "step": 54100 }, { - "epoch": 4.018268231100549, - "grad_norm": 1.8209834098815918, - "learning_rate": 5.890390613396703e-06, - "loss": 0.0673, + "epoch": 8.036536462201099, + "grad_norm": 0.7745005488395691, + "learning_rate": 1.9634635377989013e-06, + "loss": 0.0541, "step": 54110 }, { - "epoch": 4.019010842120897, - "grad_norm": 0.6729385256767273, - "learning_rate": 5.885934947274617e-06, - "loss": 0.0378, + "epoch": 8.038021684241794, + "grad_norm": 1.5749375820159912, + "learning_rate": 1.961978315758206e-06, + "loss": 0.0704, "step": 54120 }, { - "epoch": 4.019753453141244, - "grad_norm": 0.9727711081504822, - "learning_rate": 5.881479281152533e-06, - "loss": 0.0405, + "epoch": 8.039506906282488, + "grad_norm": 1.094252109527588, + "learning_rate": 1.960493093717511e-06, + "loss": 0.0847, "step": 54130 }, { - "epoch": 4.020496064161592, - "grad_norm": 0.18303215503692627, - "learning_rate": 5.877023615030447e-06, - "loss": 0.0852, + "epoch": 8.040992128323184, + "grad_norm": 0.38332584500312805, + "learning_rate": 1.959007871676816e-06, + "loss": 0.0425, "step": 54140 }, { - "epoch": 4.02123867518194, - "grad_norm": 1.9007530212402344, - "learning_rate": 5.872567948908362e-06, - "loss": 0.0734, + "epoch": 8.04247735036388, + "grad_norm": 0.9757881760597229, + "learning_rate": 1.9575226496361207e-06, + "loss": 0.0611, "step": 54150 }, { - "epoch": 4.021981286202287, - "grad_norm": 1.277612566947937, - "learning_rate": 5.868112282786277e-06, - "loss": 0.055, + "epoch": 8.043962572404574, + "grad_norm": 0.6582308411598206, + "learning_rate": 1.956037427595426e-06, + "loss": 0.044, "step": 54160 }, { - "epoch": 4.022723897222635, - "grad_norm": 0.40179601311683655, - "learning_rate": 5.863656616664191e-06, - "loss": 0.0591, + "epoch": 8.04544779444527, + "grad_norm": 1.326470971107483, + "learning_rate": 1.9545522055547304e-06, + "loss": 0.0569, "step": 54170 }, { - "epoch": 4.023466508242982, - "grad_norm": 0.6786103248596191, - "learning_rate": 5.859200950542106e-06, - "loss": 0.0633, + "epoch": 8.046933016485964, + "grad_norm": 1.4490509033203125, + "learning_rate": 1.9530669835140357e-06, + "loss": 0.0623, "step": 54180 }, { - "epoch": 4.02420911926333, - "grad_norm": 2.203526496887207, - "learning_rate": 5.854745284420021e-06, - "loss": 0.0759, + "epoch": 8.04841823852666, + "grad_norm": 0.7956948280334473, + "learning_rate": 1.9515817614733405e-06, + "loss": 0.0505, "step": 54190 }, { - "epoch": 4.024951730283678, - "grad_norm": 1.4047437906265259, - "learning_rate": 5.850289618297936e-06, - "loss": 0.0761, + "epoch": 8.049903460567355, + "grad_norm": 0.8142948746681213, + "learning_rate": 1.9500965394326454e-06, + "loss": 0.0596, "step": 54200 }, { - "epoch": 4.025694341304025, - "grad_norm": 1.809567928314209, - "learning_rate": 5.84583395217585e-06, - "loss": 0.0656, + "epoch": 8.05138868260805, + "grad_norm": 0.8596924543380737, + "learning_rate": 1.9486113173919502e-06, + "loss": 0.0393, "step": 54210 }, { - "epoch": 4.026436952324373, - "grad_norm": 1.946758508682251, - "learning_rate": 5.841378286053765e-06, - "loss": 0.0509, + "epoch": 8.052873904648745, + "grad_norm": 0.9736611247062683, + "learning_rate": 1.947126095351255e-06, + "loss": 0.0543, "step": 54220 }, { - "epoch": 4.02717956334472, - "grad_norm": 1.9087995290756226, - "learning_rate": 5.8369226199316795e-06, - "loss": 0.0697, + "epoch": 8.05435912668944, + "grad_norm": 1.1759473085403442, + "learning_rate": 1.94564087331056e-06, + "loss": 0.048, "step": 54230 }, { - "epoch": 4.0279221743650675, - "grad_norm": 0.6901232600212097, - "learning_rate": 5.8324669538095944e-06, - "loss": 0.0574, + "epoch": 8.055844348730135, + "grad_norm": 0.7489706873893738, + "learning_rate": 1.9441556512698652e-06, + "loss": 0.0539, "step": 54240 }, { - "epoch": 4.028664785385415, - "grad_norm": 1.0720127820968628, - "learning_rate": 5.8280112876875094e-06, - "loss": 0.0663, + "epoch": 8.05732957077083, + "grad_norm": 0.7064876556396484, + "learning_rate": 1.9426704292291697e-06, + "loss": 0.0752, "step": 54250 }, { - "epoch": 4.029407396405762, - "grad_norm": 1.3058216571807861, - "learning_rate": 5.8235556215654244e-06, - "loss": 0.046, + "epoch": 8.058814792811525, + "grad_norm": 1.0841573476791382, + "learning_rate": 1.941185207188475e-06, + "loss": 0.051, "step": 54260 }, { - "epoch": 4.03015000742611, - "grad_norm": 0.79048752784729, - "learning_rate": 5.819099955443339e-06, - "loss": 0.0757, + "epoch": 8.06030001485222, + "grad_norm": 0.6828540563583374, + "learning_rate": 1.93969998514778e-06, + "loss": 0.0469, "step": 54270 }, { - "epoch": 4.030892618446458, - "grad_norm": 2.9405195713043213, - "learning_rate": 5.814644289321254e-06, - "loss": 0.0786, + "epoch": 8.061785236892916, + "grad_norm": 0.9682624936103821, + "learning_rate": 1.9382147631070847e-06, + "loss": 0.0491, "step": 54280 }, { - "epoch": 4.031635229466805, - "grad_norm": 2.264370918273926, - "learning_rate": 5.8101886231991686e-06, - "loss": 0.0659, + "epoch": 8.06327045893361, + "grad_norm": 0.630560040473938, + "learning_rate": 1.9367295410663895e-06, + "loss": 0.0502, "step": 54290 }, { - "epoch": 4.032377840487153, - "grad_norm": 0.7332669496536255, - "learning_rate": 5.805732957077083e-06, - "loss": 0.0595, + "epoch": 8.064755680974306, + "grad_norm": 0.7623802423477173, + "learning_rate": 1.9352443190256944e-06, + "loss": 0.0487, "step": 54300 }, { - "epoch": 4.0331204515075, - "grad_norm": 2.585928440093994, - "learning_rate": 5.8012772909549986e-06, - "loss": 0.0937, + "epoch": 8.066240903015, + "grad_norm": 0.6193575263023376, + "learning_rate": 1.9337590969849997e-06, + "loss": 0.0449, "step": 54310 }, { - "epoch": 4.033863062527848, - "grad_norm": 0.5826427340507507, - "learning_rate": 5.796821624832913e-06, - "loss": 0.0509, + "epoch": 8.067726125055696, + "grad_norm": 0.35763445496559143, + "learning_rate": 1.932273874944304e-06, + "loss": 0.0441, "step": 54320 }, { - "epoch": 4.034605673548196, - "grad_norm": 1.9038110971450806, - "learning_rate": 5.792365958710828e-06, - "loss": 0.0811, + "epoch": 8.069211347096392, + "grad_norm": 0.6113510131835938, + "learning_rate": 1.9307886529036094e-06, + "loss": 0.0609, "step": 54330 }, { - "epoch": 4.035348284568543, - "grad_norm": 0.9601470232009888, - "learning_rate": 5.787910292588742e-06, - "loss": 0.0506, + "epoch": 8.070696569137086, + "grad_norm": 0.9819941520690918, + "learning_rate": 1.9293034308629142e-06, + "loss": 0.0749, "step": 54340 }, { - "epoch": 4.036090895588891, - "grad_norm": 0.9888731241226196, - "learning_rate": 5.783454626466657e-06, - "loss": 0.0703, + "epoch": 8.072181791177782, + "grad_norm": 1.105443000793457, + "learning_rate": 1.927818208822219e-06, + "loss": 0.0552, "step": 54350 }, { - "epoch": 4.036833506609238, - "grad_norm": 0.9209118485450745, - "learning_rate": 5.778998960344572e-06, - "loss": 0.0736, + "epoch": 8.073667013218476, + "grad_norm": 1.36037278175354, + "learning_rate": 1.926332986781524e-06, + "loss": 0.0667, "step": 54360 }, { - "epoch": 4.037576117629586, - "grad_norm": 2.1321849822998047, - "learning_rate": 5.774543294222487e-06, - "loss": 0.058, + "epoch": 8.075152235259171, + "grad_norm": 0.5551352500915527, + "learning_rate": 1.924847764740829e-06, + "loss": 0.0636, "step": 54370 }, { - "epoch": 4.038318728649934, - "grad_norm": 0.4780378043651581, - "learning_rate": 5.770087628100401e-06, - "loss": 0.0451, + "epoch": 8.076637457299867, + "grad_norm": 1.235351324081421, + "learning_rate": 1.9233625427001337e-06, + "loss": 0.0578, "step": 54380 }, { - "epoch": 4.039061339670281, - "grad_norm": 0.6175082921981812, - "learning_rate": 5.765631961978316e-06, - "loss": 0.0457, + "epoch": 8.078122679340561, + "grad_norm": 0.669603168964386, + "learning_rate": 1.921877320659439e-06, + "loss": 0.0491, "step": 54390 }, { - "epoch": 4.0398039506906285, - "grad_norm": 0.8569179773330688, - "learning_rate": 5.76117629585623e-06, - "loss": 0.063, + "epoch": 8.079607901381257, + "grad_norm": 0.5650709271430969, + "learning_rate": 1.9203920986187434e-06, + "loss": 0.0635, "step": 54400 }, { - "epoch": 4.0405465617109755, - "grad_norm": 0.5725436210632324, - "learning_rate": 5.756720629734145e-06, - "loss": 0.0555, + "epoch": 8.081093123421951, + "grad_norm": 1.0669199228286743, + "learning_rate": 1.9189068765780487e-06, + "loss": 0.0924, "step": 54410 }, { - "epoch": 4.041289172731323, - "grad_norm": 1.1730387210845947, - "learning_rate": 5.752264963612061e-06, - "loss": 0.0586, + "epoch": 8.082578345462647, + "grad_norm": 0.9767273664474487, + "learning_rate": 1.9174216545373535e-06, + "loss": 0.0671, "step": 54420 }, { - "epoch": 4.042031783751671, - "grad_norm": 1.527571678161621, - "learning_rate": 5.747809297489975e-06, - "loss": 0.0381, + "epoch": 8.084063567503343, + "grad_norm": 1.03615403175354, + "learning_rate": 1.9159364324966584e-06, + "loss": 0.0466, "step": 54430 }, { - "epoch": 4.042774394772018, - "grad_norm": 1.1510217189788818, - "learning_rate": 5.74335363136789e-06, - "loss": 0.0494, + "epoch": 8.085548789544037, + "grad_norm": 0.7880136370658875, + "learning_rate": 1.9144512104559636e-06, + "loss": 0.0654, "step": 54440 }, { - "epoch": 4.043517005792366, - "grad_norm": 2.92785382270813, - "learning_rate": 5.738897965245804e-06, - "loss": 0.0484, + "epoch": 8.087034011584732, + "grad_norm": 1.0869358777999878, + "learning_rate": 1.912965988415268e-06, + "loss": 0.0612, "step": 54450 }, { - "epoch": 4.044259616812713, - "grad_norm": 1.290939211845398, - "learning_rate": 5.734442299123719e-06, - "loss": 0.0585, + "epoch": 8.088519233625426, + "grad_norm": 1.2350759506225586, + "learning_rate": 1.9114807663745734e-06, + "loss": 0.0502, "step": 54460 }, { - "epoch": 4.045002227833061, - "grad_norm": 1.4122437238693237, - "learning_rate": 5.729986633001633e-06, - "loss": 0.0517, + "epoch": 8.090004455666122, + "grad_norm": 1.341839075088501, + "learning_rate": 1.9099955443338782e-06, + "loss": 0.0443, "step": 54470 }, { - "epoch": 4.045744838853409, - "grad_norm": 1.2548693418502808, - "learning_rate": 5.725530966879549e-06, - "loss": 0.0642, + "epoch": 8.091489677706818, + "grad_norm": 0.43474024534225464, + "learning_rate": 1.908510322293183e-06, + "loss": 0.041, "step": 54480 }, { - "epoch": 4.046487449873756, - "grad_norm": 2.637096405029297, - "learning_rate": 5.721075300757463e-06, - "loss": 0.0761, + "epoch": 8.092974899747512, + "grad_norm": 1.368962287902832, + "learning_rate": 1.9070251002524877e-06, + "loss": 0.066, "step": 54490 }, { - "epoch": 4.047230060894104, - "grad_norm": 1.5033966302871704, - "learning_rate": 5.716619634635378e-06, - "loss": 0.0705, + "epoch": 8.094460121788208, + "grad_norm": 1.0121878385543823, + "learning_rate": 1.9055398782117928e-06, + "loss": 0.069, "step": 54500 }, { - "epoch": 4.047972671914451, - "grad_norm": 0.40369749069213867, - "learning_rate": 5.7121639685132925e-06, - "loss": 0.0678, + "epoch": 8.095945343828902, + "grad_norm": 0.4158135652542114, + "learning_rate": 1.9040546561710976e-06, + "loss": 0.0453, "step": 54510 }, { - "epoch": 4.048715282934799, - "grad_norm": 2.240159511566162, - "learning_rate": 5.7077083023912075e-06, - "loss": 0.0721, + "epoch": 8.097430565869598, + "grad_norm": 0.7408210039138794, + "learning_rate": 1.9025694341304027e-06, + "loss": 0.0662, "step": 54520 }, { - "epoch": 4.049457893955147, - "grad_norm": 2.353022336959839, - "learning_rate": 5.703252636269122e-06, - "loss": 0.0597, + "epoch": 8.098915787910293, + "grad_norm": 0.5974799990653992, + "learning_rate": 1.9010842120897074e-06, + "loss": 0.0491, "step": 54530 }, { - "epoch": 4.050200504975494, - "grad_norm": 0.9646291732788086, - "learning_rate": 5.6987969701470375e-06, - "loss": 0.049, + "epoch": 8.100401009950987, + "grad_norm": 1.0753979682922363, + "learning_rate": 1.8995989900490124e-06, + "loss": 0.0648, "step": 54540 }, { - "epoch": 4.050943115995842, - "grad_norm": 2.1946423053741455, - "learning_rate": 5.6943413040249525e-06, - "loss": 0.062, + "epoch": 8.101886231991683, + "grad_norm": 0.9504005908966064, + "learning_rate": 1.8981137680083175e-06, + "loss": 0.0495, "step": 54550 }, { - "epoch": 4.051685727016189, - "grad_norm": 2.869677782058716, - "learning_rate": 5.689885637902867e-06, - "loss": 0.0553, + "epoch": 8.103371454032377, + "grad_norm": 0.9773847460746765, + "learning_rate": 1.8966285459676224e-06, + "loss": 0.0656, "step": 54560 }, { - "epoch": 4.0524283380365365, - "grad_norm": 3.2481913566589355, - "learning_rate": 5.685429971780782e-06, - "loss": 0.0373, + "epoch": 8.104856676073073, + "grad_norm": 0.8386691808700562, + "learning_rate": 1.8951433239269274e-06, + "loss": 0.0468, "step": 54570 }, { - "epoch": 4.053170949056884, - "grad_norm": 1.6126598119735718, - "learning_rate": 5.680974305658696e-06, - "loss": 0.0596, + "epoch": 8.106341898113769, + "grad_norm": 0.9314102530479431, + "learning_rate": 1.893658101886232e-06, + "loss": 0.0618, "step": 54580 }, { - "epoch": 4.053913560077231, - "grad_norm": 1.128563404083252, - "learning_rate": 5.676518639536611e-06, - "loss": 0.0334, + "epoch": 8.107827120154463, + "grad_norm": 0.8424334526062012, + "learning_rate": 1.8921728798455371e-06, + "loss": 0.0651, "step": 54590 }, { - "epoch": 4.054656171097579, - "grad_norm": 2.0686044692993164, - "learning_rate": 5.672062973414526e-06, - "loss": 0.0672, + "epoch": 8.109312342195159, + "grad_norm": 1.1387025117874146, + "learning_rate": 1.890687657804842e-06, + "loss": 0.0633, "step": 54600 }, { - "epoch": 4.055398782117926, - "grad_norm": 0.4722823202610016, - "learning_rate": 5.667607307292441e-06, - "loss": 0.0368, + "epoch": 8.110797564235853, + "grad_norm": 1.1821174621582031, + "learning_rate": 1.889202435764147e-06, + "loss": 0.0672, "step": 54610 }, { - "epoch": 4.056141393138274, - "grad_norm": 1.7301435470581055, - "learning_rate": 5.663151641170355e-06, - "loss": 0.0736, + "epoch": 8.112282786276548, + "grad_norm": 0.7393181920051575, + "learning_rate": 1.8877172137234517e-06, + "loss": 0.0564, "step": 54620 }, { - "epoch": 4.056884004158622, - "grad_norm": 0.7229349613189697, - "learning_rate": 5.65869597504827e-06, - "loss": 0.0901, + "epoch": 8.113768008317244, + "grad_norm": 0.8846901655197144, + "learning_rate": 1.8862319916827568e-06, + "loss": 0.0624, "step": 54630 }, { - "epoch": 4.057626615178969, - "grad_norm": 2.0059196949005127, - "learning_rate": 5.654240308926184e-06, - "loss": 0.0608, + "epoch": 8.115253230357938, + "grad_norm": 0.6640163660049438, + "learning_rate": 1.8847467696420616e-06, + "loss": 0.0529, "step": 54640 }, { - "epoch": 4.058369226199317, - "grad_norm": 0.5810075402259827, - "learning_rate": 5.649784642804099e-06, - "loss": 0.0438, + "epoch": 8.116738452398634, + "grad_norm": 1.2793275117874146, + "learning_rate": 1.8832615476013665e-06, + "loss": 0.0632, "step": 54650 }, { - "epoch": 4.059111837219664, - "grad_norm": 1.114890456199646, - "learning_rate": 5.645328976682014e-06, - "loss": 0.0457, + "epoch": 8.118223674439328, + "grad_norm": 0.901630163192749, + "learning_rate": 1.8817763255606713e-06, + "loss": 0.0518, "step": 54660 }, { - "epoch": 4.059854448240012, - "grad_norm": 1.2894550561904907, - "learning_rate": 5.640873310559929e-06, - "loss": 0.0744, + "epoch": 8.119708896480024, + "grad_norm": 0.7542802095413208, + "learning_rate": 1.8802911035199764e-06, + "loss": 0.0547, "step": 54670 }, { - "epoch": 4.06059705926036, - "grad_norm": 0.7137770652770996, - "learning_rate": 5.636417644437843e-06, - "loss": 0.0785, + "epoch": 8.12119411852072, + "grad_norm": 0.851569652557373, + "learning_rate": 1.8788058814792813e-06, + "loss": 0.0511, "step": 54680 }, { - "epoch": 4.061339670280707, - "grad_norm": 2.466371774673462, - "learning_rate": 5.631961978315758e-06, - "loss": 0.0536, + "epoch": 8.122679340561413, + "grad_norm": 1.1253145933151245, + "learning_rate": 1.8773206594385861e-06, + "loss": 0.0523, "step": 54690 }, { - "epoch": 4.062082281301055, - "grad_norm": 0.39749160408973694, - "learning_rate": 5.627506312193673e-06, - "loss": 0.0582, + "epoch": 8.12416456260211, + "grad_norm": 0.4665616750717163, + "learning_rate": 1.8758354373978912e-06, + "loss": 0.0569, "step": 54700 }, { - "epoch": 4.062824892321402, - "grad_norm": 1.1517406702041626, - "learning_rate": 5.623050646071588e-06, - "loss": 0.0545, + "epoch": 8.125649784642803, + "grad_norm": 1.1059377193450928, + "learning_rate": 1.874350215357196e-06, + "loss": 0.0588, "step": 54710 }, { - "epoch": 4.0635675033417495, - "grad_norm": 0.7695569396018982, - "learning_rate": 5.618594979949503e-06, - "loss": 0.0449, + "epoch": 8.127135006683499, + "grad_norm": 0.6096968650817871, + "learning_rate": 1.8728649933165011e-06, + "loss": 0.0698, "step": 54720 }, { - "epoch": 4.064310114362097, - "grad_norm": 0.4918759763240814, - "learning_rate": 5.614139313827417e-06, - "loss": 0.0566, + "epoch": 8.128620228724195, + "grad_norm": 1.5796551704406738, + "learning_rate": 1.8713797712758058e-06, + "loss": 0.0657, "step": 54730 }, { - "epoch": 4.065052725382444, - "grad_norm": 1.6468467712402344, - "learning_rate": 5.609683647705332e-06, - "loss": 0.052, + "epoch": 8.130105450764889, + "grad_norm": 0.34364181756973267, + "learning_rate": 1.8698945492351108e-06, + "loss": 0.0465, "step": 54740 }, { - "epoch": 4.065795336402792, - "grad_norm": 1.0345146656036377, - "learning_rate": 5.6052279815832464e-06, - "loss": 0.0394, + "epoch": 8.131590672805585, + "grad_norm": 0.48883184790611267, + "learning_rate": 1.8684093271944157e-06, + "loss": 0.0468, "step": 54750 }, { - "epoch": 4.066537947423139, - "grad_norm": 3.2710659503936768, - "learning_rate": 5.6007723154611614e-06, - "loss": 0.071, + "epoch": 8.133075894846279, + "grad_norm": 1.1846975088119507, + "learning_rate": 1.8669241051537208e-06, + "loss": 0.0745, "step": 54760 }, { - "epoch": 4.067280558443487, - "grad_norm": 2.8637590408325195, - "learning_rate": 5.5963166493390764e-06, - "loss": 0.0476, + "epoch": 8.134561116886974, + "grad_norm": 0.8794407248497009, + "learning_rate": 1.8654388831130254e-06, + "loss": 0.0448, "step": 54770 }, { - "epoch": 4.068023169463835, - "grad_norm": 0.5226951837539673, - "learning_rate": 5.5918609832169914e-06, - "loss": 0.0442, + "epoch": 8.13604633892767, + "grad_norm": 0.44761931896209717, + "learning_rate": 1.8639536610723305e-06, + "loss": 0.0586, "step": 54780 }, { - "epoch": 4.068765780484182, - "grad_norm": 0.48805347084999084, - "learning_rate": 5.587405317094906e-06, - "loss": 0.044, + "epoch": 8.137531560968364, + "grad_norm": 1.2459882497787476, + "learning_rate": 1.8624684390316353e-06, + "loss": 0.0445, "step": 54790 }, { - "epoch": 4.06950839150453, - "grad_norm": 0.6936028599739075, - "learning_rate": 5.5829496509728206e-06, - "loss": 0.0593, + "epoch": 8.13901678300906, + "grad_norm": 1.1183502674102783, + "learning_rate": 1.8609832169909404e-06, + "loss": 0.0768, "step": 54800 }, { - "epoch": 4.070251002524877, - "grad_norm": 2.0860705375671387, - "learning_rate": 5.578493984850735e-06, - "loss": 0.0653, + "epoch": 8.140502005049754, + "grad_norm": 0.39576178789138794, + "learning_rate": 1.859497994950245e-06, + "loss": 0.0579, "step": 54810 }, { - "epoch": 4.070993613545225, - "grad_norm": 1.1754940748214722, - "learning_rate": 5.57403831872865e-06, - "loss": 0.0628, + "epoch": 8.14198722709045, + "grad_norm": 1.019203782081604, + "learning_rate": 1.8580127729095501e-06, + "loss": 0.0515, "step": 54820 }, { - "epoch": 4.071736224565573, - "grad_norm": 1.029878854751587, - "learning_rate": 5.5695826526065656e-06, - "loss": 0.0553, + "epoch": 8.143472449131146, + "grad_norm": 1.2380660772323608, + "learning_rate": 1.8565275508688552e-06, + "loss": 0.054, "step": 54830 }, { - "epoch": 4.07247883558592, - "grad_norm": 0.6767436861991882, - "learning_rate": 5.56512698648448e-06, - "loss": 0.0671, + "epoch": 8.14495767117184, + "grad_norm": 1.7992037534713745, + "learning_rate": 1.8550423288281598e-06, + "loss": 0.0564, "step": 54840 }, { - "epoch": 4.073221446606268, - "grad_norm": 1.4418343305587769, - "learning_rate": 5.560671320362395e-06, - "loss": 0.0582, + "epoch": 8.146442893212535, + "grad_norm": 0.9479325413703918, + "learning_rate": 1.853557106787465e-06, + "loss": 0.0535, "step": 54850 }, { - "epoch": 4.073964057626615, - "grad_norm": 0.44620373845100403, - "learning_rate": 5.556215654240309e-06, - "loss": 0.0845, + "epoch": 8.14792811525323, + "grad_norm": 0.8101366758346558, + "learning_rate": 1.8520718847467698e-06, + "loss": 0.0534, "step": 54860 }, { - "epoch": 4.074706668646963, - "grad_norm": 1.5117290019989014, - "learning_rate": 5.551759988118224e-06, - "loss": 0.0614, + "epoch": 8.149413337293925, + "grad_norm": 0.4467363655567169, + "learning_rate": 1.8505866627060748e-06, + "loss": 0.0601, "step": 54870 }, { - "epoch": 4.0754492796673105, - "grad_norm": 1.3769358396530151, - "learning_rate": 5.547304321996138e-06, - "loss": 0.0551, + "epoch": 8.150898559334621, + "grad_norm": 0.7251670956611633, + "learning_rate": 1.8491014406653795e-06, + "loss": 0.0597, "step": 54880 }, { - "epoch": 4.0761918906876575, - "grad_norm": 1.5998951196670532, - "learning_rate": 5.542848655874054e-06, - "loss": 0.0743, + "epoch": 8.152383781375315, + "grad_norm": 0.28151366114616394, + "learning_rate": 1.8476162186246845e-06, + "loss": 0.0508, "step": 54890 }, { - "epoch": 4.076934501708005, - "grad_norm": 1.0898070335388184, - "learning_rate": 5.538392989751968e-06, - "loss": 0.0667, + "epoch": 8.15386900341601, + "grad_norm": 0.4550783038139343, + "learning_rate": 1.8461309965839894e-06, + "loss": 0.0586, "step": 54900 }, { - "epoch": 4.077677112728353, - "grad_norm": 1.2157825231552124, - "learning_rate": 5.533937323629883e-06, - "loss": 0.0511, + "epoch": 8.155354225456707, + "grad_norm": 0.6677719354629517, + "learning_rate": 1.8446457745432945e-06, + "loss": 0.0546, "step": 54910 }, { - "epoch": 4.0784197237487, - "grad_norm": 2.0952835083007812, - "learning_rate": 5.529481657507797e-06, - "loss": 0.0447, + "epoch": 8.1568394474974, + "grad_norm": 1.1175285577774048, + "learning_rate": 1.8431605525025991e-06, + "loss": 0.0489, "step": 54920 }, { - "epoch": 4.079162334769048, - "grad_norm": 1.1729780435562134, - "learning_rate": 5.525025991385712e-06, - "loss": 0.1045, + "epoch": 8.158324669538096, + "grad_norm": 0.46331268548965454, + "learning_rate": 1.8416753304619042e-06, + "loss": 0.0525, "step": 54930 }, { - "epoch": 4.079904945789395, - "grad_norm": 1.0873030424118042, - "learning_rate": 5.520570325263626e-06, - "loss": 0.062, + "epoch": 8.15980989157879, + "grad_norm": 1.2049654722213745, + "learning_rate": 1.840190108421209e-06, + "loss": 0.0496, "step": 54940 }, { - "epoch": 4.080647556809743, - "grad_norm": 0.5280413627624512, - "learning_rate": 5.516114659141542e-06, - "loss": 0.0579, + "epoch": 8.161295113619486, + "grad_norm": 0.4104573130607605, + "learning_rate": 1.838704886380514e-06, + "loss": 0.0521, "step": 54950 }, { - "epoch": 4.081390167830091, - "grad_norm": 1.2786023616790771, - "learning_rate": 5.511658993019457e-06, - "loss": 0.0453, + "epoch": 8.162780335660182, + "grad_norm": 0.787076473236084, + "learning_rate": 1.8372196643398192e-06, + "loss": 0.0641, "step": 54960 }, { - "epoch": 4.082132778850438, - "grad_norm": 1.6670242547988892, - "learning_rate": 5.507203326897371e-06, - "loss": 0.0718, + "epoch": 8.164265557700876, + "grad_norm": 0.6896853446960449, + "learning_rate": 1.8357344422991238e-06, + "loss": 0.0499, "step": 54970 }, { - "epoch": 4.082875389870786, - "grad_norm": 0.9172728657722473, - "learning_rate": 5.502747660775286e-06, - "loss": 0.0472, + "epoch": 8.165750779741572, + "grad_norm": 1.139472246170044, + "learning_rate": 1.8342492202584289e-06, + "loss": 0.0451, "step": 54980 }, { - "epoch": 4.083618000891133, - "grad_norm": 0.3648128807544708, - "learning_rate": 5.4982919946532e-06, - "loss": 0.0824, + "epoch": 8.167236001782266, + "grad_norm": 1.1083751916885376, + "learning_rate": 1.8327639982177337e-06, + "loss": 0.0552, "step": 54990 }, { - "epoch": 4.084360611911481, - "grad_norm": 3.186699151992798, - "learning_rate": 5.493836328531115e-06, - "loss": 0.0656, + "epoch": 8.168721223822962, + "grad_norm": 1.0357023477554321, + "learning_rate": 1.8312787761770386e-06, + "loss": 0.0471, "step": 55000 }, { - "epoch": 4.085103222931829, - "grad_norm": 1.5202915668487549, - "learning_rate": 5.48938066240903e-06, - "loss": 0.0586, + "epoch": 8.170206445863657, + "grad_norm": 0.9517379999160767, + "learning_rate": 1.8297935541363435e-06, + "loss": 0.055, "step": 55010 }, { - "epoch": 4.085845833952176, - "grad_norm": 1.0740153789520264, - "learning_rate": 5.484924996286945e-06, - "loss": 0.0612, + "epoch": 8.171691667904351, + "grad_norm": 0.9328690767288208, + "learning_rate": 1.8283083320956485e-06, + "loss": 0.0599, "step": 55020 }, { - "epoch": 4.086588444972524, - "grad_norm": 1.715743899345398, - "learning_rate": 5.4804693301648595e-06, - "loss": 0.0613, + "epoch": 8.173176889945047, + "grad_norm": 0.6500536799430847, + "learning_rate": 1.8268231100549532e-06, + "loss": 0.0545, "step": 55030 }, { - "epoch": 4.087331055992871, - "grad_norm": 2.118396520614624, - "learning_rate": 5.4760136640427745e-06, - "loss": 0.0698, + "epoch": 8.174662111985741, + "grad_norm": 0.9316378235816956, + "learning_rate": 1.8253378880142582e-06, + "loss": 0.0348, "step": 55040 }, { - "epoch": 4.0880736670132185, - "grad_norm": 0.9983229637145996, - "learning_rate": 5.471557997920689e-06, - "loss": 0.0509, + "epoch": 8.176147334026437, + "grad_norm": 0.9480088353157043, + "learning_rate": 1.823852665973563e-06, + "loss": 0.0593, "step": 55050 }, { - "epoch": 4.088816278033566, - "grad_norm": 2.4664018154144287, - "learning_rate": 5.4671023317986045e-06, - "loss": 0.0678, + "epoch": 8.177632556067133, + "grad_norm": 0.7086759209632874, + "learning_rate": 1.8223674439328682e-06, + "loss": 0.0699, "step": 55060 }, { - "epoch": 4.089558889053913, - "grad_norm": 0.848099410533905, - "learning_rate": 5.462646665676519e-06, - "loss": 0.0499, + "epoch": 8.179117778107827, + "grad_norm": 1.6740468740463257, + "learning_rate": 1.8208822218921728e-06, + "loss": 0.0744, "step": 55070 }, { - "epoch": 4.090301500074261, - "grad_norm": 1.8434141874313354, - "learning_rate": 5.458190999554434e-06, - "loss": 0.0476, + "epoch": 8.180603000148523, + "grad_norm": 1.1423155069351196, + "learning_rate": 1.8193969998514779e-06, + "loss": 0.0532, "step": 55080 }, { - "epoch": 4.091044111094608, - "grad_norm": 1.2754813432693481, - "learning_rate": 5.453735333432349e-06, - "loss": 0.0586, + "epoch": 8.182088222189217, + "grad_norm": 0.4345645606517792, + "learning_rate": 1.817911777810783e-06, + "loss": 0.0519, "step": 55090 }, { - "epoch": 4.091786722114956, - "grad_norm": 0.441261887550354, - "learning_rate": 5.449279667310263e-06, - "loss": 0.0583, + "epoch": 8.183573444229912, + "grad_norm": 0.9073742032051086, + "learning_rate": 1.8164265557700878e-06, + "loss": 0.0505, "step": 55100 }, { - "epoch": 4.092529333135304, - "grad_norm": 1.066699743270874, - "learning_rate": 5.444824001188178e-06, - "loss": 0.0658, + "epoch": 8.185058666270608, + "grad_norm": 0.9864577054977417, + "learning_rate": 1.8149413337293929e-06, + "loss": 0.0687, "step": 55110 }, { - "epoch": 4.093271944155651, - "grad_norm": 0.8867554664611816, - "learning_rate": 5.440368335066093e-06, - "loss": 0.0909, + "epoch": 8.186543888311302, + "grad_norm": 0.9961680769920349, + "learning_rate": 1.8134561116886975e-06, + "loss": 0.0511, "step": 55120 }, { - "epoch": 4.094014555175999, - "grad_norm": 0.7380247116088867, - "learning_rate": 5.435912668944008e-06, - "loss": 0.0538, + "epoch": 8.188029110351998, + "grad_norm": 0.4023669958114624, + "learning_rate": 1.8119708896480026e-06, + "loss": 0.0399, "step": 55130 }, { - "epoch": 4.094757166196346, - "grad_norm": 1.3226486444473267, - "learning_rate": 5.431457002821922e-06, - "loss": 0.0688, + "epoch": 8.189514332392692, + "grad_norm": 0.8669695854187012, + "learning_rate": 1.8104856676073074e-06, + "loss": 0.0545, "step": 55140 }, { - "epoch": 4.095499777216694, - "grad_norm": 1.1807446479797363, - "learning_rate": 5.427001336699837e-06, - "loss": 0.0498, + "epoch": 8.190999554433388, + "grad_norm": 0.6370580792427063, + "learning_rate": 1.8090004455666125e-06, + "loss": 0.0474, "step": 55150 }, { - "epoch": 4.096242388237042, - "grad_norm": 1.0485824346542358, - "learning_rate": 5.422545670577751e-06, - "loss": 0.0652, + "epoch": 8.192484776474084, + "grad_norm": 0.6495532393455505, + "learning_rate": 1.8075152235259172e-06, + "loss": 0.0597, "step": 55160 }, { - "epoch": 4.096984999257389, - "grad_norm": 0.35923972725868225, - "learning_rate": 5.418090004455666e-06, - "loss": 0.0483, + "epoch": 8.193969998514778, + "grad_norm": 0.6302558779716492, + "learning_rate": 1.8060300014852222e-06, + "loss": 0.0511, "step": 55170 }, { - "epoch": 4.097727610277737, - "grad_norm": 0.8954094648361206, - "learning_rate": 5.413634338333581e-06, - "loss": 0.0722, + "epoch": 8.195455220555473, + "grad_norm": 0.9278355836868286, + "learning_rate": 1.804544779444527e-06, + "loss": 0.0486, "step": 55180 }, { - "epoch": 4.098470221298084, - "grad_norm": 2.760606288909912, - "learning_rate": 5.409178672211496e-06, - "loss": 0.0826, + "epoch": 8.196940442596167, + "grad_norm": 0.4719637334346771, + "learning_rate": 1.803059557403832e-06, + "loss": 0.0473, "step": 55190 }, { - "epoch": 4.099212832318432, - "grad_norm": 1.2169206142425537, - "learning_rate": 5.40472300608941e-06, - "loss": 0.043, + "epoch": 8.198425664636863, + "grad_norm": 1.1095895767211914, + "learning_rate": 1.8015743353631368e-06, + "loss": 0.0593, "step": 55200 }, { - "epoch": 4.0999554433387795, - "grad_norm": 0.5677967667579651, - "learning_rate": 5.400267339967325e-06, - "loss": 0.0508, + "epoch": 8.199910886677559, + "grad_norm": 0.5811156034469604, + "learning_rate": 1.8000891133224419e-06, + "loss": 0.039, "step": 55210 }, { - "epoch": 4.1006980543591265, - "grad_norm": 0.8861594200134277, - "learning_rate": 5.39581167384524e-06, - "loss": 0.0754, + "epoch": 8.201396108718253, + "grad_norm": 1.1787195205688477, + "learning_rate": 1.798603891281747e-06, + "loss": 0.055, "step": 55220 }, { - "epoch": 4.101440665379474, - "grad_norm": 1.1130675077438354, - "learning_rate": 5.391356007723154e-06, - "loss": 0.0487, + "epoch": 8.202881330758949, + "grad_norm": 0.4531806409358978, + "learning_rate": 1.7971186692410516e-06, + "loss": 0.049, "step": 55230 }, { - "epoch": 4.102183276399821, - "grad_norm": 1.4723432064056396, - "learning_rate": 5.38690034160107e-06, - "loss": 0.0622, + "epoch": 8.204366552799643, + "grad_norm": 0.6535776257514954, + "learning_rate": 1.7956334472003566e-06, + "loss": 0.0562, "step": 55240 }, { - "epoch": 4.102925887420169, - "grad_norm": 2.4365248680114746, - "learning_rate": 5.382444675478984e-06, - "loss": 0.0429, + "epoch": 8.205851774840339, + "grad_norm": 0.8606031537055969, + "learning_rate": 1.7941482251596615e-06, + "loss": 0.0706, "step": 55250 }, { - "epoch": 4.103668498440517, - "grad_norm": 2.329158067703247, - "learning_rate": 5.377989009356899e-06, - "loss": 0.0546, + "epoch": 8.207336996881034, + "grad_norm": 0.8530101776123047, + "learning_rate": 1.7926630031189666e-06, + "loss": 0.0366, "step": 55260 }, { - "epoch": 4.104411109460864, - "grad_norm": 0.6031007170677185, - "learning_rate": 5.3735333432348134e-06, - "loss": 0.0434, + "epoch": 8.208822218921728, + "grad_norm": 0.9330909848213196, + "learning_rate": 1.7911777810782712e-06, + "loss": 0.0609, "step": 55270 }, { - "epoch": 4.105153720481212, - "grad_norm": 1.8432621955871582, - "learning_rate": 5.3690776771127284e-06, - "loss": 0.0504, + "epoch": 8.210307440962424, + "grad_norm": 1.5024702548980713, + "learning_rate": 1.7896925590375763e-06, + "loss": 0.0607, "step": 55280 }, { - "epoch": 4.105896331501559, - "grad_norm": 4.149932384490967, - "learning_rate": 5.364622010990643e-06, - "loss": 0.0487, + "epoch": 8.211792663003118, + "grad_norm": 0.6589770317077637, + "learning_rate": 1.7882073369968811e-06, + "loss": 0.0546, "step": 55290 }, { - "epoch": 4.106638942521907, - "grad_norm": 0.8511655926704407, - "learning_rate": 5.360166344868558e-06, - "loss": 0.0491, + "epoch": 8.213277885043814, + "grad_norm": 1.931390643119812, + "learning_rate": 1.7867221149561862e-06, + "loss": 0.0496, "step": 55300 }, { - "epoch": 4.107381553542255, - "grad_norm": 0.9837514162063599, - "learning_rate": 5.3557106787464726e-06, - "loss": 0.0698, + "epoch": 8.21476310708451, + "grad_norm": 0.75163334608078, + "learning_rate": 1.7852368929154909e-06, + "loss": 0.0659, "step": 55310 }, { - "epoch": 4.108124164562602, - "grad_norm": 3.7656993865966797, - "learning_rate": 5.3512550126243876e-06, - "loss": 0.0776, + "epoch": 8.216248329125204, + "grad_norm": 0.9942273497581482, + "learning_rate": 1.783751670874796e-06, + "loss": 0.0528, "step": 55320 }, { - "epoch": 4.10886677558295, - "grad_norm": 3.2895989418029785, - "learning_rate": 5.346799346502302e-06, - "loss": 0.0607, + "epoch": 8.2177335511659, + "grad_norm": 0.8498992323875427, + "learning_rate": 1.7822664488341008e-06, + "loss": 0.0437, "step": 55330 }, { - "epoch": 4.109609386603297, - "grad_norm": 1.3208965063095093, - "learning_rate": 5.342343680380217e-06, - "loss": 0.0559, + "epoch": 8.219218773206594, + "grad_norm": 1.12864089012146, + "learning_rate": 1.7807812267934059e-06, + "loss": 0.0459, "step": 55340 }, { - "epoch": 4.110351997623645, - "grad_norm": 0.915107250213623, - "learning_rate": 5.337888014258132e-06, - "loss": 0.0958, + "epoch": 8.22070399524729, + "grad_norm": 0.8504659533500671, + "learning_rate": 1.7792960047527107e-06, + "loss": 0.0767, "step": 55350 }, { - "epoch": 4.111094608643993, - "grad_norm": 3.8144876956939697, - "learning_rate": 5.333432348136047e-06, - "loss": 0.0531, + "epoch": 8.222189217287985, + "grad_norm": 0.7916573882102966, + "learning_rate": 1.7778107827120156e-06, + "loss": 0.0492, "step": 55360 }, { - "epoch": 4.11183721966434, - "grad_norm": 0.789252519607544, - "learning_rate": 5.328976682013962e-06, - "loss": 0.0589, + "epoch": 8.22367443932868, + "grad_norm": 1.0378488302230835, + "learning_rate": 1.7763255606713206e-06, + "loss": 0.0788, "step": 55370 }, { - "epoch": 4.1125798306846875, - "grad_norm": 0.5822809934616089, - "learning_rate": 5.324521015891876e-06, - "loss": 0.0394, + "epoch": 8.225159661369375, + "grad_norm": 0.593513011932373, + "learning_rate": 1.7748403386306253e-06, + "loss": 0.0596, "step": 55380 }, { - "epoch": 4.1133224417050345, - "grad_norm": 1.0521644353866577, - "learning_rate": 5.320065349769791e-06, - "loss": 0.0813, + "epoch": 8.226644883410069, + "grad_norm": 1.3286253213882446, + "learning_rate": 1.7733551165899303e-06, + "loss": 0.0503, "step": 55390 }, { - "epoch": 4.114065052725382, - "grad_norm": 3.525383710861206, - "learning_rate": 5.315609683647705e-06, - "loss": 0.0696, + "epoch": 8.228130105450765, + "grad_norm": 0.4538455009460449, + "learning_rate": 1.7718698945492352e-06, + "loss": 0.0629, "step": 55400 }, { - "epoch": 4.11480766374573, - "grad_norm": 2.288709878921509, - "learning_rate": 5.311154017525621e-06, - "loss": 0.0517, + "epoch": 8.22961532749146, + "grad_norm": 1.29067063331604, + "learning_rate": 1.7703846725085403e-06, + "loss": 0.073, "step": 55410 }, { - "epoch": 4.115550274766077, - "grad_norm": 1.1933070421218872, - "learning_rate": 5.306698351403535e-06, - "loss": 0.0642, + "epoch": 8.231100549532155, + "grad_norm": 0.9755465388298035, + "learning_rate": 1.768899450467845e-06, + "loss": 0.054, "step": 55420 }, { - "epoch": 4.116292885786425, - "grad_norm": 0.5231961011886597, - "learning_rate": 5.30224268528145e-06, - "loss": 0.0373, + "epoch": 8.23258577157285, + "grad_norm": 1.3098180294036865, + "learning_rate": 1.76741422842715e-06, + "loss": 0.0396, "step": 55430 }, { - "epoch": 4.117035496806773, - "grad_norm": 2.135310173034668, - "learning_rate": 5.297787019159364e-06, - "loss": 0.0701, + "epoch": 8.234070993613546, + "grad_norm": 1.1818701028823853, + "learning_rate": 1.7659290063864548e-06, + "loss": 0.0721, "step": 55440 }, { - "epoch": 4.11777810782712, - "grad_norm": 1.2622560262680054, - "learning_rate": 5.293331353037279e-06, - "loss": 0.0359, + "epoch": 8.23555621565424, + "grad_norm": 1.2597570419311523, + "learning_rate": 1.76444378434576e-06, + "loss": 0.0554, "step": 55450 }, { - "epoch": 4.118520718847468, - "grad_norm": 0.9313594102859497, - "learning_rate": 5.288875686915193e-06, - "loss": 0.0639, + "epoch": 8.237041437694936, + "grad_norm": 0.5721325874328613, + "learning_rate": 1.7629585623050646e-06, + "loss": 0.0576, "step": 55460 }, { - "epoch": 4.119263329867815, - "grad_norm": 1.2040377855300903, - "learning_rate": 5.284420020793109e-06, - "loss": 0.0436, + "epoch": 8.23852665973563, + "grad_norm": 0.9122353196144104, + "learning_rate": 1.7614733402643696e-06, + "loss": 0.0557, "step": 55470 }, { - "epoch": 4.120005940888163, - "grad_norm": 1.6361944675445557, - "learning_rate": 5.279964354671024e-06, - "loss": 0.045, + "epoch": 8.240011881776326, + "grad_norm": 0.48903775215148926, + "learning_rate": 1.7599881182236747e-06, + "loss": 0.0766, "step": 55480 }, { - "epoch": 4.120748551908511, - "grad_norm": 0.6171565651893616, - "learning_rate": 5.275508688548938e-06, - "loss": 0.0487, + "epoch": 8.241497103817021, + "grad_norm": 0.7196714282035828, + "learning_rate": 1.7585028961829796e-06, + "loss": 0.056, "step": 55490 }, { - "epoch": 4.121491162928858, - "grad_norm": 1.9398356676101685, - "learning_rate": 5.271053022426853e-06, - "loss": 0.0551, + "epoch": 8.242982325857716, + "grad_norm": 0.540144681930542, + "learning_rate": 1.7570176741422846e-06, + "loss": 0.0431, "step": 55500 }, { - "epoch": 4.122233773949206, - "grad_norm": 0.6700782775878906, - "learning_rate": 5.266597356304767e-06, - "loss": 0.0695, + "epoch": 8.244467547898411, + "grad_norm": 0.8926095366477966, + "learning_rate": 1.7555324521015893e-06, + "loss": 0.0587, "step": 55510 }, { - "epoch": 4.122976384969553, - "grad_norm": 1.5354483127593994, - "learning_rate": 5.262141690182682e-06, - "loss": 0.0606, + "epoch": 8.245952769939105, + "grad_norm": 0.639405369758606, + "learning_rate": 1.7540472300608943e-06, + "loss": 0.0561, "step": 55520 }, { - "epoch": 4.1237189959899005, - "grad_norm": 2.600698947906494, - "learning_rate": 5.257686024060597e-06, - "loss": 0.0734, + "epoch": 8.247437991979801, + "grad_norm": 0.5663328170776367, + "learning_rate": 1.7525620080201992e-06, + "loss": 0.0424, "step": 55530 }, { - "epoch": 4.124461607010248, - "grad_norm": 1.9559639692306519, - "learning_rate": 5.253230357938512e-06, - "loss": 0.0569, + "epoch": 8.248923214020497, + "grad_norm": 0.5691666603088379, + "learning_rate": 1.751076785979504e-06, + "loss": 0.059, "step": 55540 }, { - "epoch": 4.1252042180305954, - "grad_norm": 2.100451946258545, - "learning_rate": 5.2487746918164265e-06, - "loss": 0.0729, + "epoch": 8.250408436061191, + "grad_norm": 0.7660884857177734, + "learning_rate": 1.749591563938809e-06, + "loss": 0.0563, "step": 55550 }, { - "epoch": 4.125946829050943, - "grad_norm": 2.451205253601074, - "learning_rate": 5.2443190256943415e-06, - "loss": 0.0457, + "epoch": 8.251893658101887, + "grad_norm": 0.3192717730998993, + "learning_rate": 1.748106341898114e-06, + "loss": 0.0473, "step": 55560 }, { - "epoch": 4.12668944007129, - "grad_norm": 1.9854660034179688, - "learning_rate": 5.239863359572256e-06, - "loss": 0.0379, + "epoch": 8.25337888014258, + "grad_norm": 1.3229044675827026, + "learning_rate": 1.7466211198574186e-06, + "loss": 0.0577, "step": 55570 }, { - "epoch": 4.127432051091638, - "grad_norm": 1.1762291193008423, - "learning_rate": 5.235407693450171e-06, - "loss": 0.0425, + "epoch": 8.254864102183276, + "grad_norm": 0.6860600709915161, + "learning_rate": 1.7451358978167237e-06, + "loss": 0.0531, "step": 55580 }, { - "epoch": 4.128174662111986, - "grad_norm": 1.3475638628005981, - "learning_rate": 5.230952027328086e-06, - "loss": 0.0654, + "epoch": 8.256349324223972, + "grad_norm": 0.7781535387039185, + "learning_rate": 1.7436506757760285e-06, + "loss": 0.041, "step": 55590 }, { - "epoch": 4.128917273132333, - "grad_norm": 1.0399333238601685, - "learning_rate": 5.226496361206001e-06, - "loss": 0.0595, + "epoch": 8.257834546264666, + "grad_norm": 0.48318976163864136, + "learning_rate": 1.7421654537353336e-06, + "loss": 0.0456, "step": 55600 }, { - "epoch": 4.129659884152681, - "grad_norm": 0.3056696355342865, - "learning_rate": 5.222040695083916e-06, - "loss": 0.0424, + "epoch": 8.259319768305362, + "grad_norm": 0.8603309392929077, + "learning_rate": 1.7406802316946387e-06, + "loss": 0.0511, "step": 55610 }, { - "epoch": 4.130402495173028, - "grad_norm": 2.0627856254577637, - "learning_rate": 5.21758502896183e-06, - "loss": 0.0635, + "epoch": 8.260804990346056, + "grad_norm": 0.7285515666007996, + "learning_rate": 1.7391950096539433e-06, + "loss": 0.0522, "step": 55620 }, { - "epoch": 4.131145106193376, - "grad_norm": 2.32353138923645, - "learning_rate": 5.213129362839745e-06, - "loss": 0.0512, + "epoch": 8.262290212386752, + "grad_norm": 0.8494543433189392, + "learning_rate": 1.7377097876132484e-06, + "loss": 0.0366, "step": 55630 }, { - "epoch": 4.131887717213724, - "grad_norm": 1.2325079441070557, - "learning_rate": 5.208673696717659e-06, - "loss": 0.027, + "epoch": 8.263775434427448, + "grad_norm": 1.2635679244995117, + "learning_rate": 1.7362245655725533e-06, + "loss": 0.044, "step": 55640 }, { - "epoch": 4.132630328234071, - "grad_norm": 1.5056791305541992, - "learning_rate": 5.204218030595575e-06, - "loss": 0.0549, + "epoch": 8.265260656468142, + "grad_norm": 0.6081938147544861, + "learning_rate": 1.7347393435318583e-06, + "loss": 0.0546, "step": 55650 }, { - "epoch": 4.133372939254419, - "grad_norm": 2.7190983295440674, - "learning_rate": 5.199762364473489e-06, - "loss": 0.0631, + "epoch": 8.266745878508837, + "grad_norm": 0.6587321162223816, + "learning_rate": 1.733254121491163e-06, + "loss": 0.0521, "step": 55660 }, { - "epoch": 4.134115550274766, - "grad_norm": 1.026847243309021, - "learning_rate": 5.195306698351404e-06, - "loss": 0.0541, + "epoch": 8.268231100549531, + "grad_norm": 0.6453005075454712, + "learning_rate": 1.731768899450468e-06, + "loss": 0.0547, "step": 55670 }, { - "epoch": 4.134858161295114, - "grad_norm": 1.6611924171447754, - "learning_rate": 5.190851032229318e-06, - "loss": 0.0663, + "epoch": 8.269716322590227, + "grad_norm": 1.6176621913909912, + "learning_rate": 1.7302836774097729e-06, + "loss": 0.069, "step": 55680 }, { - "epoch": 4.1356007723154615, - "grad_norm": 0.3207695186138153, - "learning_rate": 5.186395366107233e-06, - "loss": 0.0528, + "epoch": 8.271201544630923, + "grad_norm": 1.0043728351593018, + "learning_rate": 1.728798455369078e-06, + "loss": 0.0437, "step": 55690 }, { - "epoch": 4.1363433833358085, - "grad_norm": 1.145107388496399, - "learning_rate": 5.181939699985148e-06, - "loss": 0.0617, + "epoch": 8.272686766671617, + "grad_norm": 1.269233226776123, + "learning_rate": 1.7273132333283826e-06, + "loss": 0.0562, "step": 55700 }, { - "epoch": 4.137085994356156, - "grad_norm": 2.4018893241882324, - "learning_rate": 5.177484033863063e-06, - "loss": 0.092, + "epoch": 8.274171988712313, + "grad_norm": 0.8003224730491638, + "learning_rate": 1.7258280112876877e-06, + "loss": 0.0646, "step": 55710 }, { - "epoch": 4.137828605376503, - "grad_norm": 0.642469048500061, - "learning_rate": 5.173028367740977e-06, - "loss": 0.0801, + "epoch": 8.275657210753007, + "grad_norm": 0.9215367436408997, + "learning_rate": 1.7243427892469925e-06, + "loss": 0.0642, "step": 55720 }, { - "epoch": 4.138571216396851, - "grad_norm": 0.8119722008705139, - "learning_rate": 5.168572701618892e-06, - "loss": 0.0502, + "epoch": 8.277142432793703, + "grad_norm": 0.4948424696922302, + "learning_rate": 1.7228575672062974e-06, + "loss": 0.058, "step": 55730 }, { - "epoch": 4.139313827417199, - "grad_norm": 0.8802699446678162, - "learning_rate": 5.164117035496807e-06, - "loss": 0.058, + "epoch": 8.278627654834398, + "grad_norm": 1.7762740850448608, + "learning_rate": 1.7213723451656025e-06, + "loss": 0.0432, "step": 55740 }, { - "epoch": 4.140056438437546, - "grad_norm": 2.3792264461517334, - "learning_rate": 5.159661369374721e-06, - "loss": 0.0601, + "epoch": 8.280112876875092, + "grad_norm": 1.6269477605819702, + "learning_rate": 1.7198871231249073e-06, + "loss": 0.0699, "step": 55750 }, { - "epoch": 4.140799049457894, - "grad_norm": 0.80853670835495, - "learning_rate": 5.155205703252637e-06, - "loss": 0.0554, + "epoch": 8.281598098915788, + "grad_norm": 0.7696530222892761, + "learning_rate": 1.7184019010842124e-06, + "loss": 0.0647, "step": 55760 }, { - "epoch": 4.141541660478241, - "grad_norm": 1.0470110177993774, - "learning_rate": 5.150750037130551e-06, - "loss": 0.0569, + "epoch": 8.283083320956482, + "grad_norm": 0.37772563099861145, + "learning_rate": 1.716916679043517e-06, + "loss": 0.0564, "step": 55770 }, { - "epoch": 4.142284271498589, - "grad_norm": 3.2647767066955566, - "learning_rate": 5.146294371008466e-06, - "loss": 0.0529, + "epoch": 8.284568542997178, + "grad_norm": 0.7547775506973267, + "learning_rate": 1.715431457002822e-06, + "loss": 0.0755, "step": 55780 }, { - "epoch": 4.143026882518937, - "grad_norm": 1.5302366018295288, - "learning_rate": 5.1418387048863804e-06, - "loss": 0.0634, + "epoch": 8.286053765037874, + "grad_norm": 0.5472797751426697, + "learning_rate": 1.713946234962127e-06, + "loss": 0.0546, "step": 55790 }, { - "epoch": 4.143769493539284, - "grad_norm": 3.8410422801971436, - "learning_rate": 5.1373830387642954e-06, - "loss": 0.0607, + "epoch": 8.287538987078568, + "grad_norm": 1.0821386575698853, + "learning_rate": 1.712461012921432e-06, + "loss": 0.0702, "step": 55800 }, { - "epoch": 4.144512104559632, - "grad_norm": 1.0163367986679077, - "learning_rate": 5.1329273726422096e-06, - "loss": 0.0323, + "epoch": 8.289024209119264, + "grad_norm": 1.0084789991378784, + "learning_rate": 1.7109757908807367e-06, + "loss": 0.0675, "step": 55810 }, { - "epoch": 4.145254715579979, - "grad_norm": 1.0663138628005981, - "learning_rate": 5.128471706520125e-06, - "loss": 0.0599, + "epoch": 8.290509431159958, + "grad_norm": 1.4478343725204468, + "learning_rate": 1.7094905688400417e-06, + "loss": 0.058, "step": 55820 }, { - "epoch": 4.145997326600327, - "grad_norm": 1.1490668058395386, - "learning_rate": 5.1240160403980396e-06, - "loss": 0.0542, + "epoch": 8.291994653200653, + "grad_norm": 0.6397274732589722, + "learning_rate": 1.7080053467993466e-06, + "loss": 0.0507, "step": 55830 }, { - "epoch": 4.146739937620675, - "grad_norm": 2.020784616470337, - "learning_rate": 5.1195603742759546e-06, - "loss": 0.0645, + "epoch": 8.29347987524135, + "grad_norm": 0.4168252944946289, + "learning_rate": 1.7065201247586517e-06, + "loss": 0.0406, "step": 55840 }, { - "epoch": 4.147482548641022, - "grad_norm": 0.6550213098526001, - "learning_rate": 5.115104708153869e-06, - "loss": 0.0588, + "epoch": 8.294965097282043, + "grad_norm": 0.8522523641586304, + "learning_rate": 1.7050349027179563e-06, + "loss": 0.0347, "step": 55850 }, { - "epoch": 4.1482251596613695, - "grad_norm": 1.2832306623458862, - "learning_rate": 5.110649042031784e-06, - "loss": 0.0504, + "epoch": 8.296450319322739, + "grad_norm": 1.1035298109054565, + "learning_rate": 1.7035496806772614e-06, + "loss": 0.0654, "step": 55860 }, { - "epoch": 4.1489677706817165, - "grad_norm": 0.4903786778450012, - "learning_rate": 5.106193375909699e-06, - "loss": 0.0826, + "epoch": 8.297935541363433, + "grad_norm": 0.6701522469520569, + "learning_rate": 1.7020644586365664e-06, + "loss": 0.0434, "step": 55870 }, { - "epoch": 4.149710381702064, - "grad_norm": 4.058075904846191, - "learning_rate": 5.101737709787614e-06, - "loss": 0.0807, + "epoch": 8.299420763404129, + "grad_norm": 0.8941202759742737, + "learning_rate": 1.7005792365958713e-06, + "loss": 0.0644, "step": 55880 }, { - "epoch": 4.150452992722412, - "grad_norm": 0.9241071939468384, - "learning_rate": 5.097282043665529e-06, - "loss": 0.0651, + "epoch": 8.300905985444825, + "grad_norm": 1.1706655025482178, + "learning_rate": 1.6990940145551762e-06, + "loss": 0.075, "step": 55890 }, { - "epoch": 4.151195603742759, - "grad_norm": 0.6750563979148865, - "learning_rate": 5.092826377543443e-06, - "loss": 0.049, + "epoch": 8.302391207485519, + "grad_norm": 0.5262462496757507, + "learning_rate": 1.697608792514481e-06, + "loss": 0.0556, "step": 55900 }, { - "epoch": 4.151938214763107, - "grad_norm": 2.1607372760772705, - "learning_rate": 5.088370711421358e-06, - "loss": 0.092, + "epoch": 8.303876429526214, + "grad_norm": 0.9723024368286133, + "learning_rate": 1.696123570473786e-06, + "loss": 0.0491, "step": 55910 }, { - "epoch": 4.152680825783454, - "grad_norm": 0.9965745210647583, - "learning_rate": 5.083915045299272e-06, - "loss": 0.0498, + "epoch": 8.305361651566908, + "grad_norm": 0.891448438167572, + "learning_rate": 1.6946383484330907e-06, + "loss": 0.0596, "step": 55920 }, { - "epoch": 4.153423436803802, - "grad_norm": 2.7033541202545166, - "learning_rate": 5.079459379177187e-06, - "loss": 0.0601, + "epoch": 8.306846873607604, + "grad_norm": 0.42021748423576355, + "learning_rate": 1.6931531263923958e-06, + "loss": 0.0434, "step": 55930 }, { - "epoch": 4.15416604782415, - "grad_norm": 0.4900580644607544, - "learning_rate": 5.075003713055102e-06, - "loss": 0.0497, + "epoch": 8.3083320956483, + "grad_norm": 0.6452431678771973, + "learning_rate": 1.6916679043517007e-06, + "loss": 0.0706, "step": 55940 }, { - "epoch": 4.154908658844497, - "grad_norm": 2.328214406967163, - "learning_rate": 5.070548046933017e-06, - "loss": 0.047, + "epoch": 8.309817317688994, + "grad_norm": 1.2964681386947632, + "learning_rate": 1.6901826823110057e-06, + "loss": 0.0352, "step": 55950 }, { - "epoch": 4.155651269864845, - "grad_norm": 1.3433852195739746, - "learning_rate": 5.066092380810931e-06, - "loss": 0.0311, + "epoch": 8.31130253972969, + "grad_norm": 1.099612832069397, + "learning_rate": 1.6886974602703104e-06, + "loss": 0.064, "step": 55960 }, { - "epoch": 4.156393880885192, - "grad_norm": 2.1777939796447754, - "learning_rate": 5.061636714688846e-06, - "loss": 0.0605, + "epoch": 8.312787761770384, + "grad_norm": 0.3394363522529602, + "learning_rate": 1.6872122382296154e-06, + "loss": 0.0485, "step": 55970 }, { - "epoch": 4.15713649190554, - "grad_norm": 0.2275165617465973, - "learning_rate": 5.05718104856676e-06, - "loss": 0.047, + "epoch": 8.31427298381108, + "grad_norm": 0.8450482487678528, + "learning_rate": 1.6857270161889203e-06, + "loss": 0.0496, "step": 55980 }, { - "epoch": 4.157879102925888, - "grad_norm": 0.16543416678905487, - "learning_rate": 5.052725382444675e-06, - "loss": 0.0714, + "epoch": 8.315758205851775, + "grad_norm": 0.8660183548927307, + "learning_rate": 1.6842417941482254e-06, + "loss": 0.0617, "step": 55990 }, { - "epoch": 4.158621713946235, - "grad_norm": 1.2253612279891968, - "learning_rate": 5.048269716322591e-06, - "loss": 0.0605, + "epoch": 8.31724342789247, + "grad_norm": 0.7594779133796692, + "learning_rate": 1.6827565721075304e-06, + "loss": 0.0501, "step": 56000 }, { - "epoch": 4.159364324966583, - "grad_norm": 2.219952344894409, - "learning_rate": 5.043814050200505e-06, - "loss": 0.0681, + "epoch": 8.318728649933165, + "grad_norm": 1.148614525794983, + "learning_rate": 1.681271350066835e-06, + "loss": 0.0708, "step": 56010 }, { - "epoch": 4.16010693598693, - "grad_norm": 1.6367051601409912, - "learning_rate": 5.03935838407842e-06, - "loss": 0.038, + "epoch": 8.32021387197386, + "grad_norm": 0.5654726624488831, + "learning_rate": 1.6797861280261401e-06, + "loss": 0.0454, "step": 56020 }, { - "epoch": 4.1608495470072775, - "grad_norm": 1.207080602645874, - "learning_rate": 5.034902717956334e-06, - "loss": 0.0787, + "epoch": 8.321699094014555, + "grad_norm": 1.0416905879974365, + "learning_rate": 1.678300905985445e-06, + "loss": 0.0397, "step": 56030 }, { - "epoch": 4.161592158027625, - "grad_norm": 2.1484475135803223, - "learning_rate": 5.030447051834249e-06, - "loss": 0.0528, + "epoch": 8.32318431605525, + "grad_norm": 1.6517877578735352, + "learning_rate": 1.67681568394475e-06, + "loss": 0.0628, "step": 56040 }, { - "epoch": 4.162334769047972, - "grad_norm": 0.8749150633811951, - "learning_rate": 5.025991385712164e-06, - "loss": 0.0848, + "epoch": 8.324669538095945, + "grad_norm": 2.160710334777832, + "learning_rate": 1.6753304619040547e-06, + "loss": 0.049, "step": 56050 }, { - "epoch": 4.16307738006832, - "grad_norm": 0.868646502494812, - "learning_rate": 5.021535719590079e-06, - "loss": 0.0662, + "epoch": 8.32615476013664, + "grad_norm": 0.4089074432849884, + "learning_rate": 1.6738452398633598e-06, + "loss": 0.048, "step": 56060 }, { - "epoch": 4.163819991088668, - "grad_norm": 0.94334876537323, - "learning_rate": 5.0170800534679935e-06, - "loss": 0.058, + "epoch": 8.327639982177336, + "grad_norm": 0.6569660305976868, + "learning_rate": 1.6723600178226646e-06, + "loss": 0.0579, "step": 56070 }, { - "epoch": 4.164562602109015, - "grad_norm": 0.9084467887878418, - "learning_rate": 5.0126243873459085e-06, - "loss": 0.0357, + "epoch": 8.32912520421803, + "grad_norm": 0.9492629766464233, + "learning_rate": 1.6708747957819695e-06, + "loss": 0.0647, "step": 56080 }, { - "epoch": 4.165305213129363, - "grad_norm": 1.5573078393936157, - "learning_rate": 5.008168721223823e-06, - "loss": 0.0886, + "epoch": 8.330610426258726, + "grad_norm": 0.38269171118736267, + "learning_rate": 1.6693895737412744e-06, + "loss": 0.0542, "step": 56090 }, { - "epoch": 4.16604782414971, - "grad_norm": 2.178457260131836, - "learning_rate": 5.003713055101738e-06, - "loss": 0.0442, + "epoch": 8.33209564829942, + "grad_norm": 1.2882914543151855, + "learning_rate": 1.6679043517005794e-06, + "loss": 0.0472, "step": 56100 }, { - "epoch": 4.166790435170058, - "grad_norm": 1.7466106414794922, - "learning_rate": 4.999257388979653e-06, - "loss": 0.051, + "epoch": 8.333580870340116, + "grad_norm": 0.41408294439315796, + "learning_rate": 1.666419129659884e-06, + "loss": 0.0406, "step": 56110 }, { - "epoch": 4.167533046190406, - "grad_norm": 1.0477418899536133, - "learning_rate": 4.994801722857568e-06, - "loss": 0.0392, + "epoch": 8.335066092380812, + "grad_norm": 1.3629785776138306, + "learning_rate": 1.6649339076191891e-06, + "loss": 0.083, "step": 56120 }, { - "epoch": 4.168275657210753, - "grad_norm": 2.2968533039093018, - "learning_rate": 4.990346056735482e-06, - "loss": 0.0433, + "epoch": 8.336551314421506, + "grad_norm": 0.6229886412620544, + "learning_rate": 1.663448685578494e-06, + "loss": 0.0517, "step": 56130 }, { - "epoch": 4.169018268231101, - "grad_norm": 0.44317713379859924, - "learning_rate": 4.985890390613397e-06, - "loss": 0.0537, + "epoch": 8.338036536462202, + "grad_norm": 0.6783386468887329, + "learning_rate": 1.661963463537799e-06, + "loss": 0.0568, "step": 56140 }, { - "epoch": 4.169760879251448, - "grad_norm": 1.9365060329437256, - "learning_rate": 4.981434724491312e-06, - "loss": 0.0842, + "epoch": 8.339521758502896, + "grad_norm": 0.3790055513381958, + "learning_rate": 1.6604782414971041e-06, + "loss": 0.061, "step": 56150 }, { - "epoch": 4.170503490271796, - "grad_norm": 0.4268725514411926, - "learning_rate": 4.976979058369226e-06, - "loss": 0.064, + "epoch": 8.341006980543591, + "grad_norm": 1.6844587326049805, + "learning_rate": 1.6589930194564088e-06, + "loss": 0.0845, "step": 56160 }, { - "epoch": 4.171246101292144, - "grad_norm": 0.8040658235549927, - "learning_rate": 4.972523392247142e-06, - "loss": 0.0691, + "epoch": 8.342492202584287, + "grad_norm": 1.0809643268585205, + "learning_rate": 1.6575077974157138e-06, + "loss": 0.0368, "step": 56170 }, { - "epoch": 4.171988712312491, - "grad_norm": 1.0074480772018433, - "learning_rate": 4.968067726125056e-06, - "loss": 0.0687, + "epoch": 8.343977424624981, + "grad_norm": 1.7007912397384644, + "learning_rate": 1.6560225753750187e-06, + "loss": 0.0574, "step": 56180 }, { - "epoch": 4.1727313233328385, - "grad_norm": 0.6365683078765869, - "learning_rate": 4.963612060002971e-06, - "loss": 0.0461, + "epoch": 8.345462646665677, + "grad_norm": 2.82881760597229, + "learning_rate": 1.6545373533343238e-06, + "loss": 0.0514, "step": 56190 }, { - "epoch": 4.1734739343531855, - "grad_norm": 2.264573812484741, - "learning_rate": 4.959156393880885e-06, - "loss": 0.0618, + "epoch": 8.346947868706371, + "grad_norm": 2.6078879833221436, + "learning_rate": 1.6530521312936284e-06, + "loss": 0.0427, "step": 56200 }, { - "epoch": 4.174216545373533, - "grad_norm": 1.5489435195922852, - "learning_rate": 4.9547007277588e-06, - "loss": 0.0888, + "epoch": 8.348433090747067, + "grad_norm": 0.698557436466217, + "learning_rate": 1.6515669092529335e-06, + "loss": 0.0621, "step": 56210 }, { - "epoch": 4.174959156393881, - "grad_norm": 0.8037591576576233, - "learning_rate": 4.950245061636714e-06, - "loss": 0.0793, + "epoch": 8.349918312787763, + "grad_norm": 1.1243953704833984, + "learning_rate": 1.6500816872122383e-06, + "loss": 0.0439, "step": 56220 }, { - "epoch": 4.175701767414228, - "grad_norm": 1.1610863208770752, - "learning_rate": 4.94578939551463e-06, - "loss": 0.0623, + "epoch": 8.351403534828457, + "grad_norm": 0.25133031606674194, + "learning_rate": 1.6485964651715434e-06, + "loss": 0.032, "step": 56230 }, { - "epoch": 4.176444378434576, - "grad_norm": 1.3831819295883179, - "learning_rate": 4.941333729392544e-06, - "loss": 0.059, + "epoch": 8.352888756869152, + "grad_norm": 1.685579776763916, + "learning_rate": 1.647111243130848e-06, + "loss": 0.0684, "step": 56240 }, { - "epoch": 4.177186989454923, - "grad_norm": 0.5763561725616455, - "learning_rate": 4.936878063270459e-06, - "loss": 0.0341, + "epoch": 8.354373978909846, + "grad_norm": 1.0357993841171265, + "learning_rate": 1.6456260210901531e-06, + "loss": 0.0545, "step": 56250 }, { - "epoch": 4.177929600475271, - "grad_norm": 1.123581051826477, - "learning_rate": 4.932422397148373e-06, - "loss": 0.0658, + "epoch": 8.355859200950542, + "grad_norm": 1.7972323894500732, + "learning_rate": 1.644140799049458e-06, + "loss": 0.0631, "step": 56260 }, { - "epoch": 4.178672211495619, - "grad_norm": 1.3264278173446655, - "learning_rate": 4.927966731026288e-06, - "loss": 0.0623, + "epoch": 8.357344422991238, + "grad_norm": 1.4118080139160156, + "learning_rate": 1.6426555770087628e-06, + "loss": 0.0523, "step": 56270 }, { - "epoch": 4.179414822515966, - "grad_norm": 1.4704315662384033, - "learning_rate": 4.923511064904203e-06, - "loss": 0.0734, + "epoch": 8.358829645031932, + "grad_norm": 1.0385082960128784, + "learning_rate": 1.641170354968068e-06, + "loss": 0.0469, "step": 56280 }, { - "epoch": 4.180157433536314, - "grad_norm": 1.4368115663528442, - "learning_rate": 4.919055398782118e-06, - "loss": 0.0561, + "epoch": 8.360314867072628, + "grad_norm": 1.1347720623016357, + "learning_rate": 1.6396851329273728e-06, + "loss": 0.0495, "step": 56290 }, { - "epoch": 4.180900044556661, - "grad_norm": 0.8675275444984436, - "learning_rate": 4.914599732660033e-06, - "loss": 0.08, + "epoch": 8.361800089113322, + "grad_norm": 0.974901556968689, + "learning_rate": 1.6381999108866778e-06, + "loss": 0.0637, "step": 56300 }, { - "epoch": 4.181642655577009, - "grad_norm": 1.2973276376724243, - "learning_rate": 4.9101440665379474e-06, - "loss": 0.0725, + "epoch": 8.363285311154018, + "grad_norm": 0.9575043320655823, + "learning_rate": 1.6367146888459825e-06, + "loss": 0.0521, "step": 56310 }, { - "epoch": 4.182385266597357, - "grad_norm": 0.6267523169517517, - "learning_rate": 4.905688400415862e-06, - "loss": 0.0461, + "epoch": 8.364770533194713, + "grad_norm": 1.842950701713562, + "learning_rate": 1.6352294668052875e-06, + "loss": 0.0655, "step": 56320 }, { - "epoch": 4.183127877617704, - "grad_norm": 2.129601001739502, - "learning_rate": 4.9012327342937766e-06, - "loss": 0.0556, + "epoch": 8.366255755235407, + "grad_norm": 0.8532291054725647, + "learning_rate": 1.6337442447645924e-06, + "loss": 0.0523, "step": 56330 }, { - "epoch": 4.1838704886380516, - "grad_norm": 2.3097314834594727, - "learning_rate": 4.8967770681716916e-06, - "loss": 0.0503, + "epoch": 8.367740977276103, + "grad_norm": 0.7238110899925232, + "learning_rate": 1.6322590227238975e-06, + "loss": 0.0535, "step": 56340 }, { - "epoch": 4.184613099658399, - "grad_norm": 1.3276792764663696, - "learning_rate": 4.8923214020496066e-06, - "loss": 0.076, + "epoch": 8.369226199316797, + "grad_norm": 0.788982093334198, + "learning_rate": 1.6307738006832021e-06, + "loss": 0.0579, "step": 56350 }, { - "epoch": 4.1853557106787465, - "grad_norm": 0.35334932804107666, - "learning_rate": 4.8878657359275216e-06, - "loss": 0.0596, + "epoch": 8.370711421357493, + "grad_norm": 0.8190876245498657, + "learning_rate": 1.6292885786425072e-06, + "loss": 0.065, "step": 56360 }, { - "epoch": 4.186098321699094, - "grad_norm": 2.6858012676239014, - "learning_rate": 4.883410069805436e-06, - "loss": 0.0484, + "epoch": 8.372196643398189, + "grad_norm": 1.094773292541504, + "learning_rate": 1.627803356601812e-06, + "loss": 0.0561, "step": 56370 }, { - "epoch": 4.186840932719441, - "grad_norm": 2.2050089836120605, - "learning_rate": 4.878954403683351e-06, - "loss": 0.067, + "epoch": 8.373681865438883, + "grad_norm": 0.4097306430339813, + "learning_rate": 1.6263181345611171e-06, + "loss": 0.0474, "step": 56380 }, { - "epoch": 4.187583543739789, - "grad_norm": 0.7625178694725037, - "learning_rate": 4.874498737561265e-06, - "loss": 0.0654, + "epoch": 8.375167087479578, + "grad_norm": 0.6835191249847412, + "learning_rate": 1.6248329125204218e-06, + "loss": 0.0723, "step": 56390 }, { - "epoch": 4.188326154760136, - "grad_norm": 1.7497589588165283, - "learning_rate": 4.870043071439181e-06, - "loss": 0.0781, + "epoch": 8.376652309520273, + "grad_norm": 0.9203733205795288, + "learning_rate": 1.6233476904797268e-06, + "loss": 0.0501, "step": 56400 }, { - "epoch": 4.189068765780484, - "grad_norm": 0.6290327906608582, - "learning_rate": 4.865587405317096e-06, - "loss": 0.045, + "epoch": 8.378137531560968, + "grad_norm": 1.1533256769180298, + "learning_rate": 1.6218624684390319e-06, + "loss": 0.0488, "step": 56410 }, { - "epoch": 4.189811376800832, - "grad_norm": 2.0656166076660156, - "learning_rate": 4.86113173919501e-06, - "loss": 0.068, + "epoch": 8.379622753601664, + "grad_norm": 1.44968843460083, + "learning_rate": 1.6203772463983367e-06, + "loss": 0.0511, "step": 56420 }, { - "epoch": 4.190553987821179, - "grad_norm": 1.3763740062713623, - "learning_rate": 4.856676073072925e-06, - "loss": 0.0746, + "epoch": 8.381107975642358, + "grad_norm": 0.7023779153823853, + "learning_rate": 1.6188920243576416e-06, + "loss": 0.0582, "step": 56430 }, { - "epoch": 4.191296598841527, - "grad_norm": 1.222730278968811, - "learning_rate": 4.852220406950839e-06, - "loss": 0.0773, + "epoch": 8.382593197683054, + "grad_norm": 1.1704336404800415, + "learning_rate": 1.6174068023169465e-06, + "loss": 0.0632, "step": 56440 }, { - "epoch": 4.192039209861874, - "grad_norm": 0.5212209820747375, - "learning_rate": 4.847764740828754e-06, - "loss": 0.0392, + "epoch": 8.384078419723748, + "grad_norm": 0.7215237617492676, + "learning_rate": 1.6159215802762515e-06, + "loss": 0.0427, "step": 56450 }, { - "epoch": 4.192781820882222, - "grad_norm": 0.5401204824447632, - "learning_rate": 4.843309074706669e-06, - "loss": 0.0801, + "epoch": 8.385563641764444, + "grad_norm": 0.4235459864139557, + "learning_rate": 1.6144363582355562e-06, + "loss": 0.0477, "step": 56460 }, { - "epoch": 4.19352443190257, - "grad_norm": 0.9797640442848206, - "learning_rate": 4.838853408584584e-06, - "loss": 0.0541, + "epoch": 8.38704886380514, + "grad_norm": 0.7092357277870178, + "learning_rate": 1.6129511361948612e-06, + "loss": 0.0368, "step": 56470 }, { - "epoch": 4.194267042922917, - "grad_norm": 0.41546717286109924, - "learning_rate": 4.834397742462498e-06, - "loss": 0.0495, + "epoch": 8.388534085845833, + "grad_norm": 1.170106291770935, + "learning_rate": 1.611465914154166e-06, + "loss": 0.06, "step": 56480 }, { - "epoch": 4.195009653943265, - "grad_norm": 1.290562391281128, - "learning_rate": 4.829942076340413e-06, - "loss": 0.0371, + "epoch": 8.39001930788653, + "grad_norm": 0.5858461260795593, + "learning_rate": 1.6099806921134712e-06, + "loss": 0.0452, "step": 56490 }, { - "epoch": 4.195752264963612, - "grad_norm": 0.5906802415847778, - "learning_rate": 4.825486410218327e-06, - "loss": 0.064, + "epoch": 8.391504529927223, + "grad_norm": 0.7332956790924072, + "learning_rate": 1.6084954700727758e-06, + "loss": 0.0569, "step": 56500 }, { - "epoch": 4.1964948759839595, - "grad_norm": 1.46896493434906, - "learning_rate": 4.821030744096242e-06, - "loss": 0.0478, + "epoch": 8.392989751967919, + "grad_norm": 1.0417643785476685, + "learning_rate": 1.6070102480320809e-06, + "loss": 0.0525, "step": 56510 }, { - "epoch": 4.197237487004307, - "grad_norm": 1.2562772035598755, - "learning_rate": 4.816575077974157e-06, - "loss": 0.0531, + "epoch": 8.394474974008615, + "grad_norm": 0.4032362103462219, + "learning_rate": 1.6055250259913857e-06, + "loss": 0.0394, "step": 56520 }, { - "epoch": 4.197980098024654, - "grad_norm": 1.5765583515167236, - "learning_rate": 4.812119411852072e-06, - "loss": 0.0586, + "epoch": 8.395960196049309, + "grad_norm": 0.6877124309539795, + "learning_rate": 1.6040398039506908e-06, + "loss": 0.0587, "step": 56530 }, { - "epoch": 4.198722709045002, - "grad_norm": 1.9203531742095947, - "learning_rate": 4.807663745729987e-06, - "loss": 0.0759, + "epoch": 8.397445418090005, + "grad_norm": 1.4561941623687744, + "learning_rate": 1.6025545819099959e-06, + "loss": 0.0642, "step": 56540 }, { - "epoch": 4.19946532006535, - "grad_norm": 0.2275390923023224, - "learning_rate": 4.803208079607901e-06, - "loss": 0.0347, + "epoch": 8.3989306401307, + "grad_norm": 1.2329953908920288, + "learning_rate": 1.6010693598693005e-06, + "loss": 0.0599, "step": 56550 }, { - "epoch": 4.200207931085697, - "grad_norm": 0.8825653791427612, - "learning_rate": 4.798752413485816e-06, - "loss": 0.0288, + "epoch": 8.400415862171394, + "grad_norm": 1.5175464153289795, + "learning_rate": 1.5995841378286056e-06, + "loss": 0.0496, "step": 56560 }, { - "epoch": 4.200950542106045, - "grad_norm": 0.9257974624633789, - "learning_rate": 4.7942967473637305e-06, - "loss": 0.0421, + "epoch": 8.40190108421209, + "grad_norm": 1.3104963302612305, + "learning_rate": 1.5980989157879104e-06, + "loss": 0.0663, "step": 56570 }, { - "epoch": 4.201693153126392, - "grad_norm": 0.9080449938774109, - "learning_rate": 4.789841081241646e-06, - "loss": 0.0569, + "epoch": 8.403386306252784, + "grad_norm": 0.6927348971366882, + "learning_rate": 1.5966136937472155e-06, + "loss": 0.0593, "step": 56580 }, { - "epoch": 4.20243576414674, - "grad_norm": 0.7005447149276733, - "learning_rate": 4.7853854151195605e-06, - "loss": 0.1045, + "epoch": 8.40487152829348, + "grad_norm": 0.9610560536384583, + "learning_rate": 1.5951284717065202e-06, + "loss": 0.0584, "step": 56590 }, { - "epoch": 4.203178375167088, - "grad_norm": 0.9929253458976746, - "learning_rate": 4.7809297489974755e-06, - "loss": 0.053, + "epoch": 8.406356750334176, + "grad_norm": 0.5008347630500793, + "learning_rate": 1.5936432496658252e-06, + "loss": 0.0458, "step": 56600 }, { - "epoch": 4.203920986187435, - "grad_norm": 2.272272825241089, - "learning_rate": 4.77647408287539e-06, - "loss": 0.076, + "epoch": 8.40784197237487, + "grad_norm": 0.6039450764656067, + "learning_rate": 1.59215802762513e-06, + "loss": 0.0431, "step": 56610 }, { - "epoch": 4.204663597207783, - "grad_norm": 1.6657465696334839, - "learning_rate": 4.772018416753305e-06, - "loss": 0.0705, + "epoch": 8.409327194415566, + "grad_norm": 1.2651342153549194, + "learning_rate": 1.590672805584435e-06, + "loss": 0.0573, "step": 56620 }, { - "epoch": 4.20540620822813, - "grad_norm": 4.0360589027404785, - "learning_rate": 4.767562750631219e-06, - "loss": 0.0637, + "epoch": 8.41081241645626, + "grad_norm": 0.8857653737068176, + "learning_rate": 1.5891875835437398e-06, + "loss": 0.0553, "step": 56630 }, { - "epoch": 4.206148819248478, - "grad_norm": 1.8561382293701172, - "learning_rate": 4.763107084509135e-06, - "loss": 0.0368, + "epoch": 8.412297638496955, + "grad_norm": 0.8885740041732788, + "learning_rate": 1.5877023615030449e-06, + "loss": 0.0598, "step": 56640 }, { - "epoch": 4.206891430268826, - "grad_norm": 0.5723518133163452, - "learning_rate": 4.758651418387049e-06, - "loss": 0.0841, + "epoch": 8.413782860537651, + "grad_norm": 0.7766587138175964, + "learning_rate": 1.5862171394623495e-06, + "loss": 0.046, "step": 56650 }, { - "epoch": 4.207634041289173, - "grad_norm": 0.5894990563392639, - "learning_rate": 4.754195752264964e-06, - "loss": 0.0622, + "epoch": 8.415268082578345, + "grad_norm": 1.0298758745193481, + "learning_rate": 1.5847319174216546e-06, + "loss": 0.0372, "step": 56660 }, { - "epoch": 4.2083766523095205, - "grad_norm": 0.48831412196159363, - "learning_rate": 4.749740086142879e-06, - "loss": 0.1112, + "epoch": 8.416753304619041, + "grad_norm": 1.0436177253723145, + "learning_rate": 1.5832466953809597e-06, + "loss": 0.0554, "step": 56670 }, { - "epoch": 4.2091192633298675, - "grad_norm": 1.655131220817566, - "learning_rate": 4.745284420020793e-06, - "loss": 0.0583, + "epoch": 8.418238526659735, + "grad_norm": 0.6761430501937866, + "learning_rate": 1.5817614733402645e-06, + "loss": 0.0533, "step": 56680 }, { - "epoch": 4.209861874350215, - "grad_norm": 0.4988187849521637, - "learning_rate": 4.740828753898708e-06, - "loss": 0.0436, + "epoch": 8.41972374870043, + "grad_norm": 1.1792784929275513, + "learning_rate": 1.5802762512995696e-06, + "loss": 0.0554, "step": 56690 }, { - "epoch": 4.210604485370563, - "grad_norm": 6.94892692565918, - "learning_rate": 4.736373087776623e-06, - "loss": 0.0508, + "epoch": 8.421208970741127, + "grad_norm": 0.33128201961517334, + "learning_rate": 1.5787910292588742e-06, + "loss": 0.0416, "step": 56700 }, { - "epoch": 4.21134709639091, - "grad_norm": 0.6588563323020935, - "learning_rate": 4.731917421654538e-06, - "loss": 0.0307, + "epoch": 8.42269419278182, + "grad_norm": 0.54185551404953, + "learning_rate": 1.5773058072181793e-06, + "loss": 0.0586, "step": 56710 }, { - "epoch": 4.212089707411258, - "grad_norm": 1.032076358795166, - "learning_rate": 4.727461755532452e-06, - "loss": 0.0532, + "epoch": 8.424179414822516, + "grad_norm": 0.5396039485931396, + "learning_rate": 1.5758205851774841e-06, + "loss": 0.0536, "step": 56720 }, { - "epoch": 4.212832318431605, - "grad_norm": 0.7659674882888794, - "learning_rate": 4.723006089410367e-06, - "loss": 0.0798, + "epoch": 8.42566463686321, + "grad_norm": 1.5792487859725952, + "learning_rate": 1.5743353631367892e-06, + "loss": 0.0562, "step": 56730 }, { - "epoch": 4.213574929451953, - "grad_norm": 1.151066780090332, - "learning_rate": 4.718550423288281e-06, - "loss": 0.0603, + "epoch": 8.427149858903906, + "grad_norm": 1.8107473850250244, + "learning_rate": 1.5728501410960939e-06, + "loss": 0.0599, "step": 56740 }, { - "epoch": 4.214317540472301, - "grad_norm": 3.9358067512512207, - "learning_rate": 4.714094757166197e-06, - "loss": 0.0505, + "epoch": 8.428635080944602, + "grad_norm": 0.57285076379776, + "learning_rate": 1.571364919055399e-06, + "loss": 0.0394, "step": 56750 }, { - "epoch": 4.215060151492648, - "grad_norm": 0.6404879093170166, - "learning_rate": 4.709639091044111e-06, - "loss": 0.0577, + "epoch": 8.430120302985296, + "grad_norm": 0.35481494665145874, + "learning_rate": 1.5698796970147038e-06, + "loss": 0.0527, "step": 56760 }, { - "epoch": 4.215802762512996, - "grad_norm": 2.104989767074585, - "learning_rate": 4.705183424922026e-06, - "loss": 0.0566, + "epoch": 8.431605525025992, + "grad_norm": 1.0526456832885742, + "learning_rate": 1.5683944749740089e-06, + "loss": 0.0565, "step": 56770 }, { - "epoch": 4.216545373533343, - "grad_norm": 0.7882208228111267, - "learning_rate": 4.70072775879994e-06, - "loss": 0.058, + "epoch": 8.433090747066686, + "grad_norm": 1.1689788103103638, + "learning_rate": 1.5669092529333135e-06, + "loss": 0.0538, "step": 56780 }, { - "epoch": 4.217287984553691, - "grad_norm": 1.2608188390731812, - "learning_rate": 4.696272092677855e-06, - "loss": 0.0692, + "epoch": 8.434575969107382, + "grad_norm": 1.2657625675201416, + "learning_rate": 1.5654240308926186e-06, + "loss": 0.0546, "step": 56790 }, { - "epoch": 4.218030595574039, - "grad_norm": 1.273901343345642, - "learning_rate": 4.69181642655577e-06, - "loss": 0.067, + "epoch": 8.436061191148077, + "grad_norm": 0.6141216158866882, + "learning_rate": 1.5639388088519236e-06, + "loss": 0.0446, "step": 56800 }, { - "epoch": 4.218773206594386, - "grad_norm": 2.1798079013824463, - "learning_rate": 4.687360760433685e-06, - "loss": 0.0807, + "epoch": 8.437546413188771, + "grad_norm": 0.4884616732597351, + "learning_rate": 1.5624535868112283e-06, + "loss": 0.0537, "step": 56810 }, { - "epoch": 4.219515817614734, - "grad_norm": 1.1464588642120361, - "learning_rate": 4.6829050943116e-06, - "loss": 0.0356, + "epoch": 8.439031635229467, + "grad_norm": 1.2065073251724243, + "learning_rate": 1.5609683647705334e-06, + "loss": 0.0582, "step": 56820 }, { - "epoch": 4.220258428635081, - "grad_norm": 2.1322357654571533, - "learning_rate": 4.678449428189514e-06, - "loss": 0.0811, + "epoch": 8.440516857270161, + "grad_norm": 0.9194113612174988, + "learning_rate": 1.5594831427298382e-06, + "loss": 0.0549, "step": 56830 }, { - "epoch": 4.2210010396554285, - "grad_norm": 1.9702597856521606, - "learning_rate": 4.673993762067429e-06, - "loss": 0.0382, + "epoch": 8.442002079310857, + "grad_norm": 0.5705419182777405, + "learning_rate": 1.5579979206891433e-06, + "loss": 0.0528, "step": 56840 }, { - "epoch": 4.221743650675776, - "grad_norm": 1.0465569496154785, - "learning_rate": 4.6695380959453436e-06, - "loss": 0.0481, + "epoch": 8.443487301351553, + "grad_norm": 0.7145352363586426, + "learning_rate": 1.556512698648448e-06, + "loss": 0.0435, "step": 56850 }, { - "epoch": 4.222486261696123, - "grad_norm": 1.8960071802139282, - "learning_rate": 4.6650824298232586e-06, - "loss": 0.0823, + "epoch": 8.444972523392247, + "grad_norm": 1.5799872875213623, + "learning_rate": 1.555027476607753e-06, + "loss": 0.0694, "step": 56860 }, { - "epoch": 4.223228872716471, - "grad_norm": 1.5519205331802368, - "learning_rate": 4.6606267637011736e-06, - "loss": 0.0807, + "epoch": 8.446457745432943, + "grad_norm": 1.1314457654953003, + "learning_rate": 1.5535422545670579e-06, + "loss": 0.0475, "step": 56870 }, { - "epoch": 4.223971483736818, - "grad_norm": 2.4255125522613525, - "learning_rate": 4.6561710975790885e-06, - "loss": 0.0666, + "epoch": 8.447942967473637, + "grad_norm": 1.1472721099853516, + "learning_rate": 1.552057032526363e-06, + "loss": 0.0459, "step": 56880 }, { - "epoch": 4.224714094757166, - "grad_norm": 1.1080626249313354, - "learning_rate": 4.651715431457003e-06, - "loss": 0.081, + "epoch": 8.449428189514332, + "grad_norm": 0.9981784224510193, + "learning_rate": 1.5505718104856676e-06, + "loss": 0.0546, "step": 56890 }, { - "epoch": 4.225456705777514, - "grad_norm": 2.251464605331421, - "learning_rate": 4.647259765334918e-06, - "loss": 0.0743, + "epoch": 8.450913411555028, + "grad_norm": 1.7208770513534546, + "learning_rate": 1.5490865884449726e-06, + "loss": 0.0666, "step": 56900 }, { - "epoch": 4.226199316797861, - "grad_norm": 0.8593553900718689, - "learning_rate": 4.642804099212832e-06, - "loss": 0.0523, + "epoch": 8.452398633595722, + "grad_norm": 1.0905085802078247, + "learning_rate": 1.5476013664042775e-06, + "loss": 0.0598, "step": 56910 }, { - "epoch": 4.226941927818209, - "grad_norm": 0.543804407119751, - "learning_rate": 4.638348433090747e-06, - "loss": 0.0748, + "epoch": 8.453883855636418, + "grad_norm": 0.7236934304237366, + "learning_rate": 1.5461161443635826e-06, + "loss": 0.0615, "step": 56920 }, { - "epoch": 4.227684538838556, - "grad_norm": 2.7038323879241943, - "learning_rate": 4.633892766968663e-06, - "loss": 0.059, + "epoch": 8.455369077677112, + "grad_norm": 0.36417078971862793, + "learning_rate": 1.5446309223228876e-06, + "loss": 0.0572, "step": 56930 }, { - "epoch": 4.228427149858904, - "grad_norm": 3.215402364730835, - "learning_rate": 4.629437100846577e-06, - "loss": 0.0626, + "epoch": 8.456854299717808, + "grad_norm": 0.8872111439704895, + "learning_rate": 1.5431457002821923e-06, + "loss": 0.0505, "step": 56940 }, { - "epoch": 4.229169760879252, - "grad_norm": 0.7325242161750793, - "learning_rate": 4.624981434724492e-06, - "loss": 0.055, + "epoch": 8.458339521758504, + "grad_norm": 1.3937108516693115, + "learning_rate": 1.5416604782414973e-06, + "loss": 0.086, "step": 56950 }, { - "epoch": 4.229912371899599, - "grad_norm": 1.3402073383331299, - "learning_rate": 4.620525768602406e-06, - "loss": 0.0661, + "epoch": 8.459824743799198, + "grad_norm": 0.8593041896820068, + "learning_rate": 1.5401752562008022e-06, + "loss": 0.0507, "step": 56960 }, { - "epoch": 4.230654982919947, - "grad_norm": 2.4132492542266846, - "learning_rate": 4.616070102480321e-06, - "loss": 0.0618, + "epoch": 8.461309965839893, + "grad_norm": 1.7641494274139404, + "learning_rate": 1.538690034160107e-06, + "loss": 0.0508, "step": 56970 }, { - "epoch": 4.231397593940294, - "grad_norm": 2.361948251724243, - "learning_rate": 4.611614436358235e-06, - "loss": 0.029, + "epoch": 8.462795187880587, + "grad_norm": 0.31947317719459534, + "learning_rate": 1.537204812119412e-06, + "loss": 0.0379, "step": 56980 }, { - "epoch": 4.232140204960642, - "grad_norm": 1.8688277006149292, - "learning_rate": 4.607158770236151e-06, - "loss": 0.0675, + "epoch": 8.464280409921283, + "grad_norm": 1.2858604192733765, + "learning_rate": 1.535719590078717e-06, + "loss": 0.0535, "step": 56990 }, { - "epoch": 4.2328828159809895, - "grad_norm": 1.2990567684173584, - "learning_rate": 4.602703104114065e-06, - "loss": 0.0553, + "epoch": 8.465765631961979, + "grad_norm": 1.130893588066101, + "learning_rate": 1.5342343680380216e-06, + "loss": 0.062, "step": 57000 }, { - "epoch": 4.2336254270013365, - "grad_norm": 1.1148401498794556, - "learning_rate": 4.59824743799198e-06, - "loss": 0.0565, + "epoch": 8.467250854002673, + "grad_norm": 0.2929428517818451, + "learning_rate": 1.5327491459973267e-06, + "loss": 0.0638, "step": 57010 }, { - "epoch": 4.234368038021684, - "grad_norm": 1.900498867034912, - "learning_rate": 4.593791771869894e-06, - "loss": 0.0724, + "epoch": 8.468736076043369, + "grad_norm": 0.8168018460273743, + "learning_rate": 1.5312639239566316e-06, + "loss": 0.0658, "step": 57020 }, { - "epoch": 4.235110649042031, - "grad_norm": 1.3050068616867065, - "learning_rate": 4.589336105747809e-06, - "loss": 0.059, + "epoch": 8.470221298084063, + "grad_norm": 1.2483536005020142, + "learning_rate": 1.5297787019159366e-06, + "loss": 0.0662, "step": 57030 }, { - "epoch": 4.235853260062379, - "grad_norm": 1.2359297275543213, - "learning_rate": 4.584880439625723e-06, - "loss": 0.0581, + "epoch": 8.471706520124759, + "grad_norm": 1.250025987625122, + "learning_rate": 1.5282934798752413e-06, + "loss": 0.039, "step": 57040 }, { - "epoch": 4.236595871082727, - "grad_norm": 0.6790696382522583, - "learning_rate": 4.580424773503639e-06, - "loss": 0.0591, + "epoch": 8.473191742165454, + "grad_norm": 1.2861472368240356, + "learning_rate": 1.5268082578345463e-06, + "loss": 0.0725, "step": 57050 }, { - "epoch": 4.237338482103074, - "grad_norm": 1.258554220199585, - "learning_rate": 4.575969107381554e-06, - "loss": 0.0604, + "epoch": 8.474676964206148, + "grad_norm": 0.44773104786872864, + "learning_rate": 1.5253230357938514e-06, + "loss": 0.0683, "step": 57060 }, { - "epoch": 4.238081093123422, - "grad_norm": 0.6367031335830688, - "learning_rate": 4.571513441259468e-06, - "loss": 0.0627, + "epoch": 8.476162186246844, + "grad_norm": 1.063197135925293, + "learning_rate": 1.5238378137531563e-06, + "loss": 0.0525, "step": 57070 }, { - "epoch": 4.238823704143769, - "grad_norm": 1.3916947841644287, - "learning_rate": 4.567057775137383e-06, - "loss": 0.0727, + "epoch": 8.477647408287538, + "grad_norm": 0.8567971587181091, + "learning_rate": 1.5223525917124613e-06, + "loss": 0.044, "step": 57080 }, { - "epoch": 4.239566315164117, - "grad_norm": 1.1736927032470703, - "learning_rate": 4.5626021090152975e-06, - "loss": 0.0492, + "epoch": 8.479132630328234, + "grad_norm": 0.8655422925949097, + "learning_rate": 1.520867369671766e-06, + "loss": 0.0687, "step": 57090 }, { - "epoch": 4.240308926184465, - "grad_norm": 2.185208320617676, - "learning_rate": 4.558146442893213e-06, - "loss": 0.0618, + "epoch": 8.48061785236893, + "grad_norm": 0.5591942071914673, + "learning_rate": 1.519382147631071e-06, + "loss": 0.0754, "step": 57100 }, { - "epoch": 4.241051537204812, - "grad_norm": 0.6165786385536194, - "learning_rate": 4.5536907767711275e-06, - "loss": 0.061, + "epoch": 8.482103074409624, + "grad_norm": 1.0601160526275635, + "learning_rate": 1.517896925590376e-06, + "loss": 0.0545, "step": 57110 }, { - "epoch": 4.24179414822516, - "grad_norm": 0.5800535678863525, - "learning_rate": 4.5492351106490425e-06, - "loss": 0.0724, + "epoch": 8.48358829645032, + "grad_norm": 0.7717120051383972, + "learning_rate": 1.516411703549681e-06, + "loss": 0.0542, "step": 57120 }, { - "epoch": 4.242536759245507, - "grad_norm": 0.8701562881469727, - "learning_rate": 4.544779444526957e-06, - "loss": 0.0333, + "epoch": 8.485073518491014, + "grad_norm": 1.1000733375549316, + "learning_rate": 1.5149264815089856e-06, + "loss": 0.043, "step": 57130 }, { - "epoch": 4.243279370265855, - "grad_norm": 0.8652254343032837, - "learning_rate": 4.540323778404872e-06, - "loss": 0.0258, + "epoch": 8.48655874053171, + "grad_norm": 0.4473594129085541, + "learning_rate": 1.5134412594682907e-06, + "loss": 0.0568, "step": 57140 }, { - "epoch": 4.244021981286203, - "grad_norm": 3.373532772064209, - "learning_rate": 4.535868112282786e-06, - "loss": 0.066, + "epoch": 8.488043962572405, + "grad_norm": 0.5959775447845459, + "learning_rate": 1.5119560374275955e-06, + "loss": 0.0698, "step": 57150 }, { - "epoch": 4.24476459230655, - "grad_norm": 0.9906690120697021, - "learning_rate": 4.531412446160702e-06, - "loss": 0.0716, + "epoch": 8.4895291846131, + "grad_norm": 1.5816816091537476, + "learning_rate": 1.5104708153869004e-06, + "loss": 0.0762, "step": 57160 }, { - "epoch": 4.2455072033268975, - "grad_norm": 2.1584084033966064, - "learning_rate": 4.526956780038616e-06, - "loss": 0.0605, + "epoch": 8.491014406653795, + "grad_norm": 1.4168264865875244, + "learning_rate": 1.5089855933462053e-06, + "loss": 0.0441, "step": 57170 }, { - "epoch": 4.2462498143472445, - "grad_norm": 1.1581281423568726, - "learning_rate": 4.522501113916531e-06, - "loss": 0.0437, + "epoch": 8.492499628694489, + "grad_norm": 1.0785027742385864, + "learning_rate": 1.5075003713055103e-06, + "loss": 0.0722, "step": 57180 }, { - "epoch": 4.246992425367592, - "grad_norm": 0.5750119686126709, - "learning_rate": 4.518045447794446e-06, - "loss": 0.0542, + "epoch": 8.493984850735185, + "grad_norm": 0.5428130030632019, + "learning_rate": 1.5060151492648154e-06, + "loss": 0.0474, "step": 57190 }, { - "epoch": 4.24773503638794, - "grad_norm": 0.9591627717018127, - "learning_rate": 4.51358978167236e-06, - "loss": 0.0591, + "epoch": 8.49547007277588, + "grad_norm": 0.9714481830596924, + "learning_rate": 1.50452992722412e-06, + "loss": 0.0634, "step": 57200 }, { - "epoch": 4.248477647408287, - "grad_norm": 1.943953037261963, - "learning_rate": 4.509134115550275e-06, - "loss": 0.0455, + "epoch": 8.496955294816575, + "grad_norm": 0.5792000889778137, + "learning_rate": 1.503044705183425e-06, + "loss": 0.0602, "step": 57210 }, { - "epoch": 4.249220258428635, - "grad_norm": 1.54121732711792, - "learning_rate": 4.50467844942819e-06, - "loss": 0.052, + "epoch": 8.49844051685727, + "grad_norm": 0.6629564166069031, + "learning_rate": 1.50155948314273e-06, + "loss": 0.0398, "step": 57220 }, { - "epoch": 4.249962869448983, - "grad_norm": 1.1736338138580322, - "learning_rate": 4.500222783306105e-06, - "loss": 0.0741, + "epoch": 8.499925738897966, + "grad_norm": 0.5812675356864929, + "learning_rate": 1.500074261102035e-06, + "loss": 0.0447, "step": 57230 }, { - "epoch": 4.25070548046933, - "grad_norm": 1.9899519681930542, - "learning_rate": 4.495767117184019e-06, - "loss": 0.0944, + "epoch": 8.50141096093866, + "grad_norm": 0.9888134598731995, + "learning_rate": 1.4985890390613397e-06, + "loss": 0.0476, "step": 57240 }, { - "epoch": 4.251448091489678, - "grad_norm": 1.6793705224990845, - "learning_rate": 4.491311451061934e-06, - "loss": 0.0749, + "epoch": 8.502896182979356, + "grad_norm": 0.4237917363643646, + "learning_rate": 1.4971038170206447e-06, + "loss": 0.0344, "step": 57250 }, { - "epoch": 4.252190702510025, - "grad_norm": 2.0780696868896484, - "learning_rate": 4.486855784939848e-06, - "loss": 0.0529, + "epoch": 8.50438140502005, + "grad_norm": 0.7772937417030334, + "learning_rate": 1.4956185949799496e-06, + "loss": 0.0541, "step": 57260 }, { - "epoch": 4.252933313530373, - "grad_norm": 0.6456514000892639, - "learning_rate": 4.482400118817763e-06, - "loss": 0.0516, + "epoch": 8.505866627060746, + "grad_norm": 0.8068942427635193, + "learning_rate": 1.4941333729392547e-06, + "loss": 0.0669, "step": 57270 }, { - "epoch": 4.253675924550721, - "grad_norm": 2.047966241836548, - "learning_rate": 4.477944452695678e-06, - "loss": 0.0564, + "epoch": 8.507351849101441, + "grad_norm": 0.7912581562995911, + "learning_rate": 1.4926481508985593e-06, + "loss": 0.0602, "step": 57280 }, { - "epoch": 4.254418535571068, - "grad_norm": 1.8581652641296387, - "learning_rate": 4.473488786573593e-06, - "loss": 0.0645, + "epoch": 8.508837071142135, + "grad_norm": 0.6579568386077881, + "learning_rate": 1.4911629288578644e-06, + "loss": 0.05, "step": 57290 }, { - "epoch": 4.255161146591416, - "grad_norm": 0.8233331441879272, - "learning_rate": 4.469033120451507e-06, - "loss": 0.0339, + "epoch": 8.510322293182831, + "grad_norm": 0.6232265830039978, + "learning_rate": 1.4896777068171692e-06, + "loss": 0.068, "step": 57300 }, { - "epoch": 4.255903757611763, - "grad_norm": 1.5012775659561157, - "learning_rate": 4.464577454329422e-06, - "loss": 0.0678, + "epoch": 8.511807515223525, + "grad_norm": 0.5684225559234619, + "learning_rate": 1.4881924847764743e-06, + "loss": 0.0505, "step": 57310 }, { - "epoch": 4.2566463686321105, - "grad_norm": 1.7684147357940674, - "learning_rate": 4.460121788207337e-06, - "loss": 0.0655, + "epoch": 8.513292737264221, + "grad_norm": 0.5152314305305481, + "learning_rate": 1.4867072627357792e-06, + "loss": 0.0527, "step": 57320 }, { - "epoch": 4.257388979652458, - "grad_norm": 1.061919927597046, - "learning_rate": 4.4556661220852514e-06, - "loss": 0.0663, + "epoch": 8.514777959304917, + "grad_norm": 0.5281168818473816, + "learning_rate": 1.485222040695084e-06, + "loss": 0.0537, "step": 57330 }, { - "epoch": 4.2581315906728054, - "grad_norm": 3.2770798206329346, - "learning_rate": 4.451210455963167e-06, - "loss": 0.0863, + "epoch": 8.516263181345611, + "grad_norm": 0.5841067433357239, + "learning_rate": 1.483736818654389e-06, + "loss": 0.0479, "step": 57340 }, { - "epoch": 4.258874201693153, - "grad_norm": 0.6535485982894897, - "learning_rate": 4.446754789841081e-06, - "loss": 0.0603, + "epoch": 8.517748403386307, + "grad_norm": 0.749219536781311, + "learning_rate": 1.4822515966136937e-06, + "loss": 0.0793, "step": 57350 }, { - "epoch": 4.2596168127135, - "grad_norm": 0.8265778422355652, - "learning_rate": 4.442299123718996e-06, - "loss": 0.0573, + "epoch": 8.519233625427, + "grad_norm": 1.2122151851654053, + "learning_rate": 1.4807663745729988e-06, + "loss": 0.0633, "step": 57360 }, { - "epoch": 4.260359423733848, - "grad_norm": 0.3725847005844116, - "learning_rate": 4.4378434575969106e-06, - "loss": 0.0543, + "epoch": 8.520718847467696, + "grad_norm": 1.5486130714416504, + "learning_rate": 1.4792811525323037e-06, + "loss": 0.0534, "step": 57370 }, { - "epoch": 4.261102034754196, - "grad_norm": 1.1065174341201782, - "learning_rate": 4.4333877914748256e-06, - "loss": 0.0578, + "epoch": 8.522204069508392, + "grad_norm": 1.1606239080429077, + "learning_rate": 1.4777959304916087e-06, + "loss": 0.0619, "step": 57380 }, { - "epoch": 4.261844645774543, - "grad_norm": 3.13139009475708, - "learning_rate": 4.42893212535274e-06, - "loss": 0.0632, + "epoch": 8.523689291549086, + "grad_norm": 0.9954538345336914, + "learning_rate": 1.4763107084509134e-06, + "loss": 0.0636, "step": 57390 }, { - "epoch": 4.262587256794891, - "grad_norm": 1.8046602010726929, - "learning_rate": 4.4244764592306555e-06, - "loss": 0.0508, + "epoch": 8.525174513589782, + "grad_norm": 1.7831127643585205, + "learning_rate": 1.4748254864102184e-06, + "loss": 0.0512, "step": 57400 }, { - "epoch": 4.263329867815238, - "grad_norm": 0.9874463677406311, - "learning_rate": 4.42002079310857e-06, - "loss": 0.0546, + "epoch": 8.526659735630476, + "grad_norm": 0.38579750061035156, + "learning_rate": 1.4733402643695233e-06, + "loss": 0.051, "step": 57410 }, { - "epoch": 4.264072478835586, - "grad_norm": 2.401059865951538, - "learning_rate": 4.415565126986485e-06, - "loss": 0.0502, + "epoch": 8.528144957671172, + "grad_norm": 0.3082354664802551, + "learning_rate": 1.4718550423288284e-06, + "loss": 0.0714, "step": 57420 }, { - "epoch": 4.264815089855934, - "grad_norm": 1.2862542867660522, - "learning_rate": 4.411109460864399e-06, - "loss": 0.0722, + "epoch": 8.529630179711868, + "grad_norm": 0.7812128663063049, + "learning_rate": 1.470369820288133e-06, + "loss": 0.0494, "step": 57430 }, { - "epoch": 4.265557700876281, - "grad_norm": 0.6845186948776245, - "learning_rate": 4.406653794742314e-06, - "loss": 0.053, + "epoch": 8.531115401752562, + "grad_norm": 1.4353504180908203, + "learning_rate": 1.468884598247438e-06, + "loss": 0.0738, "step": 57440 }, { - "epoch": 4.266300311896629, - "grad_norm": 0.5840150117874146, - "learning_rate": 4.40219812862023e-06, - "loss": 0.0395, + "epoch": 8.532600623793257, + "grad_norm": 1.3545862436294556, + "learning_rate": 1.4673993762067432e-06, + "loss": 0.0522, "step": 57450 }, { - "epoch": 4.267042922916976, - "grad_norm": 0.8561588525772095, - "learning_rate": 4.397742462498144e-06, - "loss": 0.076, + "epoch": 8.534085845833951, + "grad_norm": 0.9643948674201965, + "learning_rate": 1.465914154166048e-06, + "loss": 0.0581, "step": 57460 }, { - "epoch": 4.267785533937324, - "grad_norm": 1.2348568439483643, - "learning_rate": 4.393286796376059e-06, - "loss": 0.0679, + "epoch": 8.535571067874647, + "grad_norm": 1.1293238401412964, + "learning_rate": 1.464428932125353e-06, + "loss": 0.0691, "step": 57470 }, { - "epoch": 4.2685281449576715, - "grad_norm": 1.4345803260803223, - "learning_rate": 4.388831130253973e-06, - "loss": 0.0542, + "epoch": 8.537056289915343, + "grad_norm": 0.47619926929473877, + "learning_rate": 1.4629437100846577e-06, + "loss": 0.0608, "step": 57480 }, { - "epoch": 4.2692707559780185, - "grad_norm": 0.30238887667655945, - "learning_rate": 4.384375464131888e-06, - "loss": 0.0568, + "epoch": 8.538541511956037, + "grad_norm": 0.8765807747840881, + "learning_rate": 1.4614584880439628e-06, + "loss": 0.0547, "step": 57490 }, { - "epoch": 4.270013366998366, - "grad_norm": 1.476379156112671, - "learning_rate": 4.379919798009802e-06, - "loss": 0.0448, + "epoch": 8.540026733996733, + "grad_norm": 0.8261600732803345, + "learning_rate": 1.4599732660032676e-06, + "loss": 0.0584, "step": 57500 }, { - "epoch": 4.270755978018713, - "grad_norm": 0.6961525678634644, - "learning_rate": 4.375464131887718e-06, - "loss": 0.053, + "epoch": 8.541511956037427, + "grad_norm": 0.9158366918563843, + "learning_rate": 1.4584880439625725e-06, + "loss": 0.0456, "step": 57510 }, { - "epoch": 4.271498589039061, - "grad_norm": 1.5048184394836426, - "learning_rate": 4.371008465765632e-06, - "loss": 0.0572, + "epoch": 8.542997178078123, + "grad_norm": 0.6802687048912048, + "learning_rate": 1.4570028219218774e-06, + "loss": 0.0497, "step": 57520 }, { - "epoch": 4.272241200059409, - "grad_norm": 1.902750849723816, - "learning_rate": 4.366552799643547e-06, - "loss": 0.0564, + "epoch": 8.544482400118818, + "grad_norm": 0.918070375919342, + "learning_rate": 1.4555175998811824e-06, + "loss": 0.0501, "step": 57530 }, { - "epoch": 4.272983811079756, - "grad_norm": 2.073760747909546, - "learning_rate": 4.362097133521461e-06, - "loss": 0.052, + "epoch": 8.545967622159512, + "grad_norm": 0.9905110597610474, + "learning_rate": 1.454032377840487e-06, + "loss": 0.0571, "step": 57540 }, { - "epoch": 4.273726422100104, - "grad_norm": 0.6217005252838135, - "learning_rate": 4.357641467399376e-06, - "loss": 0.0794, + "epoch": 8.547452844200208, + "grad_norm": 1.13737154006958, + "learning_rate": 1.4525471557997921e-06, + "loss": 0.053, "step": 57550 }, { - "epoch": 4.274469033120451, - "grad_norm": 1.285564661026001, - "learning_rate": 4.35318580127729e-06, - "loss": 0.0667, + "epoch": 8.548938066240902, + "grad_norm": 0.25579169392585754, + "learning_rate": 1.451061933759097e-06, + "loss": 0.0352, "step": 57560 }, { - "epoch": 4.275211644140799, - "grad_norm": 0.4138168692588806, - "learning_rate": 4.348730135155206e-06, - "loss": 0.0688, + "epoch": 8.550423288281598, + "grad_norm": 0.6201579570770264, + "learning_rate": 1.449576711718402e-06, + "loss": 0.0457, "step": 57570 }, { - "epoch": 4.275954255161147, - "grad_norm": 2.414457321166992, - "learning_rate": 4.344274469033121e-06, - "loss": 0.0496, + "epoch": 8.551908510322294, + "grad_norm": 0.6442703604698181, + "learning_rate": 1.4480914896777071e-06, + "loss": 0.0494, "step": 57580 }, { - "epoch": 4.276696866181494, - "grad_norm": 1.193533182144165, - "learning_rate": 4.339818802911035e-06, - "loss": 0.0693, + "epoch": 8.553393732362988, + "grad_norm": 1.298711895942688, + "learning_rate": 1.4466062676370118e-06, + "loss": 0.0549, "step": 57590 }, { - "epoch": 4.277439477201842, - "grad_norm": 2.959575891494751, - "learning_rate": 4.33536313678895e-06, - "loss": 0.0625, + "epoch": 8.554878954403684, + "grad_norm": 0.8238638639450073, + "learning_rate": 1.4451210455963169e-06, + "loss": 0.0481, "step": 57600 }, { - "epoch": 4.278182088222189, - "grad_norm": 4.037361145019531, - "learning_rate": 4.3309074706668645e-06, - "loss": 0.0413, + "epoch": 8.556364176444378, + "grad_norm": 1.0968530178070068, + "learning_rate": 1.4436358235556217e-06, + "loss": 0.0399, "step": 57610 }, { - "epoch": 4.278924699242537, - "grad_norm": 2.123981475830078, - "learning_rate": 4.3264518045447795e-06, - "loss": 0.0575, + "epoch": 8.557849398485073, + "grad_norm": 0.766776442527771, + "learning_rate": 1.4421506015149268e-06, + "loss": 0.0616, "step": 57620 }, { - "epoch": 4.279667310262885, - "grad_norm": 1.8399453163146973, - "learning_rate": 4.3219961384226945e-06, - "loss": 0.0643, + "epoch": 8.55933462052577, + "grad_norm": 0.8537440299987793, + "learning_rate": 1.4406653794742314e-06, + "loss": 0.0636, "step": 57630 }, { - "epoch": 4.280409921283232, - "grad_norm": 0.43379709124565125, - "learning_rate": 4.3175404723006095e-06, - "loss": 0.0514, + "epoch": 8.560819842566463, + "grad_norm": 1.3083012104034424, + "learning_rate": 1.4391801574335365e-06, + "loss": 0.0513, "step": 57640 }, { - "epoch": 4.2811525323035795, - "grad_norm": 0.5563368797302246, - "learning_rate": 4.313084806178524e-06, - "loss": 0.0698, + "epoch": 8.562305064607159, + "grad_norm": 0.84224933385849, + "learning_rate": 1.4376949353928413e-06, + "loss": 0.0525, "step": 57650 }, { - "epoch": 4.2818951433239265, - "grad_norm": 4.087998867034912, - "learning_rate": 4.308629140056439e-06, - "loss": 0.0505, + "epoch": 8.563790286647853, + "grad_norm": 1.0603967905044556, + "learning_rate": 1.4362097133521464e-06, + "loss": 0.0418, "step": 57660 }, { - "epoch": 4.282637754344274, - "grad_norm": 3.5374698638916016, - "learning_rate": 4.304173473934353e-06, - "loss": 0.0892, + "epoch": 8.565275508688549, + "grad_norm": 1.2050191164016724, + "learning_rate": 1.434724491311451e-06, + "loss": 0.0483, "step": 57670 }, { - "epoch": 4.283380365364622, - "grad_norm": 1.2814463376998901, - "learning_rate": 4.299717807812268e-06, - "loss": 0.0633, + "epoch": 8.566760730729245, + "grad_norm": 0.6597033739089966, + "learning_rate": 1.4332392692707561e-06, + "loss": 0.069, "step": 57680 }, { - "epoch": 4.284122976384969, - "grad_norm": 0.6887046098709106, - "learning_rate": 4.295262141690183e-06, - "loss": 0.0323, + "epoch": 8.568245952769939, + "grad_norm": 1.193673849105835, + "learning_rate": 1.431754047230061e-06, + "loss": 0.0613, "step": 57690 }, { - "epoch": 4.284865587405317, - "grad_norm": 1.5045709609985352, - "learning_rate": 4.290806475568098e-06, - "loss": 0.0363, + "epoch": 8.569731174810634, + "grad_norm": 0.638843834400177, + "learning_rate": 1.4302688251893658e-06, + "loss": 0.0574, "step": 57700 }, { - "epoch": 4.285608198425665, - "grad_norm": 0.7762113213539124, - "learning_rate": 4.286350809446012e-06, - "loss": 0.0575, + "epoch": 8.57121639685133, + "grad_norm": 0.8168714046478271, + "learning_rate": 1.4287836031486707e-06, + "loss": 0.0407, "step": 57710 }, { - "epoch": 4.286350809446012, - "grad_norm": 0.4139329195022583, - "learning_rate": 4.281895143323927e-06, - "loss": 0.0461, + "epoch": 8.572701618892024, + "grad_norm": 0.19827646017074585, + "learning_rate": 1.4272983811079758e-06, + "loss": 0.0345, "step": 57720 }, { - "epoch": 4.28709342046636, - "grad_norm": 0.7771281599998474, - "learning_rate": 4.277439477201842e-06, - "loss": 0.043, + "epoch": 8.57418684093272, + "grad_norm": 1.014280080795288, + "learning_rate": 1.4258131590672808e-06, + "loss": 0.0606, "step": 57730 }, { - "epoch": 4.287836031486707, - "grad_norm": 1.1789294481277466, - "learning_rate": 4.272983811079756e-06, - "loss": 0.0563, + "epoch": 8.575672062973414, + "grad_norm": 0.2834644913673401, + "learning_rate": 1.4243279370265855e-06, + "loss": 0.0444, "step": 57740 }, { - "epoch": 4.288578642507055, - "grad_norm": 2.2451627254486084, - "learning_rate": 4.268528144957672e-06, - "loss": 0.0681, + "epoch": 8.57715728501411, + "grad_norm": 1.5880845785140991, + "learning_rate": 1.4228427149858906e-06, + "loss": 0.0573, "step": 57750 }, { - "epoch": 4.289321253527403, - "grad_norm": 0.40132951736450195, - "learning_rate": 4.264072478835586e-06, - "loss": 0.0559, + "epoch": 8.578642507054806, + "grad_norm": 2.1034553050994873, + "learning_rate": 1.4213574929451954e-06, + "loss": 0.0594, "step": 57760 }, { - "epoch": 4.29006386454775, - "grad_norm": 1.868285059928894, - "learning_rate": 4.259616812713501e-06, - "loss": 0.0517, + "epoch": 8.5801277290955, + "grad_norm": 0.22017979621887207, + "learning_rate": 1.4198722709045005e-06, + "loss": 0.0677, "step": 57770 }, { - "epoch": 4.290806475568098, - "grad_norm": 1.2447259426116943, - "learning_rate": 4.255161146591415e-06, - "loss": 0.0629, + "epoch": 8.581612951136195, + "grad_norm": 0.544924795627594, + "learning_rate": 1.4183870488638051e-06, + "loss": 0.0419, "step": 57780 }, { - "epoch": 4.291549086588445, - "grad_norm": 0.3125772774219513, - "learning_rate": 4.25070548046933e-06, - "loss": 0.0589, + "epoch": 8.58309817317689, + "grad_norm": 1.1174598932266235, + "learning_rate": 1.4169018268231102e-06, + "loss": 0.0461, "step": 57790 }, { - "epoch": 4.292291697608793, - "grad_norm": 1.9337828159332275, - "learning_rate": 4.246249814347245e-06, - "loss": 0.0771, + "epoch": 8.584583395217585, + "grad_norm": 0.851573646068573, + "learning_rate": 1.415416604782415e-06, + "loss": 0.0479, "step": 57800 }, { - "epoch": 4.2930343086291405, - "grad_norm": 1.6286951303482056, - "learning_rate": 4.24179414822516e-06, - "loss": 0.0577, + "epoch": 8.586068617258281, + "grad_norm": 0.7249863147735596, + "learning_rate": 1.4139313827417201e-06, + "loss": 0.0498, "step": 57810 }, { - "epoch": 4.2937769196494875, - "grad_norm": 3.2661020755767822, - "learning_rate": 4.237338482103074e-06, - "loss": 0.0565, + "epoch": 8.587553839298975, + "grad_norm": 0.815183699131012, + "learning_rate": 1.4124461607010248e-06, + "loss": 0.0476, "step": 57820 }, { - "epoch": 4.294519530669835, - "grad_norm": 2.473935604095459, - "learning_rate": 4.232882815980989e-06, - "loss": 0.0672, + "epoch": 8.58903906133967, + "grad_norm": 0.8330841064453125, + "learning_rate": 1.4109609386603298e-06, + "loss": 0.0666, "step": 57830 }, { - "epoch": 4.295262141690182, - "grad_norm": 1.5696542263031006, - "learning_rate": 4.2284271498589034e-06, - "loss": 0.0682, + "epoch": 8.590524283380365, + "grad_norm": 0.32213395833969116, + "learning_rate": 1.4094757166196347e-06, + "loss": 0.0483, "step": 57840 }, { - "epoch": 4.29600475271053, - "grad_norm": 0.4458481967449188, - "learning_rate": 4.223971483736818e-06, - "loss": 0.0444, + "epoch": 8.59200950542106, + "grad_norm": 0.7228590250015259, + "learning_rate": 1.4079904945789398e-06, + "loss": 0.062, "step": 57850 }, { - "epoch": 4.296747363730878, - "grad_norm": 2.461646556854248, - "learning_rate": 4.219515817614734e-06, - "loss": 0.0548, + "epoch": 8.593494727461756, + "grad_norm": 1.877468228340149, + "learning_rate": 1.4065052725382446e-06, + "loss": 0.0522, "step": 57860 }, { - "epoch": 4.297489974751225, - "grad_norm": 3.5632870197296143, - "learning_rate": 4.215060151492648e-06, - "loss": 0.0551, + "epoch": 8.59497994950245, + "grad_norm": 1.2192243337631226, + "learning_rate": 1.4050200504975495e-06, + "loss": 0.083, "step": 57870 }, { - "epoch": 4.298232585771573, - "grad_norm": 0.38127386569976807, - "learning_rate": 4.210604485370563e-06, - "loss": 0.0746, + "epoch": 8.596465171543146, + "grad_norm": 0.4343273937702179, + "learning_rate": 1.4035348284568545e-06, + "loss": 0.0543, "step": 57880 }, { - "epoch": 4.29897519679192, - "grad_norm": 0.4123340845108032, - "learning_rate": 4.2061488192484775e-06, - "loss": 0.052, + "epoch": 8.59795039358384, + "grad_norm": 0.4854268729686737, + "learning_rate": 1.4020496064161592e-06, + "loss": 0.043, "step": 57890 }, { - "epoch": 4.299717807812268, - "grad_norm": 1.4147988557815552, - "learning_rate": 4.2016931531263925e-06, - "loss": 0.0534, + "epoch": 8.599435615624536, + "grad_norm": 1.3126654624938965, + "learning_rate": 1.4005643843754643e-06, + "loss": 0.0581, "step": 57900 }, { - "epoch": 4.300460418832616, - "grad_norm": 1.1483192443847656, - "learning_rate": 4.197237487004307e-06, - "loss": 0.0762, + "epoch": 8.600920837665232, + "grad_norm": 0.8461434841156006, + "learning_rate": 1.3990791623347691e-06, + "loss": 0.0569, "step": 57910 }, { - "epoch": 4.301203029852963, - "grad_norm": 1.2446470260620117, - "learning_rate": 4.1927818208822225e-06, - "loss": 0.0827, + "epoch": 8.602406059705926, + "grad_norm": 0.22064021229743958, + "learning_rate": 1.3975939402940742e-06, + "loss": 0.0608, "step": 57920 }, { - "epoch": 4.301945640873311, - "grad_norm": 1.6271134614944458, - "learning_rate": 4.188326154760137e-06, - "loss": 0.0517, + "epoch": 8.603891281746622, + "grad_norm": 0.5799633860588074, + "learning_rate": 1.3961087182533788e-06, + "loss": 0.0432, "step": 57930 }, { - "epoch": 4.302688251893658, - "grad_norm": 0.6140291094779968, - "learning_rate": 4.183870488638052e-06, - "loss": 0.045, + "epoch": 8.605376503787316, + "grad_norm": 0.8562087416648865, + "learning_rate": 1.3946234962126839e-06, + "loss": 0.0513, "step": 57940 }, { - "epoch": 4.303430862914006, - "grad_norm": 2.260127067565918, - "learning_rate": 4.179414822515966e-06, - "loss": 0.0545, + "epoch": 8.606861725828011, + "grad_norm": 1.1041144132614136, + "learning_rate": 1.3931382741719887e-06, + "loss": 0.0631, "step": 57950 }, { - "epoch": 4.304173473934354, - "grad_norm": 1.3899246454238892, - "learning_rate": 4.174959156393881e-06, - "loss": 0.0625, + "epoch": 8.608346947868707, + "grad_norm": 1.4788848161697388, + "learning_rate": 1.3916530521312938e-06, + "loss": 0.0405, "step": 57960 }, { - "epoch": 4.304916084954701, - "grad_norm": 3.2012217044830322, - "learning_rate": 4.170503490271795e-06, - "loss": 0.0644, + "epoch": 8.609832169909401, + "grad_norm": 1.005614995956421, + "learning_rate": 1.3901678300905985e-06, + "loss": 0.0838, "step": 57970 }, { - "epoch": 4.3056586959750485, - "grad_norm": 1.8873796463012695, - "learning_rate": 4.166047824149711e-06, - "loss": 0.0446, + "epoch": 8.611317391950097, + "grad_norm": 0.7344144582748413, + "learning_rate": 1.3886826080499035e-06, + "loss": 0.0694, "step": 57980 }, { - "epoch": 4.3064013069953955, - "grad_norm": 1.7632603645324707, - "learning_rate": 4.161592158027626e-06, - "loss": 0.058, + "epoch": 8.612802613990791, + "grad_norm": 0.5920735597610474, + "learning_rate": 1.3871973860092086e-06, + "loss": 0.0457, "step": 57990 }, { - "epoch": 4.307143918015743, - "grad_norm": 2.116173028945923, - "learning_rate": 4.15713649190554e-06, - "loss": 0.0564, + "epoch": 8.614287836031487, + "grad_norm": 0.4997226595878601, + "learning_rate": 1.3857121639685135e-06, + "loss": 0.051, "step": 58000 }, { - "epoch": 4.307886529036091, - "grad_norm": 1.2623026371002197, - "learning_rate": 4.152680825783455e-06, - "loss": 0.0515, + "epoch": 8.615773058072183, + "grad_norm": 1.0840632915496826, + "learning_rate": 1.3842269419278185e-06, + "loss": 0.0637, "step": 58010 }, { - "epoch": 4.308629140056438, - "grad_norm": 0.5496644377708435, - "learning_rate": 4.148225159661369e-06, - "loss": 0.0891, + "epoch": 8.617258280112877, + "grad_norm": 0.500771164894104, + "learning_rate": 1.3827417198871232e-06, + "loss": 0.0456, "step": 58020 }, { - "epoch": 4.309371751076786, - "grad_norm": 1.1486482620239258, - "learning_rate": 4.143769493539284e-06, - "loss": 0.0526, + "epoch": 8.618743502153572, + "grad_norm": 0.6269918084144592, + "learning_rate": 1.3812564978464282e-06, + "loss": 0.0566, "step": 58030 }, { - "epoch": 4.310114362097133, - "grad_norm": 1.4145158529281616, - "learning_rate": 4.139313827417199e-06, - "loss": 0.0745, + "epoch": 8.620228724194266, + "grad_norm": 0.8221311569213867, + "learning_rate": 1.379771275805733e-06, + "loss": 0.0785, "step": 58040 }, { - "epoch": 4.310856973117481, - "grad_norm": 2.3882088661193848, - "learning_rate": 4.134858161295114e-06, - "loss": 0.0881, + "epoch": 8.621713946234962, + "grad_norm": 0.5016255378723145, + "learning_rate": 1.378286053765038e-06, + "loss": 0.057, "step": 58050 }, { - "epoch": 4.311599584137829, - "grad_norm": 1.1562554836273193, - "learning_rate": 4.130402495173028e-06, - "loss": 0.0556, + "epoch": 8.623199168275658, + "grad_norm": 0.9415386915206909, + "learning_rate": 1.3768008317243428e-06, + "loss": 0.034, "step": 58060 }, { - "epoch": 4.312342195158176, - "grad_norm": 2.3691024780273438, - "learning_rate": 4.125946829050943e-06, - "loss": 0.0798, + "epoch": 8.624684390316352, + "grad_norm": 1.6327321529388428, + "learning_rate": 1.3753156096836479e-06, + "loss": 0.0507, "step": 58070 }, { - "epoch": 4.313084806178524, - "grad_norm": 1.2392774820327759, - "learning_rate": 4.121491162928857e-06, - "loss": 0.0689, + "epoch": 8.626169612357048, + "grad_norm": 0.9239318370819092, + "learning_rate": 1.3738303876429525e-06, + "loss": 0.0561, "step": 58080 }, { - "epoch": 4.313827417198871, - "grad_norm": 1.4039784669876099, - "learning_rate": 4.117035496806772e-06, - "loss": 0.0513, + "epoch": 8.627654834397742, + "grad_norm": 0.25347572565078735, + "learning_rate": 1.3723451656022576e-06, + "loss": 0.0582, "step": 58090 }, { - "epoch": 4.314570028219219, - "grad_norm": 0.6202054619789124, - "learning_rate": 4.112579830684687e-06, - "loss": 0.0448, + "epoch": 8.629140056438438, + "grad_norm": 0.5622754096984863, + "learning_rate": 1.3708599435615624e-06, + "loss": 0.0454, "step": 58100 }, { - "epoch": 4.315312639239567, - "grad_norm": 0.5338848829269409, - "learning_rate": 4.108124164562602e-06, - "loss": 0.0619, + "epoch": 8.630625278479133, + "grad_norm": 1.1426067352294922, + "learning_rate": 1.3693747215208675e-06, + "loss": 0.047, "step": 58110 }, { - "epoch": 4.316055250259914, - "grad_norm": 0.5443835258483887, - "learning_rate": 4.103668498440517e-06, - "loss": 0.0665, + "epoch": 8.632110500519827, + "grad_norm": 1.66712486743927, + "learning_rate": 1.3678894994801726e-06, + "loss": 0.0551, "step": 58120 }, { - "epoch": 4.3167978612802616, - "grad_norm": 2.086144208908081, - "learning_rate": 4.0992128323184315e-06, - "loss": 0.0561, + "epoch": 8.633595722560523, + "grad_norm": 0.9281148314476013, + "learning_rate": 1.3664042774394772e-06, + "loss": 0.0487, "step": 58130 }, { - "epoch": 4.317540472300609, - "grad_norm": 0.8639087677001953, - "learning_rate": 4.0947571661963465e-06, - "loss": 0.0538, + "epoch": 8.635080944601217, + "grad_norm": 0.7190086841583252, + "learning_rate": 1.3649190553987823e-06, + "loss": 0.0508, "step": 58140 }, { - "epoch": 4.3182830833209565, - "grad_norm": 2.400470495223999, - "learning_rate": 4.0903015000742615e-06, - "loss": 0.0504, + "epoch": 8.636566166641913, + "grad_norm": 2.1104774475097656, + "learning_rate": 1.3634338333580872e-06, + "loss": 0.0617, "step": 58150 }, { - "epoch": 4.319025694341304, - "grad_norm": 0.8947586417198181, - "learning_rate": 4.0858458339521765e-06, - "loss": 0.0413, + "epoch": 8.638051388682609, + "grad_norm": 0.7850050926208496, + "learning_rate": 1.3619486113173922e-06, + "loss": 0.0507, "step": 58160 }, { - "epoch": 4.319768305361651, - "grad_norm": 1.5876610279083252, - "learning_rate": 4.081390167830091e-06, - "loss": 0.0721, + "epoch": 8.639536610723303, + "grad_norm": 0.592963457107544, + "learning_rate": 1.3604633892766969e-06, + "loss": 0.0672, "step": 58170 }, { - "epoch": 4.320510916381999, - "grad_norm": 2.025843858718872, - "learning_rate": 4.076934501708006e-06, - "loss": 0.0559, + "epoch": 8.641021832763998, + "grad_norm": 1.102217197418213, + "learning_rate": 1.358978167236002e-06, + "loss": 0.0741, "step": 58180 }, { - "epoch": 4.321253527402346, - "grad_norm": 1.2650307416915894, - "learning_rate": 4.07247883558592e-06, - "loss": 0.0469, + "epoch": 8.642507054804693, + "grad_norm": 0.5851567983627319, + "learning_rate": 1.3574929451953068e-06, + "loss": 0.0443, "step": 58190 }, { - "epoch": 4.321996138422694, - "grad_norm": 0.4236902892589569, - "learning_rate": 4.068023169463835e-06, - "loss": 0.0319, + "epoch": 8.643992276845388, + "grad_norm": 0.8554288148880005, + "learning_rate": 1.3560077231546119e-06, + "loss": 0.0524, "step": 58200 }, { - "epoch": 4.322738749443042, - "grad_norm": 1.2939475774765015, - "learning_rate": 4.06356750334175e-06, - "loss": 0.0748, + "epoch": 8.645477498886084, + "grad_norm": 0.8415836691856384, + "learning_rate": 1.3545225011139165e-06, + "loss": 0.0476, "step": 58210 }, { - "epoch": 4.323481360463389, - "grad_norm": 0.4584546685218811, - "learning_rate": 4.059111837219665e-06, - "loss": 0.0305, + "epoch": 8.646962720926778, + "grad_norm": 1.2989027500152588, + "learning_rate": 1.3530372790732216e-06, + "loss": 0.0437, "step": 58220 }, { - "epoch": 4.324223971483737, - "grad_norm": 1.0066841840744019, - "learning_rate": 4.054656171097579e-06, - "loss": 0.0638, + "epoch": 8.648447942967474, + "grad_norm": 0.886523425579071, + "learning_rate": 1.3515520570325264e-06, + "loss": 0.0356, "step": 58230 }, { - "epoch": 4.324966582504084, - "grad_norm": 1.1847662925720215, - "learning_rate": 4.050200504975494e-06, - "loss": 0.0383, + "epoch": 8.649933165008168, + "grad_norm": 0.4318709373474121, + "learning_rate": 1.3500668349918313e-06, + "loss": 0.0525, "step": 58240 }, { - "epoch": 4.325709193524432, - "grad_norm": 1.5462864637374878, - "learning_rate": 4.045744838853409e-06, - "loss": 0.0557, + "epoch": 8.651418387048864, + "grad_norm": 1.2308825254440308, + "learning_rate": 1.3485816129511364e-06, + "loss": 0.0554, "step": 58250 }, { - "epoch": 4.32645180454478, - "grad_norm": 1.220777153968811, - "learning_rate": 4.041289172731323e-06, - "loss": 0.054, + "epoch": 8.65290360908956, + "grad_norm": 0.6957741379737854, + "learning_rate": 1.3470963909104412e-06, + "loss": 0.0402, "step": 58260 }, { - "epoch": 4.327194415565127, - "grad_norm": 2.4415807723999023, - "learning_rate": 4.036833506609239e-06, - "loss": 0.062, + "epoch": 8.654388831130253, + "grad_norm": 0.4085921347141266, + "learning_rate": 1.3456111688697463e-06, + "loss": 0.0562, "step": 58270 }, { - "epoch": 4.327937026585475, - "grad_norm": 1.2786998748779297, - "learning_rate": 4.032377840487153e-06, - "loss": 0.0608, + "epoch": 8.65587405317095, + "grad_norm": 0.7616286277770996, + "learning_rate": 1.344125946829051e-06, + "loss": 0.07, "step": 58280 }, { - "epoch": 4.328679637605822, - "grad_norm": 0.7902323007583618, - "learning_rate": 4.027922174365068e-06, - "loss": 0.0624, + "epoch": 8.657359275211643, + "grad_norm": 0.45014530420303345, + "learning_rate": 1.342640724788356e-06, + "loss": 0.059, "step": 58290 }, { - "epoch": 4.3294222486261695, - "grad_norm": 1.4461417198181152, - "learning_rate": 4.023466508242982e-06, - "loss": 0.0546, + "epoch": 8.658844497252339, + "grad_norm": 1.3107436895370483, + "learning_rate": 1.3411555027476609e-06, + "loss": 0.0732, "step": 58300 }, { - "epoch": 4.330164859646517, - "grad_norm": 0.4553472697734833, - "learning_rate": 4.019010842120897e-06, - "loss": 0.0413, + "epoch": 8.660329719293035, + "grad_norm": 0.715117335319519, + "learning_rate": 1.339670280706966e-06, + "loss": 0.0447, "step": 58310 }, { - "epoch": 4.330907470666864, - "grad_norm": 1.363437294960022, - "learning_rate": 4.014555175998811e-06, - "loss": 0.0512, + "epoch": 8.661814941333729, + "grad_norm": 0.9561058282852173, + "learning_rate": 1.3381850586662706e-06, + "loss": 0.0587, "step": 58320 }, { - "epoch": 4.331650081687212, - "grad_norm": 1.6967765092849731, - "learning_rate": 4.010099509876727e-06, - "loss": 0.0611, + "epoch": 8.663300163374425, + "grad_norm": 1.1679437160491943, + "learning_rate": 1.3366998366255756e-06, + "loss": 0.0401, "step": 58330 }, { - "epoch": 4.332392692707559, - "grad_norm": 1.917182207107544, - "learning_rate": 4.005643843754641e-06, - "loss": 0.0887, + "epoch": 8.664785385415119, + "grad_norm": 0.3750496208667755, + "learning_rate": 1.3352146145848805e-06, + "loss": 0.065, "step": 58340 }, { - "epoch": 4.333135303727907, - "grad_norm": 2.6857664585113525, - "learning_rate": 4.001188177632556e-06, - "loss": 0.06, + "epoch": 8.666270607455814, + "grad_norm": 0.5229451656341553, + "learning_rate": 1.3337293925441856e-06, + "loss": 0.0376, "step": 58350 }, { - "epoch": 4.333877914748255, - "grad_norm": 2.381786346435547, - "learning_rate": 3.99673251151047e-06, - "loss": 0.0701, + "epoch": 8.66775582949651, + "grad_norm": 0.6506863236427307, + "learning_rate": 1.3322441705034902e-06, + "loss": 0.0508, "step": 58360 }, { - "epoch": 4.334620525768602, - "grad_norm": 1.848760724067688, - "learning_rate": 3.992276845388385e-06, - "loss": 0.0649, + "epoch": 8.669241051537204, + "grad_norm": 0.9688324928283691, + "learning_rate": 1.3307589484627953e-06, + "loss": 0.0582, "step": 58370 }, { - "epoch": 4.33536313678895, - "grad_norm": 1.4879848957061768, - "learning_rate": 3.9878211792663e-06, - "loss": 0.0502, + "epoch": 8.6707262735779, + "grad_norm": 0.7780069708824158, + "learning_rate": 1.3292737264221003e-06, + "loss": 0.0513, "step": 58380 }, { - "epoch": 4.336105747809297, - "grad_norm": 1.29238760471344, - "learning_rate": 3.983365513144215e-06, - "loss": 0.0527, + "epoch": 8.672211495618594, + "grad_norm": 0.8056851625442505, + "learning_rate": 1.3277885043814052e-06, + "loss": 0.0671, "step": 58390 }, { - "epoch": 4.336848358829645, - "grad_norm": 0.42699211835861206, - "learning_rate": 3.97890984702213e-06, - "loss": 0.047, + "epoch": 8.67369671765929, + "grad_norm": 1.495537519454956, + "learning_rate": 1.32630328234071e-06, + "loss": 0.0451, "step": 58400 }, { - "epoch": 4.337590969849993, - "grad_norm": 2.1831252574920654, - "learning_rate": 3.9744541809000445e-06, - "loss": 0.0746, + "epoch": 8.675181939699986, + "grad_norm": 1.1849182844161987, + "learning_rate": 1.324818060300015e-06, + "loss": 0.0479, "step": 58410 }, { - "epoch": 4.33833358087034, - "grad_norm": 0.27628546953201294, - "learning_rate": 3.9699985147779595e-06, - "loss": 0.0367, + "epoch": 8.67666716174068, + "grad_norm": 1.2419134378433228, + "learning_rate": 1.32333283825932e-06, + "loss": 0.0802, "step": 58420 }, { - "epoch": 4.339076191890688, - "grad_norm": 1.2963409423828125, - "learning_rate": 3.965542848655874e-06, - "loss": 0.0693, + "epoch": 8.678152383781375, + "grad_norm": 1.0507631301879883, + "learning_rate": 1.3218476162186246e-06, + "loss": 0.0599, "step": 58430 }, { - "epoch": 4.339818802911036, - "grad_norm": 3.566678762435913, - "learning_rate": 3.961087182533789e-06, - "loss": 0.0523, + "epoch": 8.679637605822071, + "grad_norm": 1.0075569152832031, + "learning_rate": 1.3203623941779297e-06, + "loss": 0.0525, "step": 58440 }, { - "epoch": 4.340561413931383, - "grad_norm": 0.6280962228775024, - "learning_rate": 3.956631516411704e-06, - "loss": 0.0329, + "epoch": 8.681122827862765, + "grad_norm": 0.7980392575263977, + "learning_rate": 1.3188771721372346e-06, + "loss": 0.0437, "step": 58450 }, { - "epoch": 4.3413040249517305, - "grad_norm": 0.4241829812526703, - "learning_rate": 3.952175850289619e-06, - "loss": 0.0304, + "epoch": 8.682608049903461, + "grad_norm": 0.5224838256835938, + "learning_rate": 1.3173919500965396e-06, + "loss": 0.0547, "step": 58460 }, { - "epoch": 4.3420466359720775, - "grad_norm": 0.3502853214740753, - "learning_rate": 3.947720184167533e-06, - "loss": 0.0429, + "epoch": 8.684093271944155, + "grad_norm": 0.7504544258117676, + "learning_rate": 1.3159067280558443e-06, + "loss": 0.0565, "step": 58470 }, { - "epoch": 4.342789246992425, - "grad_norm": 3.029207229614258, - "learning_rate": 3.943264518045448e-06, - "loss": 0.0859, + "epoch": 8.68557849398485, + "grad_norm": 0.7422505617141724, + "learning_rate": 1.3144215060151493e-06, + "loss": 0.0669, "step": 58480 }, { - "epoch": 4.343531858012773, - "grad_norm": 0.4693982005119324, - "learning_rate": 3.938808851923362e-06, - "loss": 0.0478, + "epoch": 8.687063716025547, + "grad_norm": 0.5323972702026367, + "learning_rate": 1.3129362839744542e-06, + "loss": 0.053, "step": 58490 }, { - "epoch": 4.34427446903312, - "grad_norm": 1.9650620222091675, - "learning_rate": 3.934353185801278e-06, - "loss": 0.0574, + "epoch": 8.68854893806624, + "grad_norm": 0.5587813258171082, + "learning_rate": 1.3114510619337593e-06, + "loss": 0.0728, "step": 58500 }, { - "epoch": 4.345017080053468, - "grad_norm": 2.096945285797119, - "learning_rate": 3.929897519679193e-06, - "loss": 0.0566, + "epoch": 8.690034160106936, + "grad_norm": 1.3359700441360474, + "learning_rate": 1.3099658398930643e-06, + "loss": 0.0561, "step": 58510 }, { - "epoch": 4.345759691073815, - "grad_norm": 0.6621731519699097, - "learning_rate": 3.925441853557107e-06, - "loss": 0.0532, + "epoch": 8.69151938214763, + "grad_norm": 1.0480090379714966, + "learning_rate": 1.308480617852369e-06, + "loss": 0.0468, "step": 58520 }, { - "epoch": 4.346502302094163, - "grad_norm": 3.3194026947021484, - "learning_rate": 3.920986187435022e-06, - "loss": 0.0901, + "epoch": 8.693004604188326, + "grad_norm": 0.5198378562927246, + "learning_rate": 1.306995395811674e-06, + "loss": 0.0718, "step": 58530 }, { - "epoch": 4.347244913114511, - "grad_norm": 0.2626116871833801, - "learning_rate": 3.916530521312936e-06, - "loss": 0.05, + "epoch": 8.694489826229022, + "grad_norm": 0.986173152923584, + "learning_rate": 1.305510173770979e-06, + "loss": 0.0529, "step": 58540 }, { - "epoch": 4.347987524134858, - "grad_norm": 1.0745980739593506, - "learning_rate": 3.912074855190851e-06, - "loss": 0.0672, + "epoch": 8.695975048269716, + "grad_norm": 0.5360456109046936, + "learning_rate": 1.304024951730284e-06, + "loss": 0.0526, "step": 58550 }, { - "epoch": 4.348730135155206, - "grad_norm": 1.1821939945220947, - "learning_rate": 3.907619189068766e-06, - "loss": 0.0755, + "epoch": 8.697460270310412, + "grad_norm": 0.38922441005706787, + "learning_rate": 1.3025397296895886e-06, + "loss": 0.0518, "step": 58560 }, { - "epoch": 4.349472746175553, - "grad_norm": 3.788940668106079, - "learning_rate": 3.903163522946681e-06, - "loss": 0.0683, + "epoch": 8.698945492351106, + "grad_norm": 1.2982813119888306, + "learning_rate": 1.3010545076488937e-06, + "loss": 0.0578, "step": 58570 }, { - "epoch": 4.350215357195901, - "grad_norm": 2.9081804752349854, - "learning_rate": 3.898707856824595e-06, - "loss": 0.0677, + "epoch": 8.700430714391802, + "grad_norm": 1.141973853111267, + "learning_rate": 1.2995692856081985e-06, + "loss": 0.0618, "step": 58580 }, { - "epoch": 4.350957968216249, - "grad_norm": 1.8848897218704224, - "learning_rate": 3.89425219070251e-06, - "loss": 0.0605, + "epoch": 8.701915936432497, + "grad_norm": 1.176173448562622, + "learning_rate": 1.2980840635675034e-06, + "loss": 0.0526, "step": 58590 }, { - "epoch": 4.351700579236596, - "grad_norm": 0.5404842495918274, - "learning_rate": 3.889796524580424e-06, - "loss": 0.04, + "epoch": 8.703401158473191, + "grad_norm": 1.5215836763381958, + "learning_rate": 1.2965988415268083e-06, + "loss": 0.0488, "step": 58600 }, { - "epoch": 4.352443190256944, - "grad_norm": 1.7580265998840332, - "learning_rate": 3.885340858458339e-06, - "loss": 0.0307, + "epoch": 8.704886380513887, + "grad_norm": 0.5017318725585938, + "learning_rate": 1.2951136194861133e-06, + "loss": 0.0648, "step": 58610 }, { - "epoch": 4.353185801277291, - "grad_norm": 1.8819963932037354, - "learning_rate": 3.880885192336254e-06, - "loss": 0.0704, + "epoch": 8.706371602554581, + "grad_norm": 0.46008479595184326, + "learning_rate": 1.293628397445418e-06, + "loss": 0.0496, "step": 58620 }, { - "epoch": 4.3539284122976385, - "grad_norm": 0.9093202948570251, - "learning_rate": 3.876429526214169e-06, - "loss": 0.0915, + "epoch": 8.707856824595277, + "grad_norm": 0.5816351771354675, + "learning_rate": 1.292143175404723e-06, + "loss": 0.0689, "step": 58630 }, { - "epoch": 4.354671023317986, - "grad_norm": 2.8272292613983154, - "learning_rate": 3.871973860092084e-06, - "loss": 0.0886, + "epoch": 8.709342046635973, + "grad_norm": 0.3985196352005005, + "learning_rate": 1.2906579533640281e-06, + "loss": 0.0621, "step": 58640 }, { - "epoch": 4.355413634338333, - "grad_norm": 2.371199131011963, - "learning_rate": 3.8675181939699985e-06, - "loss": 0.0583, + "epoch": 8.710827268676667, + "grad_norm": 0.8761214017868042, + "learning_rate": 1.289172731323333e-06, + "loss": 0.0538, "step": 58650 }, { - "epoch": 4.356156245358681, - "grad_norm": 0.21469803154468536, - "learning_rate": 3.8630625278479135e-06, - "loss": 0.0405, + "epoch": 8.712312490717363, + "grad_norm": 0.8328244686126709, + "learning_rate": 1.287687509282638e-06, + "loss": 0.0497, "step": 58660 }, { - "epoch": 4.356898856379028, - "grad_norm": 1.9610093832015991, - "learning_rate": 3.858606861725828e-06, - "loss": 0.031, + "epoch": 8.713797712758057, + "grad_norm": 1.0810935497283936, + "learning_rate": 1.2862022872419427e-06, + "loss": 0.085, "step": 58670 }, { - "epoch": 4.357641467399376, - "grad_norm": 3.707371950149536, - "learning_rate": 3.8541511956037435e-06, - "loss": 0.0643, + "epoch": 8.715282934798752, + "grad_norm": 1.138525128364563, + "learning_rate": 1.2847170652012477e-06, + "loss": 0.0604, "step": 58680 }, { - "epoch": 4.358384078419724, - "grad_norm": 0.8655148148536682, - "learning_rate": 3.849695529481658e-06, - "loss": 0.0624, + "epoch": 8.716768156839448, + "grad_norm": 1.0397404432296753, + "learning_rate": 1.2832318431605526e-06, + "loss": 0.0609, "step": 58690 }, { - "epoch": 4.359126689440071, - "grad_norm": 2.9356181621551514, - "learning_rate": 3.845239863359573e-06, - "loss": 0.081, + "epoch": 8.718253378880142, + "grad_norm": 2.0040159225463867, + "learning_rate": 1.2817466211198577e-06, + "loss": 0.0432, "step": 58700 }, { - "epoch": 4.359869300460419, - "grad_norm": 1.3285295963287354, - "learning_rate": 3.840784197237487e-06, - "loss": 0.0463, + "epoch": 8.719738600920838, + "grad_norm": 0.5572036504745483, + "learning_rate": 1.2802613990791623e-06, + "loss": 0.0594, "step": 58710 }, { - "epoch": 4.360611911480766, - "grad_norm": 0.44809725880622864, - "learning_rate": 3.836328531115402e-06, - "loss": 0.0769, + "epoch": 8.721223822961532, + "grad_norm": 0.7825028300285339, + "learning_rate": 1.2787761770384674e-06, + "loss": 0.0514, "step": 58720 }, { - "epoch": 4.361354522501114, - "grad_norm": 1.0158475637435913, - "learning_rate": 3.831872864993316e-06, - "loss": 0.0721, + "epoch": 8.722709045002228, + "grad_norm": 0.37247392535209656, + "learning_rate": 1.2772909549977722e-06, + "loss": 0.0588, "step": 58730 }, { - "epoch": 4.362097133521462, - "grad_norm": 1.1209678649902344, - "learning_rate": 3.827417198871232e-06, - "loss": 0.0601, + "epoch": 8.724194267042924, + "grad_norm": 0.9438624382019043, + "learning_rate": 1.2758057329570773e-06, + "loss": 0.0493, "step": 58740 }, { - "epoch": 4.362839744541809, - "grad_norm": 1.2492932081222534, - "learning_rate": 3.822961532749146e-06, - "loss": 0.0776, + "epoch": 8.725679489083618, + "grad_norm": 0.9759641289710999, + "learning_rate": 1.274320510916382e-06, + "loss": 0.0502, "step": 58750 }, { - "epoch": 4.363582355562157, - "grad_norm": 0.5209352374076843, - "learning_rate": 3.818505866627061e-06, - "loss": 0.0311, + "epoch": 8.727164711124313, + "grad_norm": 0.6798367500305176, + "learning_rate": 1.272835288875687e-06, + "loss": 0.0572, "step": 58760 }, { - "epoch": 4.364324966582504, - "grad_norm": 0.8227794766426086, - "learning_rate": 3.814050200504976e-06, - "loss": 0.0282, + "epoch": 8.728649933165007, + "grad_norm": 0.7928009033203125, + "learning_rate": 1.271350066834992e-06, + "loss": 0.0603, "step": 58770 }, { - "epoch": 4.365067577602852, - "grad_norm": 1.6542755365371704, - "learning_rate": 3.8095945343828904e-06, - "loss": 0.0766, + "epoch": 8.730135155205703, + "grad_norm": 0.44863516092300415, + "learning_rate": 1.2698648447942967e-06, + "loss": 0.0494, "step": 58780 }, { - "epoch": 4.3658101886231995, - "grad_norm": 2.0904018878936768, - "learning_rate": 3.8051388682608054e-06, - "loss": 0.0475, + "epoch": 8.731620377246399, + "grad_norm": 0.6240716576576233, + "learning_rate": 1.2683796227536018e-06, + "loss": 0.0535, "step": 58790 }, { - "epoch": 4.3665527996435465, - "grad_norm": 1.682500958442688, - "learning_rate": 3.8006832021387196e-06, - "loss": 0.0691, + "epoch": 8.733105599287093, + "grad_norm": 1.6525503396987915, + "learning_rate": 1.2668944007129067e-06, + "loss": 0.0508, "step": 58800 }, { - "epoch": 4.367295410663894, - "grad_norm": 2.096959352493286, - "learning_rate": 3.796227536016635e-06, - "loss": 0.0475, + "epoch": 8.734590821327789, + "grad_norm": 1.3705474138259888, + "learning_rate": 1.2654091786722117e-06, + "loss": 0.0605, "step": 58810 }, { - "epoch": 4.368038021684241, - "grad_norm": 2.7535927295684814, - "learning_rate": 3.791771869894549e-06, - "loss": 0.0791, + "epoch": 8.736076043368483, + "grad_norm": 0.4468485116958618, + "learning_rate": 1.2639239566315164e-06, + "loss": 0.0639, "step": 58820 }, { - "epoch": 4.368780632704589, - "grad_norm": 0.5873667001724243, - "learning_rate": 3.787316203772464e-06, - "loss": 0.0359, + "epoch": 8.737561265409179, + "grad_norm": 1.8079521656036377, + "learning_rate": 1.2624387345908214e-06, + "loss": 0.0505, "step": 58830 }, { - "epoch": 4.369523243724937, - "grad_norm": 3.4915168285369873, - "learning_rate": 3.7828605376503787e-06, - "loss": 0.0502, + "epoch": 8.739046487449874, + "grad_norm": 1.1422653198242188, + "learning_rate": 1.2609535125501263e-06, + "loss": 0.0569, "step": 58840 }, { - "epoch": 4.370265854745284, - "grad_norm": 0.34856438636779785, - "learning_rate": 3.7784048715282937e-06, - "loss": 0.0606, + "epoch": 8.740531709490568, + "grad_norm": 0.506270706653595, + "learning_rate": 1.2594682905094314e-06, + "loss": 0.0573, "step": 58850 }, { - "epoch": 4.371008465765632, - "grad_norm": 0.42992135882377625, - "learning_rate": 3.773949205406208e-06, - "loss": 0.0645, + "epoch": 8.742016931531264, + "grad_norm": 0.41067951917648315, + "learning_rate": 1.257983068468736e-06, + "loss": 0.0427, "step": 58860 }, { - "epoch": 4.37175107678598, - "grad_norm": 1.6763559579849243, - "learning_rate": 3.7694935392841233e-06, - "loss": 0.0786, + "epoch": 8.74350215357196, + "grad_norm": 0.4286954998970032, + "learning_rate": 1.256497846428041e-06, + "loss": 0.0507, "step": 58870 }, { - "epoch": 4.372493687806327, - "grad_norm": 2.7361538410186768, - "learning_rate": 3.7650378731620374e-06, - "loss": 0.0677, + "epoch": 8.744987375612654, + "grad_norm": 1.6591687202453613, + "learning_rate": 1.255012624387346e-06, + "loss": 0.0754, "step": 58880 }, { - "epoch": 4.373236298826675, - "grad_norm": 2.2586610317230225, - "learning_rate": 3.7605822070399524e-06, - "loss": 0.0419, + "epoch": 8.74647259765335, + "grad_norm": 0.6580554842948914, + "learning_rate": 1.253527402346651e-06, + "loss": 0.0632, "step": 58890 }, { - "epoch": 4.373978909847022, - "grad_norm": 0.7950805425643921, - "learning_rate": 3.756126540917868e-06, - "loss": 0.0381, + "epoch": 8.747957819694044, + "grad_norm": 1.3528720140457153, + "learning_rate": 1.252042180305956e-06, + "loss": 0.0537, "step": 58900 }, { - "epoch": 4.37472152086737, - "grad_norm": 0.500672459602356, - "learning_rate": 3.751670874795782e-06, - "loss": 0.0576, + "epoch": 8.74944304173474, + "grad_norm": 0.600568950176239, + "learning_rate": 1.2505569582652607e-06, + "loss": 0.0492, "step": 58910 }, { - "epoch": 4.375464131887718, - "grad_norm": 1.354580044746399, - "learning_rate": 3.7472152086736965e-06, - "loss": 0.0514, + "epoch": 8.750928263775435, + "grad_norm": 1.0042638778686523, + "learning_rate": 1.2490717362245656e-06, + "loss": 0.0499, "step": 58920 }, { - "epoch": 4.376206742908065, - "grad_norm": 2.166482925415039, - "learning_rate": 3.7427595425516115e-06, - "loss": 0.0508, + "epoch": 8.75241348581613, + "grad_norm": 0.5799403786659241, + "learning_rate": 1.2475865141838707e-06, + "loss": 0.0447, "step": 58930 }, { - "epoch": 4.376949353928413, - "grad_norm": 0.8477782011032104, - "learning_rate": 3.738303876429526e-06, - "loss": 0.0472, + "epoch": 8.753898707856825, + "grad_norm": 0.4826274514198303, + "learning_rate": 1.2461012921431755e-06, + "loss": 0.0497, "step": 58940 }, { - "epoch": 4.37769196494876, - "grad_norm": 0.9658443331718445, - "learning_rate": 3.733848210307441e-06, - "loss": 0.0729, + "epoch": 8.75538392989752, + "grad_norm": 0.8607178330421448, + "learning_rate": 1.2446160701024804e-06, + "loss": 0.0545, "step": 58950 }, { - "epoch": 4.3784345759691075, - "grad_norm": 3.2751779556274414, - "learning_rate": 3.7293925441853557e-06, - "loss": 0.0389, + "epoch": 8.756869151938215, + "grad_norm": 0.7830055356025696, + "learning_rate": 1.2431308480617852e-06, + "loss": 0.0422, "step": 58960 }, { - "epoch": 4.379177186989455, - "grad_norm": 0.7033012509346008, - "learning_rate": 3.7249368780632707e-06, - "loss": 0.051, + "epoch": 8.75835437397891, + "grad_norm": 0.3618166148662567, + "learning_rate": 1.2416456260210903e-06, + "loss": 0.0484, "step": 58970 }, { - "epoch": 4.379919798009802, - "grad_norm": 0.37652888894081116, - "learning_rate": 3.7204812119411857e-06, - "loss": 0.0488, + "epoch": 8.759839596019605, + "grad_norm": 0.8886063694953918, + "learning_rate": 1.2401604039803951e-06, + "loss": 0.0507, "step": 58980 }, { - "epoch": 4.38066240903015, - "grad_norm": 1.1736301183700562, - "learning_rate": 3.7160255458191002e-06, - "loss": 0.0742, + "epoch": 8.7613248180603, + "grad_norm": 1.0976297855377197, + "learning_rate": 1.2386751819397002e-06, + "loss": 0.0348, "step": 58990 }, { - "epoch": 4.381405020050497, - "grad_norm": 1.1174696683883667, - "learning_rate": 3.711569879697015e-06, - "loss": 0.0505, + "epoch": 8.762810040100995, + "grad_norm": 0.6038275361061096, + "learning_rate": 1.237189959899005e-06, + "loss": 0.0518, "step": 59000 }, { - "epoch": 4.382147631070845, - "grad_norm": 2.9306743144989014, - "learning_rate": 3.70711421357493e-06, - "loss": 0.051, + "epoch": 8.76429526214169, + "grad_norm": 1.2169570922851562, + "learning_rate": 1.23570473785831e-06, + "loss": 0.0597, "step": 59010 }, { - "epoch": 4.382890242091193, - "grad_norm": 1.5580140352249146, - "learning_rate": 3.7026585474528444e-06, - "loss": 0.0598, + "epoch": 8.765780484182386, + "grad_norm": 1.306175708770752, + "learning_rate": 1.2342195158176148e-06, + "loss": 0.0663, "step": 59020 }, { - "epoch": 4.38363285311154, - "grad_norm": 1.074127435684204, - "learning_rate": 3.698202881330759e-06, - "loss": 0.053, + "epoch": 8.76726570622308, + "grad_norm": 0.9833489656448364, + "learning_rate": 1.2327342937769196e-06, + "loss": 0.0475, "step": 59030 }, { - "epoch": 4.384375464131888, - "grad_norm": 0.9551728367805481, - "learning_rate": 3.693747215208674e-06, - "loss": 0.0438, + "epoch": 8.768750928263776, + "grad_norm": 0.8598132729530334, + "learning_rate": 1.2312490717362247e-06, + "loss": 0.0383, "step": 59040 }, { - "epoch": 4.385118075152235, - "grad_norm": 1.77925705909729, - "learning_rate": 3.6892915490865885e-06, - "loss": 0.0489, + "epoch": 8.77023615030447, + "grad_norm": 0.6318262219429016, + "learning_rate": 1.2297638496955296e-06, + "loss": 0.0461, "step": 59050 }, { - "epoch": 4.385860686172583, - "grad_norm": 0.7532749772071838, - "learning_rate": 3.684835882964503e-06, - "loss": 0.0431, + "epoch": 8.771721372345166, + "grad_norm": 1.8029948472976685, + "learning_rate": 1.2282786276548344e-06, + "loss": 0.0561, "step": 59060 }, { - "epoch": 4.386603297192931, - "grad_norm": 1.1308987140655518, - "learning_rate": 3.680380216842418e-06, - "loss": 0.0617, + "epoch": 8.773206594385861, + "grad_norm": 1.5056575536727905, + "learning_rate": 1.2267934056141393e-06, + "loss": 0.0542, "step": 59070 }, { - "epoch": 4.387345908213278, - "grad_norm": 3.694500684738159, - "learning_rate": 3.6759245507203326e-06, - "loss": 0.065, + "epoch": 8.774691816426555, + "grad_norm": 0.7758419513702393, + "learning_rate": 1.2253081835734444e-06, + "loss": 0.0468, "step": 59080 }, { - "epoch": 4.388088519233626, - "grad_norm": 2.997882127761841, - "learning_rate": 3.671468884598247e-06, - "loss": 0.063, + "epoch": 8.776177038467251, + "grad_norm": 1.0615826845169067, + "learning_rate": 1.2238229615327492e-06, + "loss": 0.0397, "step": 59090 }, { - "epoch": 4.388831130253973, - "grad_norm": 1.4791189432144165, - "learning_rate": 3.6670132184761626e-06, - "loss": 0.0603, + "epoch": 8.777662260507945, + "grad_norm": 0.5602389574050903, + "learning_rate": 1.2223377394920543e-06, + "loss": 0.0586, "step": 59100 }, { - "epoch": 4.3895737412743205, - "grad_norm": 1.4636845588684082, - "learning_rate": 3.662557552354077e-06, - "loss": 0.0548, + "epoch": 8.779147482548641, + "grad_norm": 0.767863929271698, + "learning_rate": 1.2208525174513591e-06, + "loss": 0.0722, "step": 59110 }, { - "epoch": 4.390316352294668, - "grad_norm": 1.0181242227554321, - "learning_rate": 3.6581018862319918e-06, - "loss": 0.0424, + "epoch": 8.780632704589337, + "grad_norm": 1.2912675142288208, + "learning_rate": 1.219367295410664e-06, + "loss": 0.0761, "step": 59120 }, { - "epoch": 4.391058963315015, - "grad_norm": 0.7411527037620544, - "learning_rate": 3.6536462201099068e-06, - "loss": 0.0591, + "epoch": 8.78211792663003, + "grad_norm": 1.0784565210342407, + "learning_rate": 1.2178820733699689e-06, + "loss": 0.043, "step": 59130 }, { - "epoch": 4.391801574335363, - "grad_norm": 0.6368983387947083, - "learning_rate": 3.6491905539878213e-06, - "loss": 0.0731, + "epoch": 8.783603148670727, + "grad_norm": 0.5213328003883362, + "learning_rate": 1.216396851329274e-06, + "loss": 0.05, "step": 59140 }, { - "epoch": 4.39254418535571, - "grad_norm": 0.9122620224952698, - "learning_rate": 3.644734887865736e-06, - "loss": 0.0579, + "epoch": 8.78508837071142, + "grad_norm": 0.12252107262611389, + "learning_rate": 1.2149116292885788e-06, + "loss": 0.0373, "step": 59150 }, { - "epoch": 4.393286796376058, - "grad_norm": 1.5511554479599, - "learning_rate": 3.640279221743651e-06, - "loss": 0.0578, + "epoch": 8.786573592752116, + "grad_norm": 0.5756390690803528, + "learning_rate": 1.2134264072478836e-06, + "loss": 0.0423, "step": 59160 }, { - "epoch": 4.394029407396406, - "grad_norm": 1.4701236486434937, - "learning_rate": 3.6358235556215655e-06, - "loss": 0.0377, + "epoch": 8.788058814792812, + "grad_norm": 0.5673865675926208, + "learning_rate": 1.2119411852071885e-06, + "loss": 0.0557, "step": 59170 }, { - "epoch": 4.394772018416753, - "grad_norm": 0.35914507508277893, - "learning_rate": 3.63136788949948e-06, - "loss": 0.0586, + "epoch": 8.789544036833506, + "grad_norm": 1.11675226688385, + "learning_rate": 1.2104559631664936e-06, + "loss": 0.0704, "step": 59180 }, { - "epoch": 4.395514629437101, - "grad_norm": 1.4817779064178467, - "learning_rate": 3.626912223377395e-06, - "loss": 0.0664, + "epoch": 8.791029258874202, + "grad_norm": 0.8019585609436035, + "learning_rate": 1.2089707411257984e-06, + "loss": 0.063, "step": 59190 }, { - "epoch": 4.396257240457448, - "grad_norm": 2.5295608043670654, - "learning_rate": 3.6224565572553096e-06, - "loss": 0.0454, + "epoch": 8.792514480914896, + "grad_norm": 0.49639007449150085, + "learning_rate": 1.2074855190851033e-06, + "loss": 0.0612, "step": 59200 }, { - "epoch": 4.396999851477796, - "grad_norm": 0.7852722406387329, - "learning_rate": 3.618000891133224e-06, - "loss": 0.0399, + "epoch": 8.793999702955592, + "grad_norm": 1.0924346446990967, + "learning_rate": 1.2060002970444081e-06, + "loss": 0.0469, "step": 59210 }, { - "epoch": 4.397742462498144, - "grad_norm": 0.24262398481369019, - "learning_rate": 3.613545225011139e-06, - "loss": 0.0409, + "epoch": 8.795484924996288, + "grad_norm": 0.4211598038673401, + "learning_rate": 1.204515075003713e-06, + "loss": 0.0485, "step": 59220 }, { - "epoch": 4.398485073518491, - "grad_norm": 0.33007925748825073, - "learning_rate": 3.609089558889054e-06, - "loss": 0.0593, + "epoch": 8.796970147036982, + "grad_norm": 0.7586895823478699, + "learning_rate": 1.203029852963018e-06, + "loss": 0.0639, "step": 59230 }, - { - "epoch": 4.399227684538839, - "grad_norm": 1.3206017017364502, - "learning_rate": 3.6046338927669687e-06, - "loss": 0.0687, + { + "epoch": 8.798455369077677, + "grad_norm": 0.7787258625030518, + "learning_rate": 1.2015446309223231e-06, + "loss": 0.0568, "step": 59240 }, { - "epoch": 4.399970295559186, - "grad_norm": 2.052076578140259, - "learning_rate": 3.6001782266448837e-06, - "loss": 0.0808, + "epoch": 8.799940591118371, + "grad_norm": 0.5634714365005493, + "learning_rate": 1.200059408881628e-06, + "loss": 0.0442, "step": 59250 }, { - "epoch": 4.400712906579534, - "grad_norm": 3.8353078365325928, - "learning_rate": 3.5957225605227983e-06, - "loss": 0.0581, + "epoch": 8.801425813159067, + "grad_norm": 1.3035249710083008, + "learning_rate": 1.1985741868409328e-06, + "loss": 0.0604, "step": 59260 }, { - "epoch": 4.4014555175998815, - "grad_norm": 0.6021201014518738, - "learning_rate": 3.591266894400713e-06, - "loss": 0.0714, + "epoch": 8.802911035199763, + "grad_norm": 0.8066299557685852, + "learning_rate": 1.1970889648002377e-06, + "loss": 0.0489, "step": 59270 }, { - "epoch": 4.4021981286202285, - "grad_norm": 2.1962730884552, - "learning_rate": 3.586811228278628e-06, - "loss": 0.0501, + "epoch": 8.804396257240457, + "grad_norm": 1.6491585969924927, + "learning_rate": 1.1956037427595428e-06, + "loss": 0.0658, "step": 59280 }, { - "epoch": 4.402940739640576, - "grad_norm": 0.2795027792453766, - "learning_rate": 3.5823555621565424e-06, - "loss": 0.0573, + "epoch": 8.805881479281153, + "grad_norm": 2.1438181400299072, + "learning_rate": 1.1941185207188476e-06, + "loss": 0.0563, "step": 59290 }, { - "epoch": 4.403683350660923, - "grad_norm": 1.4487619400024414, - "learning_rate": 3.5778998960344574e-06, - "loss": 0.0312, + "epoch": 8.807366701321847, + "grad_norm": 1.1060525178909302, + "learning_rate": 1.1926332986781525e-06, + "loss": 0.0585, "step": 59300 }, { - "epoch": 4.404425961681271, - "grad_norm": 2.9178457260131836, - "learning_rate": 3.573444229912372e-06, - "loss": 0.0564, + "epoch": 8.808851923362543, + "grad_norm": 0.823993444442749, + "learning_rate": 1.1911480766374573e-06, + "loss": 0.0388, "step": 59310 }, { - "epoch": 4.405168572701619, - "grad_norm": 1.1441450119018555, - "learning_rate": 3.5689885637902866e-06, - "loss": 0.0618, + "epoch": 8.810337145403238, + "grad_norm": 0.8131610155105591, + "learning_rate": 1.1896628545967622e-06, + "loss": 0.0565, "step": 59320 }, { - "epoch": 4.405911183721966, - "grad_norm": 2.4585134983062744, - "learning_rate": 3.5645328976682016e-06, - "loss": 0.044, + "epoch": 8.811822367443932, + "grad_norm": 0.6244588494300842, + "learning_rate": 1.1881776325560673e-06, + "loss": 0.0575, "step": 59330 }, { - "epoch": 4.406653794742314, - "grad_norm": 1.8690593242645264, - "learning_rate": 3.560077231546116e-06, - "loss": 0.0638, + "epoch": 8.813307589484628, + "grad_norm": 0.7041961550712585, + "learning_rate": 1.1866924105153721e-06, + "loss": 0.063, "step": 59340 }, { - "epoch": 4.407396405762661, - "grad_norm": 1.3198564052581787, - "learning_rate": 3.5556215654240307e-06, - "loss": 0.0596, + "epoch": 8.814792811525322, + "grad_norm": 0.21381068229675293, + "learning_rate": 1.185207188474677e-06, + "loss": 0.0309, "step": 59350 }, { - "epoch": 4.408139016783009, - "grad_norm": 1.511386513710022, - "learning_rate": 3.551165899301946e-06, - "loss": 0.0775, + "epoch": 8.816278033566018, + "grad_norm": 0.7408416271209717, + "learning_rate": 1.183721966433982e-06, + "loss": 0.0605, "step": 59360 }, { - "epoch": 4.408881627803357, - "grad_norm": 1.489264965057373, - "learning_rate": 3.5467102331798607e-06, - "loss": 0.0824, + "epoch": 8.817763255606714, + "grad_norm": 0.9459415674209595, + "learning_rate": 1.182236744393287e-06, + "loss": 0.0734, "step": 59370 }, { - "epoch": 4.409624238823704, - "grad_norm": 0.6738532185554504, - "learning_rate": 3.5422545670577753e-06, - "loss": 0.1038, + "epoch": 8.819248477647408, + "grad_norm": 0.6017314195632935, + "learning_rate": 1.1807515223525918e-06, + "loss": 0.0533, "step": 59380 }, { - "epoch": 4.410366849844052, - "grad_norm": 1.928895115852356, - "learning_rate": 3.5377989009356903e-06, - "loss": 0.0687, + "epoch": 8.820733699688104, + "grad_norm": 0.6662879586219788, + "learning_rate": 1.1792663003118968e-06, + "loss": 0.051, "step": 59390 }, { - "epoch": 4.411109460864399, - "grad_norm": 2.568911552429199, - "learning_rate": 3.533343234813605e-06, - "loss": 0.0677, + "epoch": 8.822218921728798, + "grad_norm": 1.1942579746246338, + "learning_rate": 1.1777810782712017e-06, + "loss": 0.0568, "step": 59400 }, { - "epoch": 4.411852071884747, - "grad_norm": 3.7109265327453613, - "learning_rate": 3.5288875686915194e-06, - "loss": 0.0468, + "epoch": 8.823704143769493, + "grad_norm": 1.188172459602356, + "learning_rate": 1.1762958562305065e-06, + "loss": 0.0681, "step": 59410 }, { - "epoch": 4.412594682905095, - "grad_norm": 1.1439366340637207, - "learning_rate": 3.5244319025694344e-06, - "loss": 0.0471, + "epoch": 8.82518936581019, + "grad_norm": 0.9328430891036987, + "learning_rate": 1.1748106341898114e-06, + "loss": 0.0544, "step": 59420 }, { - "epoch": 4.413337293925442, - "grad_norm": 1.701995849609375, - "learning_rate": 3.519976236447349e-06, - "loss": 0.0555, + "epoch": 8.826674587850883, + "grad_norm": 0.6567057371139526, + "learning_rate": 1.1733254121491165e-06, + "loss": 0.0605, "step": 59430 }, { - "epoch": 4.4140799049457895, - "grad_norm": 3.7509143352508545, - "learning_rate": 3.5155205703252635e-06, - "loss": 0.0486, + "epoch": 8.828159809891579, + "grad_norm": 0.9897271394729614, + "learning_rate": 1.1718401901084213e-06, + "loss": 0.0556, "step": 59440 }, { - "epoch": 4.4148225159661365, - "grad_norm": 2.0457451343536377, - "learning_rate": 3.5110649042031785e-06, - "loss": 0.0599, + "epoch": 8.829645031932273, + "grad_norm": 0.8121185898780823, + "learning_rate": 1.1703549680677262e-06, + "loss": 0.0681, "step": 59450 }, { - "epoch": 4.415565126986484, - "grad_norm": 1.8995429277420044, - "learning_rate": 3.506609238081093e-06, - "loss": 0.0546, + "epoch": 8.831130253972969, + "grad_norm": 0.4883832335472107, + "learning_rate": 1.168869746027031e-06, + "loss": 0.048, "step": 59460 }, { - "epoch": 4.416307738006832, - "grad_norm": 1.6824947595596313, - "learning_rate": 3.5021535719590077e-06, - "loss": 0.0679, + "epoch": 8.832615476013665, + "grad_norm": 1.0136661529541016, + "learning_rate": 1.167384523986336e-06, + "loss": 0.0607, "step": 59470 }, { - "epoch": 4.417050349027179, - "grad_norm": 2.0449635982513428, - "learning_rate": 3.4976979058369227e-06, - "loss": 0.0552, + "epoch": 8.834100698054359, + "grad_norm": 1.6294145584106445, + "learning_rate": 1.165899301945641e-06, + "loss": 0.059, "step": 59480 }, { - "epoch": 4.417792960047527, - "grad_norm": 0.3595518469810486, - "learning_rate": 3.4932422397148372e-06, - "loss": 0.0452, + "epoch": 8.835585920095054, + "grad_norm": 1.445611834526062, + "learning_rate": 1.1644140799049458e-06, + "loss": 0.0488, "step": 59490 }, { - "epoch": 4.418535571067874, - "grad_norm": 0.20933711528778076, - "learning_rate": 3.4887865735927522e-06, - "loss": 0.0662, + "epoch": 8.837071142135748, + "grad_norm": 0.6219216585159302, + "learning_rate": 1.1629288578642509e-06, + "loss": 0.0465, "step": 59500 }, { - "epoch": 4.419278182088222, - "grad_norm": 2.1247775554656982, - "learning_rate": 3.4843309074706672e-06, - "loss": 0.0577, + "epoch": 8.838556364176444, + "grad_norm": 0.4000525176525116, + "learning_rate": 1.1614436358235557e-06, + "loss": 0.0475, "step": 59510 }, { - "epoch": 4.42002079310857, - "grad_norm": 1.3232066631317139, - "learning_rate": 3.479875241348582e-06, - "loss": 0.0739, + "epoch": 8.84004158621714, + "grad_norm": 0.29526957869529724, + "learning_rate": 1.1599584137828606e-06, + "loss": 0.0398, "step": 59520 }, { - "epoch": 4.420763404128917, - "grad_norm": 1.2847281694412231, - "learning_rate": 3.4754195752264964e-06, - "loss": 0.1052, + "epoch": 8.841526808257834, + "grad_norm": 1.7476985454559326, + "learning_rate": 1.1584731917421657e-06, + "loss": 0.0522, "step": 59530 }, { - "epoch": 4.421506015149265, - "grad_norm": 2.9592535495758057, - "learning_rate": 3.4709639091044114e-06, - "loss": 0.0837, + "epoch": 8.84301203029853, + "grad_norm": 1.4126195907592773, + "learning_rate": 1.1569879697014705e-06, + "loss": 0.0543, "step": 59540 }, { - "epoch": 4.422248626169612, - "grad_norm": 3.9245753288269043, - "learning_rate": 3.466508242982326e-06, - "loss": 0.0602, + "epoch": 8.844497252339224, + "grad_norm": 0.6883609890937805, + "learning_rate": 1.1555027476607754e-06, + "loss": 0.0585, "step": 59550 }, { - "epoch": 4.42299123718996, - "grad_norm": 0.8036717176437378, - "learning_rate": 3.4620525768602405e-06, - "loss": 0.0609, + "epoch": 8.84598247437992, + "grad_norm": 1.0310041904449463, + "learning_rate": 1.1540175256200802e-06, + "loss": 0.0457, "step": 59560 }, { - "epoch": 4.423733848210308, - "grad_norm": 0.5781071782112122, - "learning_rate": 3.4575969107381555e-06, - "loss": 0.056, + "epoch": 8.847467696420615, + "grad_norm": 0.666269063949585, + "learning_rate": 1.152532303579385e-06, + "loss": 0.0494, "step": 59570 }, { - "epoch": 4.424476459230655, - "grad_norm": 2.8381593227386475, - "learning_rate": 3.45314124461607e-06, - "loss": 0.0667, + "epoch": 8.84895291846131, + "grad_norm": 0.570885956287384, + "learning_rate": 1.1510470815386902e-06, + "loss": 0.0514, "step": 59580 }, { - "epoch": 4.425219070251003, - "grad_norm": 2.3946638107299805, - "learning_rate": 3.4486855784939846e-06, - "loss": 0.0354, + "epoch": 8.850438140502005, + "grad_norm": 2.130302906036377, + "learning_rate": 1.149561859497995e-06, + "loss": 0.0514, "step": 59590 }, { - "epoch": 4.4259616812713505, - "grad_norm": 1.2118726968765259, - "learning_rate": 3.4442299123718996e-06, - "loss": 0.0549, + "epoch": 8.851923362542701, + "grad_norm": 1.2145628929138184, + "learning_rate": 1.1480766374572999e-06, + "loss": 0.029, "step": 59600 }, { - "epoch": 4.4267042922916975, - "grad_norm": 2.8117597103118896, - "learning_rate": 3.439774246249814e-06, - "loss": 0.0692, + "epoch": 8.853408584583395, + "grad_norm": 0.6939643621444702, + "learning_rate": 1.1465914154166047e-06, + "loss": 0.0472, "step": 59610 }, { - "epoch": 4.427446903312045, - "grad_norm": 2.231323719024658, - "learning_rate": 3.4353185801277288e-06, - "loss": 0.0931, + "epoch": 8.85489380662409, + "grad_norm": 0.7668987512588501, + "learning_rate": 1.1451061933759098e-06, + "loss": 0.0605, "step": 59620 }, { - "epoch": 4.428189514332392, - "grad_norm": 2.092609405517578, - "learning_rate": 3.430862914005644e-06, - "loss": 0.089, + "epoch": 8.856379028664785, + "grad_norm": 1.1189602613449097, + "learning_rate": 1.1436209713352149e-06, + "loss": 0.0534, "step": 59630 }, { - "epoch": 4.42893212535274, - "grad_norm": 1.9209469556808472, - "learning_rate": 3.4264072478835588e-06, - "loss": 0.0671, + "epoch": 8.85786425070548, + "grad_norm": 1.8580517768859863, + "learning_rate": 1.1421357492945197e-06, + "loss": 0.0663, "step": 59640 }, { - "epoch": 4.429674736373088, - "grad_norm": 2.280712842941284, - "learning_rate": 3.4219515817614738e-06, - "loss": 0.0598, + "epoch": 8.859349472746176, + "grad_norm": 1.231301188468933, + "learning_rate": 1.1406505272538246e-06, + "loss": 0.0602, "step": 59650 }, { - "epoch": 4.430417347393435, - "grad_norm": 1.3931483030319214, - "learning_rate": 3.4174959156393883e-06, - "loss": 0.063, + "epoch": 8.86083469478687, + "grad_norm": 0.23550699651241302, + "learning_rate": 1.1391653052131294e-06, + "loss": 0.0786, "step": 59660 }, { - "epoch": 4.431159958413783, - "grad_norm": 1.0616021156311035, - "learning_rate": 3.413040249517303e-06, - "loss": 0.0531, + "epoch": 8.862319916827566, + "grad_norm": 1.3787282705307007, + "learning_rate": 1.1376800831724343e-06, + "loss": 0.0523, "step": 59670 }, { - "epoch": 4.43190256943413, - "grad_norm": 1.928513765335083, - "learning_rate": 3.408584583395218e-06, - "loss": 0.0707, + "epoch": 8.86380513886826, + "grad_norm": 0.7296797037124634, + "learning_rate": 1.1361948611317394e-06, + "loss": 0.0664, "step": 59680 }, { - "epoch": 4.432645180454478, - "grad_norm": 0.7961556315422058, - "learning_rate": 3.4041289172731325e-06, - "loss": 0.0584, + "epoch": 8.865290360908956, + "grad_norm": 1.0177968740463257, + "learning_rate": 1.1347096390910442e-06, + "loss": 0.0701, "step": 59690 }, { - "epoch": 4.433387791474826, - "grad_norm": 1.220015525817871, - "learning_rate": 3.399673251151047e-06, - "loss": 0.0569, + "epoch": 8.866775582949652, + "grad_norm": 1.0494672060012817, + "learning_rate": 1.133224417050349e-06, + "loss": 0.0563, "step": 59700 }, { - "epoch": 4.434130402495173, - "grad_norm": 0.7394031882286072, - "learning_rate": 3.395217585028962e-06, - "loss": 0.0611, + "epoch": 8.868260804990346, + "grad_norm": 1.0491480827331543, + "learning_rate": 1.131739195009654e-06, + "loss": 0.0568, "step": 59710 }, { - "epoch": 4.434873013515521, - "grad_norm": 2.8178136348724365, - "learning_rate": 3.3907619189068766e-06, - "loss": 0.082, + "epoch": 8.869746027031042, + "grad_norm": 0.9075988531112671, + "learning_rate": 1.130253972968959e-06, + "loss": 0.0732, "step": 59720 }, { - "epoch": 4.435615624535868, - "grad_norm": 0.8858090043067932, - "learning_rate": 3.386306252784791e-06, - "loss": 0.064, + "epoch": 8.871231249071736, + "grad_norm": 0.7524979114532471, + "learning_rate": 1.1287687509282639e-06, + "loss": 0.0502, "step": 59730 }, { - "epoch": 4.436358235556216, - "grad_norm": 1.7345311641693115, - "learning_rate": 3.381850586662706e-06, - "loss": 0.0571, + "epoch": 8.872716471112431, + "grad_norm": 0.7980123162269592, + "learning_rate": 1.1272835288875687e-06, + "loss": 0.056, "step": 59740 }, { - "epoch": 4.437100846576564, - "grad_norm": 1.1461231708526611, - "learning_rate": 3.3773949205406207e-06, - "loss": 0.0449, + "epoch": 8.874201693153127, + "grad_norm": 0.6302353143692017, + "learning_rate": 1.1257983068468736e-06, + "loss": 0.0492, "step": 59750 }, { - "epoch": 4.437843457596911, - "grad_norm": 5.428303241729736, - "learning_rate": 3.3729392544185357e-06, - "loss": 0.0415, + "epoch": 8.875686915193821, + "grad_norm": 1.1649489402770996, + "learning_rate": 1.1243130848061786e-06, + "loss": 0.0349, "step": 59760 }, { - "epoch": 4.4385860686172585, - "grad_norm": 3.327192544937134, - "learning_rate": 3.3684835882964507e-06, - "loss": 0.0834, + "epoch": 8.877172137234517, + "grad_norm": 0.41437533497810364, + "learning_rate": 1.1228278627654835e-06, + "loss": 0.0543, "step": 59770 }, { - "epoch": 4.4393286796376055, - "grad_norm": 3.348003387451172, - "learning_rate": 3.3640279221743653e-06, - "loss": 0.0855, + "epoch": 8.878657359275211, + "grad_norm": 0.6834044456481934, + "learning_rate": 1.1213426407247886e-06, + "loss": 0.0621, "step": 59780 }, { - "epoch": 4.440071290657953, - "grad_norm": 0.42248573899269104, - "learning_rate": 3.35957225605228e-06, - "loss": 0.0454, + "epoch": 8.880142581315907, + "grad_norm": 1.1875417232513428, + "learning_rate": 1.1198574186840934e-06, + "loss": 0.0571, "step": 59790 }, { - "epoch": 4.440813901678301, - "grad_norm": 0.9625753164291382, - "learning_rate": 3.355116589930195e-06, - "loss": 0.0628, + "epoch": 8.881627803356603, + "grad_norm": 0.9354287981987, + "learning_rate": 1.1183721966433983e-06, + "loss": 0.0673, "step": 59800 }, { - "epoch": 4.441556512698648, - "grad_norm": 0.7581101059913635, - "learning_rate": 3.3506609238081094e-06, - "loss": 0.0483, + "epoch": 8.883113025397297, + "grad_norm": 1.0599032640457153, + "learning_rate": 1.1168869746027031e-06, + "loss": 0.063, "step": 59810 }, { - "epoch": 4.442299123718996, - "grad_norm": 1.2970561981201172, - "learning_rate": 3.346205257686024e-06, - "loss": 0.039, + "epoch": 8.884598247437992, + "grad_norm": 0.8172001838684082, + "learning_rate": 1.1154017525620082e-06, + "loss": 0.057, "step": 59820 }, { - "epoch": 4.443041734739343, - "grad_norm": 2.935488700866699, - "learning_rate": 3.341749591563939e-06, - "loss": 0.0532, + "epoch": 8.886083469478686, + "grad_norm": 1.5262736082077026, + "learning_rate": 1.113916530521313e-06, + "loss": 0.0643, "step": 59830 }, { - "epoch": 4.443784345759691, - "grad_norm": 2.0861330032348633, - "learning_rate": 3.3372939254418536e-06, - "loss": 0.0597, + "epoch": 8.887568691519382, + "grad_norm": 0.44762691855430603, + "learning_rate": 1.112431308480618e-06, + "loss": 0.0364, "step": 59840 }, { - "epoch": 4.444526956780039, - "grad_norm": 3.6742630004882812, - "learning_rate": 3.332838259319768e-06, - "loss": 0.0661, + "epoch": 8.889053913560078, + "grad_norm": 1.2637884616851807, + "learning_rate": 1.1109460864399228e-06, + "loss": 0.049, "step": 59850 }, { - "epoch": 4.445269567800386, - "grad_norm": 2.0556039810180664, - "learning_rate": 3.328382593197683e-06, - "loss": 0.036, + "epoch": 8.890539135600772, + "grad_norm": 0.6419134140014648, + "learning_rate": 1.1094608643992276e-06, + "loss": 0.0492, "step": 59860 }, { - "epoch": 4.446012178820734, - "grad_norm": 0.8471649289131165, - "learning_rate": 3.3239269270755977e-06, - "loss": 0.0508, + "epoch": 8.892024357641468, + "grad_norm": 0.6041998267173767, + "learning_rate": 1.1079756423585327e-06, + "loss": 0.0609, "step": 59870 }, { - "epoch": 4.446754789841081, - "grad_norm": 4.926666736602783, - "learning_rate": 3.3194712609535123e-06, - "loss": 0.0731, + "epoch": 8.893509579682162, + "grad_norm": 1.4558790922164917, + "learning_rate": 1.1064904203178376e-06, + "loss": 0.0583, "step": 59880 }, { - "epoch": 4.447497400861429, - "grad_norm": 0.624416708946228, - "learning_rate": 3.3150155948314277e-06, - "loss": 0.0655, + "epoch": 8.894994801722858, + "grad_norm": 1.1899456977844238, + "learning_rate": 1.1050051982771426e-06, + "loss": 0.0493, "step": 59890 }, { - "epoch": 4.448240011881777, - "grad_norm": 0.27737393975257874, - "learning_rate": 3.3105599287093423e-06, - "loss": 0.044, + "epoch": 8.896480023763553, + "grad_norm": 0.3095281720161438, + "learning_rate": 1.1035199762364475e-06, + "loss": 0.0688, "step": 59900 }, { - "epoch": 4.448982622902124, - "grad_norm": 0.9491309523582458, - "learning_rate": 3.306104262587257e-06, - "loss": 0.0543, + "epoch": 8.897965245804247, + "grad_norm": 0.9570375680923462, + "learning_rate": 1.1020347541957523e-06, + "loss": 0.0533, "step": 59910 }, { - "epoch": 4.4497252339224715, - "grad_norm": 3.4223947525024414, - "learning_rate": 3.301648596465172e-06, - "loss": 0.0775, + "epoch": 8.899450467844943, + "grad_norm": 0.522426187992096, + "learning_rate": 1.1005495321550572e-06, + "loss": 0.0539, "step": 59920 }, { - "epoch": 4.4504678449428186, - "grad_norm": 1.4844588041305542, - "learning_rate": 3.2971929303430864e-06, - "loss": 0.0869, + "epoch": 8.900935689885637, + "grad_norm": 0.6601673364639282, + "learning_rate": 1.0990643101143623e-06, + "loss": 0.0697, "step": 59930 }, { - "epoch": 4.4512104559631664, - "grad_norm": 0.4859474003314972, - "learning_rate": 3.292737264221001e-06, - "loss": 0.0733, + "epoch": 8.902420911926333, + "grad_norm": 0.7265911102294922, + "learning_rate": 1.0975790880736671e-06, + "loss": 0.0476, "step": 59940 }, { - "epoch": 4.451953066983514, - "grad_norm": 3.157867670059204, - "learning_rate": 3.288281598098916e-06, - "loss": 0.0344, + "epoch": 8.903906133967029, + "grad_norm": 1.3074336051940918, + "learning_rate": 1.096093866032972e-06, + "loss": 0.0614, "step": 59950 }, { - "epoch": 4.452695678003861, - "grad_norm": 1.859097957611084, - "learning_rate": 3.2838259319768305e-06, - "loss": 0.0689, + "epoch": 8.905391356007723, + "grad_norm": 1.233241081237793, + "learning_rate": 1.0946086439922768e-06, + "loss": 0.0648, "step": 59960 }, { - "epoch": 4.453438289024209, - "grad_norm": 1.4190930128097534, - "learning_rate": 3.279370265854745e-06, - "loss": 0.0691, + "epoch": 8.906876578048418, + "grad_norm": 2.0665476322174072, + "learning_rate": 1.093123421951582e-06, + "loss": 0.0435, "step": 59970 }, { - "epoch": 4.454180900044556, - "grad_norm": 1.7417631149291992, - "learning_rate": 3.27491459973266e-06, - "loss": 0.0664, + "epoch": 8.908361800089112, + "grad_norm": 0.3625172972679138, + "learning_rate": 1.0916381999108868e-06, + "loss": 0.0539, "step": 59980 }, { - "epoch": 4.454923511064904, - "grad_norm": 0.7751902937889099, - "learning_rate": 3.2704589336105747e-06, - "loss": 0.0361, + "epoch": 8.909847022129808, + "grad_norm": 0.36465543508529663, + "learning_rate": 1.0901529778701916e-06, + "loss": 0.049, "step": 59990 }, { - "epoch": 4.455666122085252, - "grad_norm": 0.1440928876399994, - "learning_rate": 3.2660032674884897e-06, - "loss": 0.0985, + "epoch": 8.911332244170504, + "grad_norm": 0.697887122631073, + "learning_rate": 1.0886677558294965e-06, + "loss": 0.0605, "step": 60000 }, { - "epoch": 4.456408733105599, - "grad_norm": 0.39282867312431335, - "learning_rate": 3.2615476013664042e-06, - "loss": 0.0667, + "epoch": 8.912817466211198, + "grad_norm": 0.8298418521881104, + "learning_rate": 1.0871825337888016e-06, + "loss": 0.0523, "step": 60010 }, { - "epoch": 4.457151344125947, - "grad_norm": 1.775133728981018, - "learning_rate": 3.2570919352443192e-06, - "loss": 0.0448, + "epoch": 8.914302688251894, + "grad_norm": 0.6717225313186646, + "learning_rate": 1.0856973117481064e-06, + "loss": 0.0622, "step": 60020 }, - { - "epoch": 4.457893955146295, - "grad_norm": 0.5823416709899902, - "learning_rate": 3.2526362691222342e-06, - "loss": 0.0666, + { + "epoch": 8.91578791029259, + "grad_norm": 0.8412166833877563, + "learning_rate": 1.0842120897074115e-06, + "loss": 0.0471, "step": 60030 }, { - "epoch": 4.458636566166642, - "grad_norm": 1.3295433521270752, - "learning_rate": 3.248180603000149e-06, - "loss": 0.0311, + "epoch": 8.917273132333284, + "grad_norm": 0.5965431928634644, + "learning_rate": 1.0827268676667163e-06, + "loss": 0.0323, "step": 60040 }, { - "epoch": 4.45937917718699, - "grad_norm": 2.622669219970703, - "learning_rate": 3.2437249368780634e-06, - "loss": 0.0918, + "epoch": 8.91875835437398, + "grad_norm": 0.7671550512313843, + "learning_rate": 1.0812416456260212e-06, + "loss": 0.0637, "step": 60050 }, { - "epoch": 4.460121788207337, - "grad_norm": 1.4052746295928955, - "learning_rate": 3.2392692707559784e-06, - "loss": 0.0562, + "epoch": 8.920243576414673, + "grad_norm": 0.5707191228866577, + "learning_rate": 1.079756423585326e-06, + "loss": 0.037, "step": 60060 }, { - "epoch": 4.460864399227685, - "grad_norm": 0.7786046862602234, - "learning_rate": 3.234813604633893e-06, - "loss": 0.0816, + "epoch": 8.92172879845537, + "grad_norm": 0.5659679174423218, + "learning_rate": 1.0782712015446311e-06, + "loss": 0.0406, "step": 60070 }, { - "epoch": 4.4616070102480325, - "grad_norm": 0.7435610294342041, - "learning_rate": 3.2303579385118075e-06, - "loss": 0.054, + "epoch": 8.923214020496065, + "grad_norm": 1.0556646585464478, + "learning_rate": 1.076785979503936e-06, + "loss": 0.0723, "step": 60080 }, { - "epoch": 4.4623496212683795, - "grad_norm": 1.1836934089660645, - "learning_rate": 3.2259022723897225e-06, - "loss": 0.0801, + "epoch": 8.924699242536759, + "grad_norm": 0.26674020290374756, + "learning_rate": 1.0753007574632408e-06, + "loss": 0.0499, "step": 60090 }, { - "epoch": 4.463092232288727, - "grad_norm": 1.5763529539108276, - "learning_rate": 3.221446606267637e-06, - "loss": 0.0548, + "epoch": 8.926184464577455, + "grad_norm": 0.8880712985992432, + "learning_rate": 1.0738155354225457e-06, + "loss": 0.0489, "step": 60100 }, { - "epoch": 4.463834843309074, - "grad_norm": 1.6699156761169434, - "learning_rate": 3.2169909401455516e-06, - "loss": 0.0634, + "epoch": 8.927669686618149, + "grad_norm": 0.4370151162147522, + "learning_rate": 1.0723303133818505e-06, + "loss": 0.0658, "step": 60110 }, { - "epoch": 4.464577454329422, - "grad_norm": 0.8276064991950989, - "learning_rate": 3.2125352740234666e-06, - "loss": 0.0634, + "epoch": 8.929154908658845, + "grad_norm": 1.0058577060699463, + "learning_rate": 1.0708450913411556e-06, + "loss": 0.0529, "step": 60120 }, { - "epoch": 4.46532006534977, - "grad_norm": 0.7230677008628845, - "learning_rate": 3.208079607901381e-06, - "loss": 0.0656, + "epoch": 8.93064013069954, + "grad_norm": 0.8313179612159729, + "learning_rate": 1.0693598693004605e-06, + "loss": 0.034, "step": 60130 }, { - "epoch": 4.466062676370117, - "grad_norm": 1.405157446861267, - "learning_rate": 3.2036239417792958e-06, - "loss": 0.0971, + "epoch": 8.932125352740234, + "grad_norm": 1.0790013074874878, + "learning_rate": 1.0678746472597653e-06, + "loss": 0.0519, "step": 60140 }, { - "epoch": 4.466805287390465, - "grad_norm": 1.1781624555587769, - "learning_rate": 3.199168275657211e-06, - "loss": 0.0574, + "epoch": 8.93361057478093, + "grad_norm": 1.860836148262024, + "learning_rate": 1.0663894252190704e-06, + "loss": 0.0501, "step": 60150 }, { - "epoch": 4.467547898410812, - "grad_norm": 0.2808609902858734, - "learning_rate": 3.1947126095351258e-06, - "loss": 0.0415, + "epoch": 8.935095796821624, + "grad_norm": 0.9269645810127258, + "learning_rate": 1.0649042031783753e-06, + "loss": 0.0588, "step": 60160 }, { - "epoch": 4.46829050943116, - "grad_norm": 1.525888204574585, - "learning_rate": 3.1902569434130403e-06, - "loss": 0.0791, + "epoch": 8.93658101886232, + "grad_norm": 1.4157025814056396, + "learning_rate": 1.0634189811376803e-06, + "loss": 0.0745, "step": 60170 }, { - "epoch": 4.469033120451508, - "grad_norm": 0.37916257977485657, - "learning_rate": 3.1858012772909553e-06, - "loss": 0.0406, + "epoch": 8.938066240903016, + "grad_norm": 1.012364149093628, + "learning_rate": 1.0619337590969852e-06, + "loss": 0.0586, "step": 60180 }, { - "epoch": 4.469775731471855, - "grad_norm": 1.2917027473449707, - "learning_rate": 3.18134561116887e-06, - "loss": 0.0632, + "epoch": 8.93955146294371, + "grad_norm": 1.1754486560821533, + "learning_rate": 1.06044853705629e-06, + "loss": 0.0525, "step": 60190 }, { - "epoch": 4.470518342492203, - "grad_norm": 1.9711061716079712, - "learning_rate": 3.1768899450467845e-06, - "loss": 0.0628, + "epoch": 8.941036684984406, + "grad_norm": 0.8056031465530396, + "learning_rate": 1.0589633150155949e-06, + "loss": 0.058, "step": 60200 }, { - "epoch": 4.47126095351255, - "grad_norm": 0.7012740969657898, - "learning_rate": 3.1724342789246995e-06, - "loss": 0.0308, + "epoch": 8.9425219070251, + "grad_norm": 1.1152647733688354, + "learning_rate": 1.0574780929748997e-06, + "loss": 0.0432, "step": 60210 }, { - "epoch": 4.472003564532898, - "grad_norm": 2.618312358856201, - "learning_rate": 3.167978612802614e-06, - "loss": 0.0659, + "epoch": 8.944007129065795, + "grad_norm": 2.0299875736236572, + "learning_rate": 1.0559928709342048e-06, + "loss": 0.0505, "step": 60220 }, { - "epoch": 4.472746175553246, - "grad_norm": 0.5947886109352112, - "learning_rate": 3.1635229466805286e-06, - "loss": 0.0411, + "epoch": 8.945492351106491, + "grad_norm": 0.5199902653694153, + "learning_rate": 1.0545076488935097e-06, + "loss": 0.0602, "step": 60230 }, { - "epoch": 4.473488786573593, - "grad_norm": 4.743587493896484, - "learning_rate": 3.1590672805584436e-06, - "loss": 0.0349, + "epoch": 8.946977573147185, + "grad_norm": 1.0558547973632812, + "learning_rate": 1.0530224268528145e-06, + "loss": 0.0543, "step": 60240 }, { - "epoch": 4.4742313975939405, - "grad_norm": 1.2651278972625732, - "learning_rate": 3.154611614436358e-06, - "loss": 0.0567, + "epoch": 8.948462795187881, + "grad_norm": 2.096571922302246, + "learning_rate": 1.0515372048121194e-06, + "loss": 0.0499, "step": 60250 }, { - "epoch": 4.4749740086142875, - "grad_norm": 2.4540841579437256, - "learning_rate": 3.1501559483142727e-06, - "loss": 0.0446, + "epoch": 8.949948017228575, + "grad_norm": 0.9488021731376648, + "learning_rate": 1.0500519827714245e-06, + "loss": 0.0703, "step": 60260 }, { - "epoch": 4.475716619634635, - "grad_norm": 3.072303295135498, - "learning_rate": 3.1457002821921877e-06, - "loss": 0.0747, + "epoch": 8.95143323926927, + "grad_norm": 0.6574435830116272, + "learning_rate": 1.0485667607307293e-06, + "loss": 0.0531, "step": 60270 }, { - "epoch": 4.476459230654983, - "grad_norm": 3.0911707878112793, - "learning_rate": 3.1412446160701023e-06, - "loss": 0.0377, + "epoch": 8.952918461309967, + "grad_norm": 0.9170523881912231, + "learning_rate": 1.0470815386900342e-06, + "loss": 0.0556, "step": 60280 }, { - "epoch": 4.47720184167533, - "grad_norm": 0.6322579979896545, - "learning_rate": 3.1367889499480173e-06, - "loss": 0.0727, + "epoch": 8.95440368335066, + "grad_norm": 1.5840109586715698, + "learning_rate": 1.0455963166493392e-06, + "loss": 0.0468, "step": 60290 }, { - "epoch": 4.477944452695678, - "grad_norm": 0.2527351379394531, - "learning_rate": 3.1323332838259323e-06, - "loss": 0.0379, + "epoch": 8.955888905391356, + "grad_norm": 0.9436348676681519, + "learning_rate": 1.044111094608644e-06, + "loss": 0.061, "step": 60300 }, { - "epoch": 4.478687063716025, - "grad_norm": 2.0621883869171143, - "learning_rate": 3.127877617703847e-06, - "loss": 0.0577, + "epoch": 8.95737412743205, + "grad_norm": 1.0092281103134155, + "learning_rate": 1.042625872567949e-06, + "loss": 0.0596, "step": 60310 }, { - "epoch": 4.479429674736373, - "grad_norm": 0.48624280095100403, - "learning_rate": 3.1234219515817614e-06, - "loss": 0.048, + "epoch": 8.958859349472746, + "grad_norm": 0.7205719351768494, + "learning_rate": 1.041140650527254e-06, + "loss": 0.0481, "step": 60320 }, { - "epoch": 4.480172285756721, - "grad_norm": 0.3645511269569397, - "learning_rate": 3.1189662854596764e-06, - "loss": 0.0462, + "epoch": 8.960344571513442, + "grad_norm": 0.3967041075229645, + "learning_rate": 1.0396554284865589e-06, + "loss": 0.0539, "step": 60330 }, { - "epoch": 4.480914896777068, - "grad_norm": 0.7930494546890259, - "learning_rate": 3.114510619337591e-06, - "loss": 0.0435, + "epoch": 8.961829793554136, + "grad_norm": 1.255529522895813, + "learning_rate": 1.0381702064458637e-06, + "loss": 0.0441, "step": 60340 }, { - "epoch": 4.481657507797416, - "grad_norm": 1.6003566980361938, - "learning_rate": 3.110054953215506e-06, - "loss": 0.0577, + "epoch": 8.963315015594832, + "grad_norm": 1.2569066286087036, + "learning_rate": 1.0366849844051686e-06, + "loss": 0.0587, "step": 60350 }, { - "epoch": 4.482400118817763, - "grad_norm": 0.8492751717567444, - "learning_rate": 3.1055992870934206e-06, - "loss": 0.049, + "epoch": 8.964800237635526, + "grad_norm": 0.9882292151451111, + "learning_rate": 1.0351997623644737e-06, + "loss": 0.0442, "step": 60360 }, { - "epoch": 4.483142729838111, - "grad_norm": 2.092189311981201, - "learning_rate": 3.101143620971335e-06, - "loss": 0.057, + "epoch": 8.966285459676222, + "grad_norm": 0.5611401796340942, + "learning_rate": 1.0337145403237785e-06, + "loss": 0.0463, "step": 60370 }, { - "epoch": 4.483885340858459, - "grad_norm": 0.5786078572273254, - "learning_rate": 3.09668795484925e-06, - "loss": 0.0569, + "epoch": 8.967770681716917, + "grad_norm": 1.5252411365509033, + "learning_rate": 1.0322293182830834e-06, + "loss": 0.0382, "step": 60380 }, { - "epoch": 4.484627951878806, - "grad_norm": 0.4640141725540161, - "learning_rate": 3.0922322887271647e-06, - "loss": 0.0618, + "epoch": 8.969255903757611, + "grad_norm": 1.0554333925247192, + "learning_rate": 1.0307440962423882e-06, + "loss": 0.0518, "step": 60390 }, { - "epoch": 4.485370562899154, - "grad_norm": 2.307746171951294, - "learning_rate": 3.0877766226050793e-06, - "loss": 0.0578, + "epoch": 8.970741125798307, + "grad_norm": 0.7427452802658081, + "learning_rate": 1.029258874201693e-06, + "loss": 0.0524, "step": 60400 }, { - "epoch": 4.486113173919501, - "grad_norm": 1.5179271697998047, - "learning_rate": 3.0833209564829943e-06, - "loss": 0.0363, + "epoch": 8.972226347839001, + "grad_norm": 1.093377709388733, + "learning_rate": 1.0277736521609982e-06, + "loss": 0.0497, "step": 60410 }, { - "epoch": 4.4868557849398485, - "grad_norm": 0.30121228098869324, - "learning_rate": 3.0788652903609093e-06, - "loss": 0.05, + "epoch": 8.973711569879697, + "grad_norm": 0.6824570298194885, + "learning_rate": 1.0262884301203032e-06, + "loss": 0.0416, "step": 60420 }, { - "epoch": 4.487598395960196, - "grad_norm": 2.5715456008911133, - "learning_rate": 3.074409624238824e-06, - "loss": 0.0622, + "epoch": 8.975196791920393, + "grad_norm": 0.846742570400238, + "learning_rate": 1.024803208079608e-06, + "loss": 0.0551, "step": 60430 }, { - "epoch": 4.488341006980543, - "grad_norm": 2.3509955406188965, - "learning_rate": 3.069953958116739e-06, - "loss": 0.0728, + "epoch": 8.976682013961087, + "grad_norm": 1.6127779483795166, + "learning_rate": 1.023317986038913e-06, + "loss": 0.0429, "step": 60440 }, { - "epoch": 4.489083618000891, - "grad_norm": 1.9906114339828491, - "learning_rate": 3.0654982919946534e-06, - "loss": 0.0598, + "epoch": 8.978167236001783, + "grad_norm": 1.100595235824585, + "learning_rate": 1.0218327639982178e-06, + "loss": 0.0481, "step": 60450 }, { - "epoch": 4.489826229021238, - "grad_norm": 1.7488093376159668, - "learning_rate": 3.061042625872568e-06, - "loss": 0.0452, + "epoch": 8.979652458042477, + "grad_norm": 1.0692342519760132, + "learning_rate": 1.0203475419575227e-06, + "loss": 0.0514, "step": 60460 }, { - "epoch": 4.490568840041586, - "grad_norm": 0.5462767481803894, - "learning_rate": 3.056586959750483e-06, - "loss": 0.0505, + "epoch": 8.981137680083172, + "grad_norm": 1.2287728786468506, + "learning_rate": 1.0188623199168277e-06, + "loss": 0.055, "step": 60470 }, { - "epoch": 4.491311451061934, - "grad_norm": 0.6967370510101318, - "learning_rate": 3.0521312936283975e-06, - "loss": 0.0968, + "epoch": 8.982622902123868, + "grad_norm": 0.44107210636138916, + "learning_rate": 1.0173770978761326e-06, + "loss": 0.0465, "step": 60480 }, { - "epoch": 4.492054062082281, - "grad_norm": 0.6573207378387451, - "learning_rate": 3.047675627506312e-06, - "loss": 0.045, + "epoch": 8.984108124164562, + "grad_norm": 0.5867712497711182, + "learning_rate": 1.0158918758354374e-06, + "loss": 0.0474, "step": 60490 }, { - "epoch": 4.492796673102629, - "grad_norm": 0.9481235146522522, - "learning_rate": 3.043219961384227e-06, - "loss": 0.0377, + "epoch": 8.985593346205258, + "grad_norm": 0.22786487638950348, + "learning_rate": 1.0144066537947423e-06, + "loss": 0.0479, "step": 60500 }, { - "epoch": 4.493539284122976, - "grad_norm": 0.8041633367538452, - "learning_rate": 3.0387642952621417e-06, - "loss": 0.0376, + "epoch": 8.987078568245952, + "grad_norm": 0.6541853547096252, + "learning_rate": 1.0129214317540474e-06, + "loss": 0.049, "step": 60510 }, { - "epoch": 4.494281895143324, - "grad_norm": 0.4451924264431, - "learning_rate": 3.0343086291400562e-06, - "loss": 0.0509, + "epoch": 8.988563790286648, + "grad_norm": 0.5077256560325623, + "learning_rate": 1.0114362097133522e-06, + "loss": 0.0345, "step": 60520 }, { - "epoch": 4.495024506163672, - "grad_norm": 1.242018222808838, - "learning_rate": 3.0298529630179712e-06, - "loss": 0.0612, + "epoch": 8.990049012327344, + "grad_norm": 0.905841588973999, + "learning_rate": 1.009950987672657e-06, + "loss": 0.0754, "step": 60530 }, { - "epoch": 4.495767117184019, - "grad_norm": 1.5927053689956665, - "learning_rate": 3.025397296895886e-06, - "loss": 0.0466, + "epoch": 8.991534234368038, + "grad_norm": 1.5534299612045288, + "learning_rate": 1.008465765631962e-06, + "loss": 0.0417, "step": 60540 }, { - "epoch": 4.496509728204367, - "grad_norm": 0.3729563355445862, - "learning_rate": 3.020941630773801e-06, - "loss": 0.0654, + "epoch": 8.993019456408733, + "grad_norm": 0.6061202883720398, + "learning_rate": 1.006980543591267e-06, + "loss": 0.0545, "step": 60550 }, { - "epoch": 4.497252339224714, - "grad_norm": 1.2885338068008423, - "learning_rate": 3.0164859646517158e-06, - "loss": 0.0394, + "epoch": 8.994504678449427, + "grad_norm": 0.6618340015411377, + "learning_rate": 1.0054953215505719e-06, + "loss": 0.0508, "step": 60560 }, { - "epoch": 4.497994950245062, - "grad_norm": 0.8227145075798035, - "learning_rate": 3.0120302985296304e-06, - "loss": 0.0379, + "epoch": 8.995989900490123, + "grad_norm": 0.9549134373664856, + "learning_rate": 1.004010099509877e-06, + "loss": 0.0739, "step": 60570 }, { - "epoch": 4.4987375612654095, - "grad_norm": 1.220255970954895, - "learning_rate": 3.007574632407545e-06, - "loss": 0.0491, + "epoch": 8.997475122530819, + "grad_norm": 0.991654634475708, + "learning_rate": 1.0025248774691818e-06, + "loss": 0.0385, "step": 60580 }, { - "epoch": 4.4994801722857565, - "grad_norm": 1.4378726482391357, - "learning_rate": 3.00311896628546e-06, - "loss": 0.0348, + "epoch": 8.998960344571513, + "grad_norm": 0.44757747650146484, + "learning_rate": 1.0010396554284866e-06, + "loss": 0.048, "step": 60590 }, { - "epoch": 4.500222783306104, - "grad_norm": 1.8809531927108765, - "learning_rate": 2.9986633001633745e-06, - "loss": 0.0556, + "epoch": 9.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.05483611300587654, + "eval_runtime": 203.9869, + "eval_samples_per_second": 186.38, + "eval_steps_per_second": 5.829, + "step": 60597 + }, + { + "epoch": 9.000445566612209, + "grad_norm": 0.9724392890930176, + "learning_rate": 9.995544333877915e-07, + "loss": 0.0761, "step": 60600 }, { - "epoch": 4.500965394326451, - "grad_norm": 0.39262035489082336, - "learning_rate": 2.994207634041289e-06, - "loss": 0.0387, + "epoch": 9.001930788652903, + "grad_norm": 0.6741613149642944, + "learning_rate": 9.980692113470966e-07, + "loss": 0.0625, "step": 60610 }, { - "epoch": 4.501708005346799, - "grad_norm": 1.774888038635254, - "learning_rate": 2.989751967919204e-06, - "loss": 0.0463, + "epoch": 9.003416010693599, + "grad_norm": 1.055646538734436, + "learning_rate": 9.965839893064014e-07, + "loss": 0.0598, "step": 60620 }, { - "epoch": 4.502450616367147, - "grad_norm": 1.312552809715271, - "learning_rate": 2.9852963017971186e-06, - "loss": 0.0963, + "epoch": 9.004901232734294, + "grad_norm": 0.49605226516723633, + "learning_rate": 9.950987672657063e-07, + "loss": 0.0472, "step": 60630 }, { - "epoch": 4.503193227387494, - "grad_norm": 1.1189274787902832, - "learning_rate": 2.980840635675033e-06, - "loss": 0.0553, + "epoch": 9.006386454774988, + "grad_norm": 0.8366076350212097, + "learning_rate": 9.936135452250111e-07, + "loss": 0.0445, "step": 60640 }, { - "epoch": 4.503935838407842, - "grad_norm": 1.8008004426956177, - "learning_rate": 2.976384969552948e-06, - "loss": 0.0713, + "epoch": 9.007871676815684, + "grad_norm": 0.8322066068649292, + "learning_rate": 9.92128323184316e-07, + "loss": 0.0496, "step": 60650 }, { - "epoch": 4.504678449428189, - "grad_norm": 0.9484963417053223, - "learning_rate": 2.9719293034308628e-06, - "loss": 0.0425, + "epoch": 9.00935689885638, + "grad_norm": 0.7049952149391174, + "learning_rate": 9.90643101143621e-07, + "loss": 0.0509, "step": 60660 }, { - "epoch": 4.505421060448537, - "grad_norm": 1.8093992471694946, - "learning_rate": 2.9674736373087778e-06, - "loss": 0.0491, + "epoch": 9.010842120897074, + "grad_norm": 0.583831250667572, + "learning_rate": 9.89157879102926e-07, + "loss": 0.0515, "step": 60670 }, { - "epoch": 4.506163671468885, - "grad_norm": 0.9192953705787659, - "learning_rate": 2.9630179711866927e-06, - "loss": 0.0439, + "epoch": 9.01232734293777, + "grad_norm": 1.2372843027114868, + "learning_rate": 9.87672657062231e-07, + "loss": 0.0589, "step": 60680 }, { - "epoch": 4.506906282489232, - "grad_norm": 1.6268279552459717, - "learning_rate": 2.9585623050646073e-06, - "loss": 0.0575, + "epoch": 9.013812564978464, + "grad_norm": 0.8074018955230713, + "learning_rate": 9.861874350215358e-07, + "loss": 0.0573, "step": 60690 }, { - "epoch": 4.50764889350958, - "grad_norm": 0.32879000902175903, - "learning_rate": 2.9541066389425223e-06, - "loss": 0.0425, + "epoch": 9.01529778701916, + "grad_norm": 0.665175199508667, + "learning_rate": 9.847022129808407e-07, + "loss": 0.0388, "step": 60700 }, { - "epoch": 4.508391504529927, - "grad_norm": 1.2514315843582153, - "learning_rate": 2.949650972820437e-06, - "loss": 0.0541, + "epoch": 9.016783009059855, + "grad_norm": 1.4056196212768555, + "learning_rate": 9.832169909401458e-07, + "loss": 0.0455, "step": 60710 }, { - "epoch": 4.509134115550275, - "grad_norm": 1.9278550148010254, - "learning_rate": 2.9451953066983515e-06, - "loss": 0.0818, + "epoch": 9.01826823110055, + "grad_norm": 0.739521324634552, + "learning_rate": 9.817317688994506e-07, + "loss": 0.0792, "step": 60720 }, { - "epoch": 4.5098767265706226, - "grad_norm": 0.9422726035118103, - "learning_rate": 2.9407396405762664e-06, - "loss": 0.0572, + "epoch": 9.019753453141245, + "grad_norm": 1.6123601198196411, + "learning_rate": 9.802465468587555e-07, + "loss": 0.0504, "step": 60730 }, { - "epoch": 4.51061933759097, - "grad_norm": 2.648932695388794, - "learning_rate": 2.936283974454181e-06, - "loss": 0.0671, + "epoch": 9.02123867518194, + "grad_norm": 0.39929506182670593, + "learning_rate": 9.787613248180603e-07, + "loss": 0.0435, "step": 60740 }, { - "epoch": 4.5113619486113175, - "grad_norm": 2.699385166168213, - "learning_rate": 2.9318283083320956e-06, - "loss": 0.0477, + "epoch": 9.022723897222635, + "grad_norm": 0.8194249272346497, + "learning_rate": 9.772761027773652e-07, + "loss": 0.0464, "step": 60750 }, { - "epoch": 4.5121045596316645, - "grad_norm": 0.6302013397216797, - "learning_rate": 2.9273726422100106e-06, - "loss": 0.056, + "epoch": 9.02420911926333, + "grad_norm": 0.744668185710907, + "learning_rate": 9.757908807366703e-07, + "loss": 0.0542, "step": 60760 }, { - "epoch": 4.512847170652012, - "grad_norm": 1.3976058959960938, - "learning_rate": 2.922916976087925e-06, - "loss": 0.0684, + "epoch": 9.025694341304025, + "grad_norm": 0.6779729723930359, + "learning_rate": 9.743056586959751e-07, + "loss": 0.0612, "step": 60770 }, { - "epoch": 4.51358978167236, - "grad_norm": 1.1172116994857788, - "learning_rate": 2.9184613099658397e-06, - "loss": 0.0705, + "epoch": 9.02717956334472, + "grad_norm": 1.2432478666305542, + "learning_rate": 9.7282043665528e-07, + "loss": 0.0477, "step": 60780 }, { - "epoch": 4.514332392692707, - "grad_norm": 0.6140624284744263, - "learning_rate": 2.9140056438437547e-06, - "loss": 0.0561, + "epoch": 9.028664785385415, + "grad_norm": 1.8497051000595093, + "learning_rate": 9.713352146145848e-07, + "loss": 0.0598, "step": 60790 }, { - "epoch": 4.515075003713055, - "grad_norm": 1.3316572904586792, - "learning_rate": 2.9095499777216693e-06, - "loss": 0.0861, + "epoch": 9.03015000742611, + "grad_norm": 1.5731254816055298, + "learning_rate": 9.6984999257389e-07, + "loss": 0.046, "step": 60800 }, { - "epoch": 4.515817614733402, - "grad_norm": 0.5338196754455566, - "learning_rate": 2.9050943115995843e-06, - "loss": 0.0516, + "epoch": 9.031635229466806, + "grad_norm": 0.7579479217529297, + "learning_rate": 9.683647705331948e-07, + "loss": 0.0416, "step": 60810 }, { - "epoch": 4.51656022575375, - "grad_norm": 1.8989540338516235, - "learning_rate": 2.9006386454774993e-06, - "loss": 0.0635, + "epoch": 9.0331204515075, + "grad_norm": 0.39340782165527344, + "learning_rate": 9.668795484924998e-07, + "loss": 0.0442, "step": 60820 }, { - "epoch": 4.517302836774098, - "grad_norm": 0.7707126140594482, - "learning_rate": 2.896182979355414e-06, - "loss": 0.076, + "epoch": 9.034605673548196, + "grad_norm": 0.7787233591079712, + "learning_rate": 9.653943264518047e-07, + "loss": 0.0546, "step": 60830 }, { - "epoch": 4.518045447794445, - "grad_norm": 3.540522575378418, - "learning_rate": 2.8917273132333284e-06, - "loss": 0.0565, + "epoch": 9.03609089558889, + "grad_norm": 1.0975919961929321, + "learning_rate": 9.639091044111095e-07, + "loss": 0.0407, "step": 60840 }, { - "epoch": 4.518788058814793, - "grad_norm": 1.879492998123169, - "learning_rate": 2.8872716471112434e-06, - "loss": 0.0722, + "epoch": 9.037576117629586, + "grad_norm": 0.8572696447372437, + "learning_rate": 9.624238823704144e-07, + "loss": 0.0609, "step": 60850 }, { - "epoch": 4.519530669835141, - "grad_norm": 2.0404903888702393, - "learning_rate": 2.882815980989158e-06, - "loss": 0.0665, + "epoch": 9.039061339670281, + "grad_norm": 0.8922178745269775, + "learning_rate": 9.609386603297195e-07, + "loss": 0.0568, "step": 60860 }, { - "epoch": 4.520273280855488, - "grad_norm": 0.7342921495437622, - "learning_rate": 2.8783603148670726e-06, - "loss": 0.0549, + "epoch": 9.040546561710975, + "grad_norm": 1.2011473178863525, + "learning_rate": 9.594534382890243e-07, + "loss": 0.0614, "step": 60870 }, { - "epoch": 4.521015891875836, - "grad_norm": 2.278888463973999, - "learning_rate": 2.8739046487449876e-06, - "loss": 0.0472, + "epoch": 9.042031783751671, + "grad_norm": 0.6790103316307068, + "learning_rate": 9.579682162483292e-07, + "loss": 0.0514, "step": 60880 }, { - "epoch": 4.521758502896183, - "grad_norm": 0.994851291179657, - "learning_rate": 2.869448982622902e-06, - "loss": 0.0613, + "epoch": 9.043517005792365, + "grad_norm": 0.8033337593078613, + "learning_rate": 9.56482994207634e-07, + "loss": 0.065, "step": 60890 }, { - "epoch": 4.5225011139165305, - "grad_norm": 1.3378137350082397, - "learning_rate": 2.8649933165008167e-06, - "loss": 0.0629, + "epoch": 9.045002227833061, + "grad_norm": 0.672807514667511, + "learning_rate": 9.549977721669391e-07, + "loss": 0.0435, "step": 60900 }, { - "epoch": 4.523243724936878, - "grad_norm": 1.5899670124053955, - "learning_rate": 2.8605376503787317e-06, - "loss": 0.0526, + "epoch": 9.046487449873757, + "grad_norm": 1.7294105291366577, + "learning_rate": 9.535125501262439e-07, + "loss": 0.0599, "step": 60910 }, { - "epoch": 4.523986335957225, - "grad_norm": 0.4787708520889282, - "learning_rate": 2.8560819842566463e-06, - "loss": 0.0414, + "epoch": 9.04797267191445, + "grad_norm": 0.6789121031761169, + "learning_rate": 9.520273280855488e-07, + "loss": 0.0472, "step": 60920 }, { - "epoch": 4.524728946977573, - "grad_norm": 1.1397485733032227, - "learning_rate": 2.851626318134561e-06, - "loss": 0.0578, + "epoch": 9.049457893955147, + "grad_norm": 1.1161093711853027, + "learning_rate": 9.505421060448537e-07, + "loss": 0.054, "step": 60930 }, { - "epoch": 4.52547155799792, - "grad_norm": 1.5434584617614746, - "learning_rate": 2.8471706520124762e-06, - "loss": 0.0783, + "epoch": 9.05094311599584, + "grad_norm": 0.7191179394721985, + "learning_rate": 9.490568840041587e-07, + "loss": 0.0675, "step": 60940 }, { - "epoch": 4.526214169018268, - "grad_norm": 0.4989255666732788, - "learning_rate": 2.842714985890391e-06, - "loss": 0.0374, + "epoch": 9.052428338036536, + "grad_norm": 1.8499068021774292, + "learning_rate": 9.475716619634637e-07, + "loss": 0.0633, "step": 60950 }, { - "epoch": 4.526956780038616, - "grad_norm": 1.2678778171539307, - "learning_rate": 2.8382593197683054e-06, - "loss": 0.0509, + "epoch": 9.053913560077232, + "grad_norm": 0.590984582901001, + "learning_rate": 9.460864399227686e-07, + "loss": 0.0538, "step": 60960 }, { - "epoch": 4.527699391058963, - "grad_norm": 1.7154262065887451, - "learning_rate": 2.8338036536462204e-06, - "loss": 0.0404, + "epoch": 9.055398782117926, + "grad_norm": 0.873106837272644, + "learning_rate": 9.446012178820735e-07, + "loss": 0.0653, "step": 60970 }, { - "epoch": 4.528442002079311, - "grad_norm": 0.7340508103370667, - "learning_rate": 2.829347987524135e-06, - "loss": 0.0711, + "epoch": 9.056884004158622, + "grad_norm": 1.1326630115509033, + "learning_rate": 9.431159958413784e-07, + "loss": 0.0481, "step": 60980 }, { - "epoch": 4.529184613099658, - "grad_norm": 1.5638376474380493, - "learning_rate": 2.8248923214020495e-06, - "loss": 0.0575, + "epoch": 9.058369226199316, + "grad_norm": 1.133381724357605, + "learning_rate": 9.416307738006832e-07, + "loss": 0.0438, "step": 60990 }, { - "epoch": 4.529927224120006, - "grad_norm": 1.4016845226287842, - "learning_rate": 2.8204366552799645e-06, - "loss": 0.0277, + "epoch": 9.059854448240012, + "grad_norm": 0.30731022357940674, + "learning_rate": 9.401455517599882e-07, + "loss": 0.0387, "step": 61000 }, { - "epoch": 4.530669835140354, - "grad_norm": 0.4884537160396576, - "learning_rate": 2.815980989157879e-06, - "loss": 0.0537, + "epoch": 9.061339670280708, + "grad_norm": 0.6071634888648987, + "learning_rate": 9.386603297192931e-07, + "loss": 0.0587, "step": 61010 }, { - "epoch": 4.531412446160701, - "grad_norm": 1.8895937204360962, - "learning_rate": 2.811525323035794e-06, - "loss": 0.08, + "epoch": 9.062824892321402, + "grad_norm": 0.34380868077278137, + "learning_rate": 9.37175107678598e-07, + "loss": 0.0471, "step": 61020 }, { - "epoch": 4.532155057181049, - "grad_norm": 0.5907909274101257, - "learning_rate": 2.8070696569137087e-06, - "loss": 0.0555, + "epoch": 9.064310114362097, + "grad_norm": 1.20027494430542, + "learning_rate": 9.356898856379029e-07, + "loss": 0.0346, "step": 61030 }, { - "epoch": 4.532897668201396, - "grad_norm": 3.3188436031341553, - "learning_rate": 2.8026139907916232e-06, - "loss": 0.0834, + "epoch": 9.065795336402791, + "grad_norm": 1.0148383378982544, + "learning_rate": 9.342046635972078e-07, + "loss": 0.056, "step": 61040 }, { - "epoch": 4.533640279221744, - "grad_norm": 1.3350552320480347, - "learning_rate": 2.7981583246695382e-06, - "loss": 0.0544, + "epoch": 9.067280558443487, + "grad_norm": 1.2376158237457275, + "learning_rate": 9.327194415565127e-07, + "loss": 0.0447, "step": 61050 }, { - "epoch": 4.5343828902420915, - "grad_norm": 1.2445580959320068, - "learning_rate": 2.793702658547453e-06, - "loss": 0.0694, + "epoch": 9.068765780484183, + "grad_norm": 0.8556867837905884, + "learning_rate": 9.312342195158177e-07, + "loss": 0.0609, "step": 61060 }, { - "epoch": 4.5351255012624385, - "grad_norm": 2.0901408195495605, - "learning_rate": 2.7892469924253674e-06, - "loss": 0.0721, + "epoch": 9.070251002524877, + "grad_norm": 1.4480454921722412, + "learning_rate": 9.297489974751225e-07, + "loss": 0.0663, "step": 61070 }, { - "epoch": 4.535868112282786, - "grad_norm": 1.3586374521255493, - "learning_rate": 2.7847913263032828e-06, - "loss": 0.0749, + "epoch": 9.071736224565573, + "grad_norm": 0.8202183246612549, + "learning_rate": 9.282637754344276e-07, + "loss": 0.0569, "step": 61080 }, { - "epoch": 4.536610723303134, - "grad_norm": 0.6412113904953003, - "learning_rate": 2.7803356601811973e-06, - "loss": 0.0538, + "epoch": 9.073221446606267, + "grad_norm": 1.4094607830047607, + "learning_rate": 9.267785533937324e-07, + "loss": 0.0552, "step": 61090 }, { - "epoch": 4.537353334323481, - "grad_norm": 0.7717999815940857, - "learning_rate": 2.775879994059112e-06, - "loss": 0.0578, + "epoch": 9.074706668646963, + "grad_norm": 0.8718863725662231, + "learning_rate": 9.252933313530374e-07, + "loss": 0.0544, "step": 61100 }, { - "epoch": 4.538095945343829, - "grad_norm": 0.7013200521469116, - "learning_rate": 2.771424327937027e-06, - "loss": 0.0765, + "epoch": 9.076191890687658, + "grad_norm": 0.912745475769043, + "learning_rate": 9.238081093123423e-07, + "loss": 0.056, "step": 61110 }, { - "epoch": 4.538838556364176, - "grad_norm": 1.3296678066253662, - "learning_rate": 2.7669686618149415e-06, - "loss": 0.0531, + "epoch": 9.077677112728352, + "grad_norm": 0.773205578327179, + "learning_rate": 9.223228872716472e-07, + "loss": 0.0547, "step": 61120 }, { - "epoch": 4.539581167384524, - "grad_norm": 1.7592175006866455, - "learning_rate": 2.762512995692856e-06, - "loss": 0.051, + "epoch": 9.079162334769048, + "grad_norm": 0.6849109530448914, + "learning_rate": 9.208376652309521e-07, + "loss": 0.0501, "step": 61130 }, { - "epoch": 4.540323778404872, - "grad_norm": 1.7775698900222778, - "learning_rate": 2.758057329570771e-06, - "loss": 0.05, + "epoch": 9.080647556809742, + "grad_norm": 1.008122444152832, + "learning_rate": 9.19352443190257e-07, + "loss": 0.0651, "step": 61140 }, { - "epoch": 4.541066389425219, - "grad_norm": 1.649628758430481, - "learning_rate": 2.7536016634486856e-06, - "loss": 0.0464, + "epoch": 9.082132778850438, + "grad_norm": 0.9452615976333618, + "learning_rate": 9.178672211495619e-07, + "loss": 0.0542, "step": 61150 }, { - "epoch": 4.541809000445567, - "grad_norm": 1.310793399810791, - "learning_rate": 2.7491459973266e-06, - "loss": 0.0686, + "epoch": 9.083618000891134, + "grad_norm": 0.38566067814826965, + "learning_rate": 9.163819991088669e-07, + "loss": 0.048, "step": 61160 }, { - "epoch": 4.542551611465914, - "grad_norm": 1.5081290006637573, - "learning_rate": 2.744690331204515e-06, - "loss": 0.0675, + "epoch": 9.085103222931828, + "grad_norm": 0.8904908895492554, + "learning_rate": 9.148967770681717e-07, + "loss": 0.049, "step": 61170 }, { - "epoch": 4.543294222486262, - "grad_norm": 1.5002572536468506, - "learning_rate": 2.7402346650824298e-06, - "loss": 0.0595, + "epoch": 9.086588444972524, + "grad_norm": 0.8872094750404358, + "learning_rate": 9.134115550274766e-07, + "loss": 0.0484, "step": 61180 }, { - "epoch": 4.54403683350661, - "grad_norm": 1.7009143829345703, - "learning_rate": 2.7357789989603443e-06, - "loss": 0.0824, + "epoch": 9.088073667013218, + "grad_norm": 1.5284932851791382, + "learning_rate": 9.119263329867815e-07, + "loss": 0.0275, "step": 61190 }, { - "epoch": 4.544779444526957, - "grad_norm": 2.5938093662261963, - "learning_rate": 2.7313233328382593e-06, - "loss": 0.052, + "epoch": 9.089558889053913, + "grad_norm": 1.5623444318771362, + "learning_rate": 9.104411109460864e-07, + "loss": 0.0565, "step": 61200 }, { - "epoch": 4.545522055547305, - "grad_norm": 1.4305498600006104, - "learning_rate": 2.7268676667161743e-06, - "loss": 0.0621, + "epoch": 9.09104411109461, + "grad_norm": 0.670907199382782, + "learning_rate": 9.089558889053915e-07, + "loss": 0.0548, "step": 61210 }, { - "epoch": 4.546264666567652, - "grad_norm": 1.7838150262832642, - "learning_rate": 2.722412000594089e-06, - "loss": 0.0711, + "epoch": 9.092529333135303, + "grad_norm": 1.4543319940567017, + "learning_rate": 9.074706668646964e-07, + "loss": 0.0524, "step": 61220 }, { - "epoch": 4.5470072775879995, - "grad_norm": 0.737729549407959, - "learning_rate": 2.717956334472004e-06, - "loss": 0.0374, + "epoch": 9.094014555175999, + "grad_norm": 0.6262223720550537, + "learning_rate": 9.059854448240013e-07, + "loss": 0.0519, "step": 61230 }, { - "epoch": 4.547749888608347, - "grad_norm": 1.021944522857666, - "learning_rate": 2.7135006683499184e-06, - "loss": 0.0631, + "epoch": 9.095499777216695, + "grad_norm": 0.23713155090808868, + "learning_rate": 9.045002227833063e-07, + "loss": 0.0397, "step": 61240 }, { - "epoch": 4.548492499628694, - "grad_norm": 0.5011008977890015, - "learning_rate": 2.709045002227833e-06, - "loss": 0.0448, + "epoch": 9.096984999257389, + "grad_norm": 0.33859917521476746, + "learning_rate": 9.030150007426111e-07, + "loss": 0.0588, "step": 61250 }, { - "epoch": 4.549235110649042, - "grad_norm": 3.05027437210083, - "learning_rate": 2.704589336105748e-06, - "loss": 0.0467, + "epoch": 9.098470221298085, + "grad_norm": 0.9685537815093994, + "learning_rate": 9.01529778701916e-07, + "loss": 0.0479, "step": 61260 }, { - "epoch": 4.549977721669389, - "grad_norm": 3.195746660232544, - "learning_rate": 2.7001336699836626e-06, - "loss": 0.0584, + "epoch": 9.099955443338779, + "grad_norm": 0.892279326915741, + "learning_rate": 9.000445566612209e-07, + "loss": 0.0421, "step": 61270 }, { - "epoch": 4.550720332689737, - "grad_norm": 1.1807307004928589, - "learning_rate": 2.695678003861577e-06, - "loss": 0.0474, + "epoch": 9.101440665379474, + "grad_norm": 0.8080083727836609, + "learning_rate": 8.985593346205258e-07, + "loss": 0.0632, "step": 61280 }, { - "epoch": 4.551462943710085, - "grad_norm": 2.7736690044403076, - "learning_rate": 2.691222337739492e-06, - "loss": 0.0686, + "epoch": 9.10292588742017, + "grad_norm": 0.9495354294776917, + "learning_rate": 8.970741125798308e-07, + "loss": 0.0561, "step": 61290 }, { - "epoch": 4.552205554730432, - "grad_norm": 1.5270766019821167, - "learning_rate": 2.6867666716174067e-06, - "loss": 0.0425, + "epoch": 9.104411109460864, + "grad_norm": 0.7430086135864258, + "learning_rate": 8.955888905391356e-07, + "loss": 0.0803, "step": 61300 }, { - "epoch": 4.55294816575078, - "grad_norm": 1.2185275554656982, - "learning_rate": 2.6823110054953213e-06, - "loss": 0.0443, + "epoch": 9.10589633150156, + "grad_norm": 1.0035367012023926, + "learning_rate": 8.941036684984406e-07, + "loss": 0.0485, "step": 61310 }, { - "epoch": 4.553690776771127, - "grad_norm": 1.2609001398086548, - "learning_rate": 2.6778553393732363e-06, - "loss": 0.0793, + "epoch": 9.107381553542254, + "grad_norm": 1.3069180250167847, + "learning_rate": 8.926184464577454e-07, + "loss": 0.0487, "step": 61320 }, { - "epoch": 4.554433387791475, - "grad_norm": 2.8986945152282715, - "learning_rate": 2.673399673251151e-06, - "loss": 0.0704, + "epoch": 9.10886677558295, + "grad_norm": 0.8913138508796692, + "learning_rate": 8.911332244170504e-07, + "loss": 0.0553, "step": 61330 }, { - "epoch": 4.555175998811823, - "grad_norm": 1.5010956525802612, - "learning_rate": 2.668944007129066e-06, - "loss": 0.0629, + "epoch": 9.110351997623646, + "grad_norm": 0.29481425881385803, + "learning_rate": 8.896480023763554e-07, + "loss": 0.0586, "step": 61340 }, { - "epoch": 4.55591860983217, - "grad_norm": 0.4704782962799072, - "learning_rate": 2.664488341006981e-06, - "loss": 0.0613, + "epoch": 9.11183721966434, + "grad_norm": 1.036330223083496, + "learning_rate": 8.881627803356603e-07, + "loss": 0.0396, "step": 61350 }, { - "epoch": 4.556661220852518, - "grad_norm": 0.9099952578544617, - "learning_rate": 2.6600326748848954e-06, - "loss": 0.0481, + "epoch": 9.113322441705035, + "grad_norm": 0.602928638458252, + "learning_rate": 8.866775582949652e-07, + "loss": 0.0696, "step": 61360 }, { - "epoch": 4.557403831872865, - "grad_norm": 3.606095790863037, - "learning_rate": 2.6555770087628104e-06, - "loss": 0.0792, + "epoch": 9.11480766374573, + "grad_norm": 1.1184871196746826, + "learning_rate": 8.851923362542701e-07, + "loss": 0.0451, "step": 61370 }, { - "epoch": 4.558146442893213, - "grad_norm": 1.9132657051086426, - "learning_rate": 2.651121342640725e-06, - "loss": 0.0379, + "epoch": 9.116292885786425, + "grad_norm": 1.7323182821273804, + "learning_rate": 8.83707114213575e-07, + "loss": 0.0739, "step": 61380 }, { - "epoch": 4.5588890539135605, - "grad_norm": 1.5124993324279785, - "learning_rate": 2.6466656765186396e-06, - "loss": 0.0654, + "epoch": 9.117778107827121, + "grad_norm": 1.0334404706954956, + "learning_rate": 8.8222189217288e-07, + "loss": 0.0581, "step": 61390 }, { - "epoch": 4.5596316649339075, - "grad_norm": 0.876136839389801, - "learning_rate": 2.6422100103965545e-06, - "loss": 0.0769, + "epoch": 9.119263329867815, + "grad_norm": 0.5090399384498596, + "learning_rate": 8.807366701321848e-07, + "loss": 0.055, "step": 61400 }, { - "epoch": 4.560374275954255, - "grad_norm": 2.7477357387542725, - "learning_rate": 2.637754344274469e-06, - "loss": 0.0912, + "epoch": 9.12074855190851, + "grad_norm": 1.2942936420440674, + "learning_rate": 8.792514480914898e-07, + "loss": 0.0644, "step": 61410 }, { - "epoch": 4.561116886974602, - "grad_norm": 0.6993147730827332, - "learning_rate": 2.6332986781523837e-06, - "loss": 0.0432, + "epoch": 9.122233773949205, + "grad_norm": 1.3819553852081299, + "learning_rate": 8.777662260507946e-07, + "loss": 0.0666, "step": 61420 }, { - "epoch": 4.56185949799495, - "grad_norm": 2.7787704467773438, - "learning_rate": 2.6288430120302987e-06, - "loss": 0.0649, + "epoch": 9.1237189959899, + "grad_norm": 0.6027370691299438, + "learning_rate": 8.762810040100996e-07, + "loss": 0.0439, "step": 61430 }, { - "epoch": 4.562602109015298, - "grad_norm": 1.603108286857605, - "learning_rate": 2.6243873459082133e-06, - "loss": 0.0468, + "epoch": 9.125204218030596, + "grad_norm": 1.0936373472213745, + "learning_rate": 8.747957819694045e-07, + "loss": 0.0503, "step": 61440 }, { - "epoch": 4.563344720035645, - "grad_norm": 2.7773287296295166, - "learning_rate": 2.619931679786128e-06, - "loss": 0.0923, + "epoch": 9.12668944007129, + "grad_norm": 0.3803895115852356, + "learning_rate": 8.733105599287093e-07, + "loss": 0.0654, "step": 61450 }, { - "epoch": 4.564087331055993, - "grad_norm": 0.5554677844047546, - "learning_rate": 2.615476013664043e-06, - "loss": 0.1035, + "epoch": 9.128174662111986, + "grad_norm": 0.4349137544631958, + "learning_rate": 8.718253378880143e-07, + "loss": 0.0542, "step": 61460 }, { - "epoch": 4.56482994207634, - "grad_norm": 0.5234212279319763, - "learning_rate": 2.611020347541958e-06, - "loss": 0.0597, + "epoch": 9.12965988415268, + "grad_norm": 0.6400302648544312, + "learning_rate": 8.703401158473193e-07, + "loss": 0.0622, "step": 61470 }, { - "epoch": 4.565572553096688, - "grad_norm": 0.9597153067588806, - "learning_rate": 2.6065646814198724e-06, - "loss": 0.0679, + "epoch": 9.131145106193376, + "grad_norm": 1.309779405593872, + "learning_rate": 8.688548938066242e-07, + "loss": 0.0314, "step": 61480 }, { - "epoch": 4.566315164117036, - "grad_norm": 3.1372969150543213, - "learning_rate": 2.6021090152977874e-06, - "loss": 0.0755, + "epoch": 9.132630328234072, + "grad_norm": 1.1691462993621826, + "learning_rate": 8.673696717659292e-07, + "loss": 0.048, "step": 61490 }, { - "epoch": 4.567057775137383, - "grad_norm": 1.275407314300537, - "learning_rate": 2.597653349175702e-06, - "loss": 0.0494, + "epoch": 9.134115550274766, + "grad_norm": 0.7045255899429321, + "learning_rate": 8.65884449725234e-07, + "loss": 0.0476, "step": 61500 }, { - "epoch": 4.567800386157731, - "grad_norm": 1.3001893758773804, - "learning_rate": 2.5931976830536165e-06, - "loss": 0.0581, + "epoch": 9.135600772315462, + "grad_norm": 1.3898378610610962, + "learning_rate": 8.64399227684539e-07, + "loss": 0.0486, "step": 61510 }, { - "epoch": 4.568542997178078, - "grad_norm": 1.2099862098693848, - "learning_rate": 2.5887420169315315e-06, - "loss": 0.1197, + "epoch": 9.137085994356156, + "grad_norm": 0.2608380913734436, + "learning_rate": 8.629140056438438e-07, + "loss": 0.0476, "step": 61520 }, { - "epoch": 4.569285608198426, - "grad_norm": 1.1752756834030151, - "learning_rate": 2.584286350809446e-06, - "loss": 0.055, + "epoch": 9.138571216396851, + "grad_norm": 0.6374778747558594, + "learning_rate": 8.614287836031487e-07, + "loss": 0.0691, "step": 61530 }, { - "epoch": 4.570028219218774, - "grad_norm": 1.800934076309204, - "learning_rate": 2.5798306846873607e-06, - "loss": 0.0495, + "epoch": 9.140056438437547, + "grad_norm": 1.2475641965866089, + "learning_rate": 8.599435615624537e-07, + "loss": 0.0589, "step": 61540 }, { - "epoch": 4.570770830239121, - "grad_norm": 1.4622337818145752, - "learning_rate": 2.5753750185652756e-06, - "loss": 0.0713, + "epoch": 9.141541660478241, + "grad_norm": 0.8836926221847534, + "learning_rate": 8.584583395217585e-07, + "loss": 0.0601, "step": 61550 }, { - "epoch": 4.5715134412594685, - "grad_norm": 3.0324299335479736, - "learning_rate": 2.5709193524431902e-06, - "loss": 0.0626, + "epoch": 9.143026882518937, + "grad_norm": 0.34879186749458313, + "learning_rate": 8.569731174810635e-07, + "loss": 0.0639, "step": 61560 }, { - "epoch": 4.5722560522798155, - "grad_norm": 3.204300880432129, - "learning_rate": 2.5664636863211048e-06, - "loss": 0.05, + "epoch": 9.144512104559631, + "grad_norm": 0.4875172972679138, + "learning_rate": 8.554878954403683e-07, + "loss": 0.072, "step": 61570 }, { - "epoch": 4.572998663300163, - "grad_norm": 0.8802090883255005, - "learning_rate": 2.5620080201990198e-06, - "loss": 0.0416, + "epoch": 9.145997326600327, + "grad_norm": 0.8644539713859558, + "learning_rate": 8.540026733996733e-07, + "loss": 0.0426, "step": 61580 }, { - "epoch": 4.573741274320511, - "grad_norm": 1.4025449752807617, - "learning_rate": 2.5575523540769344e-06, - "loss": 0.0891, + "epoch": 9.147482548641023, + "grad_norm": 1.0071200132369995, + "learning_rate": 8.525174513589782e-07, + "loss": 0.0314, "step": 61590 }, { - "epoch": 4.574483885340858, - "grad_norm": 2.5383615493774414, - "learning_rate": 2.5530966879548493e-06, - "loss": 0.0421, + "epoch": 9.148967770681717, + "grad_norm": 0.5411166548728943, + "learning_rate": 8.510322293182832e-07, + "loss": 0.0786, "step": 61600 }, { - "epoch": 4.575226496361206, - "grad_norm": 0.5844364166259766, - "learning_rate": 2.5486410218327643e-06, - "loss": 0.0615, + "epoch": 9.150452992722412, + "grad_norm": 0.6687445044517517, + "learning_rate": 8.495470072775881e-07, + "loss": 0.0649, "step": 61610 }, { - "epoch": 4.575969107381553, - "grad_norm": 1.9807744026184082, - "learning_rate": 2.544185355710679e-06, - "loss": 0.0521, + "epoch": 9.151938214763106, + "grad_norm": 0.7308300733566284, + "learning_rate": 8.48061785236893e-07, + "loss": 0.0378, "step": 61620 }, { - "epoch": 4.576711718401901, - "grad_norm": 2.7843782901763916, - "learning_rate": 2.5397296895885935e-06, - "loss": 0.0369, + "epoch": 9.153423436803802, + "grad_norm": 1.4409397840499878, + "learning_rate": 8.465765631961979e-07, + "loss": 0.0515, "step": 61630 }, { - "epoch": 4.577454329422249, - "grad_norm": 2.3830394744873047, - "learning_rate": 2.5352740234665085e-06, - "loss": 0.031, + "epoch": 9.154908658844498, + "grad_norm": 0.8028169274330139, + "learning_rate": 8.450913411555029e-07, + "loss": 0.0584, "step": 61640 }, { - "epoch": 4.578196940442596, - "grad_norm": 3.3283472061157227, - "learning_rate": 2.530818357344423e-06, - "loss": 0.0679, + "epoch": 9.156393880885192, + "grad_norm": 0.8186599612236023, + "learning_rate": 8.436061191148077e-07, + "loss": 0.0764, "step": 61650 }, { - "epoch": 4.578939551462944, - "grad_norm": 0.3360592722892761, - "learning_rate": 2.5263626912223376e-06, - "loss": 0.0799, + "epoch": 9.157879102925888, + "grad_norm": 0.739186704158783, + "learning_rate": 8.421208970741127e-07, + "loss": 0.0477, "step": 61660 }, { - "epoch": 4.579682162483291, - "grad_norm": 1.3740484714508057, - "learning_rate": 2.5219070251002526e-06, - "loss": 0.0593, + "epoch": 9.159364324966582, + "grad_norm": 0.792286217212677, + "learning_rate": 8.406356750334175e-07, + "loss": 0.0697, "step": 61670 }, { - "epoch": 4.580424773503639, - "grad_norm": 1.121627926826477, - "learning_rate": 2.517451358978167e-06, - "loss": 0.0485, + "epoch": 9.160849547007277, + "grad_norm": 1.066676378250122, + "learning_rate": 8.391504529927225e-07, + "loss": 0.063, "step": 61680 }, { - "epoch": 4.581167384523987, - "grad_norm": 1.13548743724823, - "learning_rate": 2.512995692856082e-06, - "loss": 0.0602, + "epoch": 9.162334769047973, + "grad_norm": 1.1812419891357422, + "learning_rate": 8.376652309520274e-07, + "loss": 0.0553, "step": 61690 }, { - "epoch": 4.581909995544334, - "grad_norm": 1.0693154335021973, - "learning_rate": 2.5085400267339967e-06, - "loss": 0.0607, + "epoch": 9.163819991088667, + "grad_norm": 1.9885129928588867, + "learning_rate": 8.361800089113323e-07, + "loss": 0.0578, "step": 61700 }, { - "epoch": 4.5826526065646815, - "grad_norm": 0.1895827353000641, - "learning_rate": 2.5040843606119113e-06, - "loss": 0.052, + "epoch": 9.165305213129363, + "grad_norm": 1.2007282972335815, + "learning_rate": 8.346947868706372e-07, + "loss": 0.0638, "step": 61710 }, { - "epoch": 4.5833952175850285, - "grad_norm": 1.7912211418151855, - "learning_rate": 2.4996286944898263e-06, - "loss": 0.0385, + "epoch": 9.166790435170057, + "grad_norm": 1.5052951574325562, + "learning_rate": 8.33209564829942e-07, + "loss": 0.0566, "step": 61720 }, { - "epoch": 4.584137828605376, - "grad_norm": 1.460799217224121, - "learning_rate": 2.495173028367741e-06, - "loss": 0.0519, + "epoch": 9.168275657210753, + "grad_norm": 0.37552034854888916, + "learning_rate": 8.31724342789247e-07, + "loss": 0.0549, "step": 61730 }, { - "epoch": 4.584880439625724, - "grad_norm": 0.9446871280670166, - "learning_rate": 2.490717362245656e-06, - "loss": 0.0334, + "epoch": 9.169760879251449, + "grad_norm": 0.8721835017204285, + "learning_rate": 8.302391207485521e-07, + "loss": 0.05, "step": 61740 }, { - "epoch": 4.585623050646071, - "grad_norm": 2.315859317779541, - "learning_rate": 2.486261696123571e-06, - "loss": 0.0522, + "epoch": 9.171246101292143, + "grad_norm": 1.0215429067611694, + "learning_rate": 8.287538987078569e-07, + "loss": 0.0519, "step": 61750 }, { - "epoch": 4.586365661666419, - "grad_norm": 1.3986417055130005, - "learning_rate": 2.4818060300014854e-06, - "loss": 0.0468, + "epoch": 9.172731323332838, + "grad_norm": 1.0494624376296997, + "learning_rate": 8.272686766671619e-07, + "loss": 0.0458, "step": 61760 }, { - "epoch": 4.587108272686766, - "grad_norm": 0.41333481669425964, - "learning_rate": 2.4773503638794e-06, - "loss": 0.0342, + "epoch": 9.174216545373532, + "grad_norm": 1.1894280910491943, + "learning_rate": 8.257834546264667e-07, + "loss": 0.0545, "step": 61770 }, { - "epoch": 4.587850883707114, - "grad_norm": 1.8333367109298706, - "learning_rate": 2.472894697757315e-06, - "loss": 0.085, + "epoch": 9.175701767414228, + "grad_norm": 1.1743789911270142, + "learning_rate": 8.242982325857717e-07, + "loss": 0.0541, "step": 61780 }, { - "epoch": 4.588593494727462, - "grad_norm": 2.2030510902404785, - "learning_rate": 2.4684390316352296e-06, - "loss": 0.0629, + "epoch": 9.177186989454924, + "grad_norm": 1.3520103693008423, + "learning_rate": 8.228130105450766e-07, + "loss": 0.0678, "step": 61790 }, { - "epoch": 4.589336105747809, - "grad_norm": 1.6054326295852661, - "learning_rate": 2.463983365513144e-06, - "loss": 0.0628, + "epoch": 9.178672211495618, + "grad_norm": 1.1151225566864014, + "learning_rate": 8.213277885043814e-07, + "loss": 0.0586, "step": 61800 }, { - "epoch": 4.590078716768157, - "grad_norm": 0.6966524720191956, - "learning_rate": 2.459527699391059e-06, - "loss": 0.0585, + "epoch": 9.180157433536314, + "grad_norm": 0.4770192503929138, + "learning_rate": 8.198425664636864e-07, + "loss": 0.0608, "step": 61810 }, { - "epoch": 4.590821327788504, - "grad_norm": 0.6496719717979431, - "learning_rate": 2.4550720332689737e-06, - "loss": 0.0488, + "epoch": 9.181642655577008, + "grad_norm": 0.485674649477005, + "learning_rate": 8.183573444229912e-07, + "loss": 0.0582, "step": 61820 }, { - "epoch": 4.591563938808852, - "grad_norm": 1.0793461799621582, - "learning_rate": 2.4506163671468883e-06, - "loss": 0.0609, + "epoch": 9.183127877617704, + "grad_norm": 0.518302321434021, + "learning_rate": 8.168721223822962e-07, + "loss": 0.0581, "step": 61830 }, { - "epoch": 4.5923065498292, - "grad_norm": 2.4242899417877197, - "learning_rate": 2.4461607010248033e-06, - "loss": 0.0906, + "epoch": 9.1846130996584, + "grad_norm": 1.0120786428451538, + "learning_rate": 8.153869003416011e-07, + "loss": 0.0708, "step": 61840 }, { - "epoch": 4.593049160849547, - "grad_norm": 1.3996238708496094, - "learning_rate": 2.441705034902718e-06, - "loss": 0.07, + "epoch": 9.186098321699093, + "grad_norm": 1.759045124053955, + "learning_rate": 8.13901678300906e-07, + "loss": 0.0487, "step": 61850 }, { - "epoch": 4.593791771869895, - "grad_norm": 1.6079001426696777, - "learning_rate": 2.4372493687806324e-06, - "loss": 0.0418, + "epoch": 9.18758354373979, + "grad_norm": 0.931167721748352, + "learning_rate": 8.124164562602109e-07, + "loss": 0.0492, "step": 61860 }, { - "epoch": 4.594534382890242, - "grad_norm": 0.46042919158935547, - "learning_rate": 2.432793702658548e-06, - "loss": 0.0495, + "epoch": 9.189068765780485, + "grad_norm": 0.9127204418182373, + "learning_rate": 8.109312342195159e-07, + "loss": 0.074, "step": 61870 }, { - "epoch": 4.5952769939105895, - "grad_norm": 0.7299936413764954, - "learning_rate": 2.4283380365364624e-06, - "loss": 0.0731, + "epoch": 9.190553987821179, + "grad_norm": 0.6282828450202942, + "learning_rate": 8.094460121788208e-07, + "loss": 0.0523, "step": 61880 }, { - "epoch": 4.596019604930937, - "grad_norm": 0.81382817029953, - "learning_rate": 2.423882370414377e-06, - "loss": 0.0321, + "epoch": 9.192039209861875, + "grad_norm": 1.20229172706604, + "learning_rate": 8.079607901381258e-07, + "loss": 0.0407, "step": 61890 }, { - "epoch": 4.596762215951284, - "grad_norm": 1.3235479593276978, - "learning_rate": 2.419426704292292e-06, - "loss": 0.0521, + "epoch": 9.193524431902569, + "grad_norm": 0.8881356120109558, + "learning_rate": 8.064755680974306e-07, + "loss": 0.0566, "step": 61900 }, { - "epoch": 4.597504826971632, - "grad_norm": 2.247610330581665, - "learning_rate": 2.4149710381702065e-06, - "loss": 0.0735, + "epoch": 9.195009653943265, + "grad_norm": 0.7234607338905334, + "learning_rate": 8.049903460567356e-07, + "loss": 0.0581, "step": 61910 }, { - "epoch": 4.598247437991979, - "grad_norm": 0.9639174938201904, - "learning_rate": 2.410515372048121e-06, - "loss": 0.1068, + "epoch": 9.19649487598396, + "grad_norm": 0.9576908349990845, + "learning_rate": 8.035051240160404e-07, + "loss": 0.0472, "step": 61920 }, { - "epoch": 4.598990049012327, - "grad_norm": 2.1627914905548096, - "learning_rate": 2.406059705926036e-06, - "loss": 0.0592, + "epoch": 9.197980098024654, + "grad_norm": 1.0542101860046387, + "learning_rate": 8.020199019753454e-07, + "loss": 0.0567, "step": 61930 }, { - "epoch": 4.599732660032675, - "grad_norm": 0.5631018877029419, - "learning_rate": 2.4016040398039507e-06, - "loss": 0.0608, + "epoch": 9.19946532006535, + "grad_norm": 0.6845017075538635, + "learning_rate": 8.005346799346503e-07, + "loss": 0.0388, "step": 61940 }, { - "epoch": 4.600475271053022, - "grad_norm": 1.5326188802719116, - "learning_rate": 2.3971483736818653e-06, - "loss": 0.0571, + "epoch": 9.200950542106044, + "grad_norm": 0.4817188084125519, + "learning_rate": 7.990494578939552e-07, + "loss": 0.0592, "step": 61950 }, { - "epoch": 4.60121788207337, - "grad_norm": 1.1006548404693604, - "learning_rate": 2.3926927075597802e-06, - "loss": 0.0602, + "epoch": 9.20243576414674, + "grad_norm": 0.7306998372077942, + "learning_rate": 7.975642358532601e-07, + "loss": 0.0583, "step": 61960 }, { - "epoch": 4.601960493093717, - "grad_norm": 2.1133296489715576, - "learning_rate": 2.388237041437695e-06, - "loss": 0.0895, + "epoch": 9.203920986187436, + "grad_norm": 1.0176167488098145, + "learning_rate": 7.96079013812565e-07, + "loss": 0.0576, "step": 61970 }, { - "epoch": 4.602703104114065, - "grad_norm": 0.42553678154945374, - "learning_rate": 2.3837813753156094e-06, - "loss": 0.049, + "epoch": 9.20540620822813, + "grad_norm": 0.6478152275085449, + "learning_rate": 7.945937917718699e-07, + "loss": 0.06, "step": 61980 }, { - "epoch": 4.603445715134413, - "grad_norm": 1.3893156051635742, - "learning_rate": 2.3793257091935244e-06, - "loss": 0.0459, + "epoch": 9.206891430268826, + "grad_norm": 0.7212924957275391, + "learning_rate": 7.931085697311748e-07, + "loss": 0.0445, "step": 61990 }, { - "epoch": 4.60418832615476, - "grad_norm": 0.9664128422737122, - "learning_rate": 2.3748700430714394e-06, - "loss": 0.0587, + "epoch": 9.20837665230952, + "grad_norm": 0.6695299744606018, + "learning_rate": 7.916233476904798e-07, + "loss": 0.0498, "step": 62000 }, { - "epoch": 4.604930937175108, - "grad_norm": 0.9554176330566406, - "learning_rate": 2.370414376949354e-06, - "loss": 0.0503, + "epoch": 9.209861874350215, + "grad_norm": 0.9033427834510803, + "learning_rate": 7.901381256497848e-07, + "loss": 0.06, "step": 62010 }, { - "epoch": 4.605673548195456, - "grad_norm": 1.3135801553726196, - "learning_rate": 2.365958710827269e-06, - "loss": 0.07, + "epoch": 9.211347096390911, + "grad_norm": 1.4528840780258179, + "learning_rate": 7.886529036090896e-07, + "loss": 0.0541, "step": 62020 }, { - "epoch": 4.606416159215803, - "grad_norm": 4.117845058441162, - "learning_rate": 2.3615030447051835e-06, - "loss": 0.0766, + "epoch": 9.212832318431605, + "grad_norm": 1.2177852392196655, + "learning_rate": 7.871676815683946e-07, + "loss": 0.054, "step": 62030 }, { - "epoch": 4.6071587702361505, - "grad_norm": 1.4412838220596313, - "learning_rate": 2.3570473785830985e-06, - "loss": 0.0376, + "epoch": 9.214317540472301, + "grad_norm": 0.617085337638855, + "learning_rate": 7.856824595276995e-07, + "loss": 0.0569, "step": 62040 }, { - "epoch": 4.6079013812564975, - "grad_norm": 2.425473690032959, - "learning_rate": 2.352591712461013e-06, - "loss": 0.0399, + "epoch": 9.215802762512995, + "grad_norm": 1.0635652542114258, + "learning_rate": 7.841972374870044e-07, + "loss": 0.0553, "step": 62050 }, { - "epoch": 4.608643992276845, - "grad_norm": 0.8025254011154175, - "learning_rate": 2.3481360463389276e-06, - "loss": 0.0716, + "epoch": 9.21728798455369, + "grad_norm": 0.704115629196167, + "learning_rate": 7.827120154463093e-07, + "loss": 0.0596, "step": 62060 }, { - "epoch": 4.609386603297193, - "grad_norm": 1.2991340160369873, - "learning_rate": 2.3436803802168426e-06, - "loss": 0.0495, + "epoch": 9.218773206594387, + "grad_norm": 0.673342764377594, + "learning_rate": 7.812267934056141e-07, + "loss": 0.0514, "step": 62070 }, { - "epoch": 4.61012921431754, - "grad_norm": 0.7377578020095825, - "learning_rate": 2.339224714094757e-06, - "loss": 0.0436, + "epoch": 9.22025842863508, + "grad_norm": 0.7802639007568359, + "learning_rate": 7.797415713649191e-07, + "loss": 0.0634, "step": 62080 }, { - "epoch": 4.610871825337888, - "grad_norm": 0.37221047282218933, - "learning_rate": 2.3347690479726718e-06, - "loss": 0.0418, + "epoch": 9.221743650675776, + "grad_norm": 1.1733187437057495, + "learning_rate": 7.78256349324224e-07, + "loss": 0.0624, "step": 62090 }, { - "epoch": 4.611614436358235, - "grad_norm": 1.457234501838684, - "learning_rate": 2.3303133818505868e-06, - "loss": 0.0741, + "epoch": 9.22322887271647, + "grad_norm": 0.6247046589851379, + "learning_rate": 7.767711272835289e-07, + "loss": 0.0596, "step": 62100 }, { - "epoch": 4.612357047378583, - "grad_norm": 1.0528523921966553, - "learning_rate": 2.3258577157285013e-06, - "loss": 0.0518, + "epoch": 9.224714094757166, + "grad_norm": 0.615653395652771, + "learning_rate": 7.752859052428338e-07, + "loss": 0.0402, "step": 62110 }, { - "epoch": 4.613099658398931, - "grad_norm": 1.0995347499847412, - "learning_rate": 2.321402049606416e-06, - "loss": 0.0827, + "epoch": 9.226199316797862, + "grad_norm": 0.35232970118522644, + "learning_rate": 7.738006832021387e-07, + "loss": 0.0593, "step": 62120 }, { - "epoch": 4.613842269419278, - "grad_norm": 2.883272409439087, - "learning_rate": 2.3169463834843313e-06, - "loss": 0.0476, + "epoch": 9.227684538838556, + "grad_norm": 2.261904716491699, + "learning_rate": 7.723154611614438e-07, + "loss": 0.0732, "step": 62130 }, { - "epoch": 4.614584880439626, - "grad_norm": 1.5344513654708862, - "learning_rate": 2.312490717362246e-06, - "loss": 0.0426, + "epoch": 9.229169760879252, + "grad_norm": 1.1600795984268188, + "learning_rate": 7.708302391207487e-07, + "loss": 0.0482, "step": 62140 }, { - "epoch": 4.615327491459973, - "grad_norm": 2.136598587036133, - "learning_rate": 2.3080350512401605e-06, - "loss": 0.0948, + "epoch": 9.230654982919946, + "grad_norm": 0.5957081317901611, + "learning_rate": 7.693450170800535e-07, + "loss": 0.0717, "step": 62150 }, { - "epoch": 4.616070102480321, - "grad_norm": 1.1575771570205688, - "learning_rate": 2.3035793851180755e-06, - "loss": 0.0708, + "epoch": 9.232140204960642, + "grad_norm": 0.5073825120925903, + "learning_rate": 7.678597950393585e-07, + "loss": 0.0406, "step": 62160 }, { - "epoch": 4.616812713500669, - "grad_norm": 0.935723066329956, - "learning_rate": 2.29912371899599e-06, - "loss": 0.0624, + "epoch": 9.233625427001337, + "grad_norm": 0.498580664396286, + "learning_rate": 7.663745729986633e-07, + "loss": 0.0525, "step": 62170 }, { - "epoch": 4.617555324521016, - "grad_norm": 2.6540639400482178, - "learning_rate": 2.2946680528739046e-06, - "loss": 0.0716, + "epoch": 9.235110649042031, + "grad_norm": 0.4409525990486145, + "learning_rate": 7.648893509579683e-07, + "loss": 0.0569, "step": 62180 }, { - "epoch": 4.618297935541364, - "grad_norm": 0.7408058047294617, - "learning_rate": 2.2902123867518196e-06, - "loss": 0.0612, + "epoch": 9.236595871082727, + "grad_norm": 0.8985182642936707, + "learning_rate": 7.634041289172732e-07, + "loss": 0.0497, "step": 62190 }, { - "epoch": 4.619040546561711, - "grad_norm": 1.5161961317062378, - "learning_rate": 2.285756720629734e-06, - "loss": 0.0565, + "epoch": 9.238081093123421, + "grad_norm": 0.9006769061088562, + "learning_rate": 7.619189068765781e-07, + "loss": 0.0554, "step": 62200 }, { - "epoch": 4.6197831575820585, - "grad_norm": 1.0326826572418213, - "learning_rate": 2.2813010545076487e-06, - "loss": 0.0633, + "epoch": 9.239566315164117, + "grad_norm": 0.323944091796875, + "learning_rate": 7.60433684835883e-07, + "loss": 0.068, "step": 62210 }, { - "epoch": 4.620525768602406, - "grad_norm": 1.720488429069519, - "learning_rate": 2.2768453883855637e-06, - "loss": 0.0651, + "epoch": 9.241051537204813, + "grad_norm": 0.8015594482421875, + "learning_rate": 7.58948462795188e-07, + "loss": 0.0391, "step": 62220 }, { - "epoch": 4.621268379622753, - "grad_norm": 0.7917996048927307, - "learning_rate": 2.2723897222634783e-06, - "loss": 0.0629, + "epoch": 9.242536759245507, + "grad_norm": 1.1999105215072632, + "learning_rate": 7.574632407544928e-07, + "loss": 0.0589, "step": 62230 }, { - "epoch": 4.622010990643101, - "grad_norm": 1.6765140295028687, - "learning_rate": 2.267934056141393e-06, - "loss": 0.0662, + "epoch": 9.244021981286203, + "grad_norm": 1.5768723487854004, + "learning_rate": 7.559780187137978e-07, + "loss": 0.0721, "step": 62240 }, { - "epoch": 4.622753601663449, - "grad_norm": 0.8106739521026611, - "learning_rate": 2.263478390019308e-06, - "loss": 0.0489, + "epoch": 9.245507203326897, + "grad_norm": 1.066964030265808, + "learning_rate": 7.544927966731026e-07, + "loss": 0.0537, "step": 62250 }, { - "epoch": 4.623496212683796, - "grad_norm": 0.6915829181671143, - "learning_rate": 2.259022723897223e-06, - "loss": 0.0594, + "epoch": 9.246992425367592, + "grad_norm": 1.3641186952590942, + "learning_rate": 7.530075746324077e-07, + "loss": 0.0764, "step": 62260 }, { - "epoch": 4.624238823704144, - "grad_norm": 0.5459581613540649, - "learning_rate": 2.2545670577751374e-06, - "loss": 0.0536, + "epoch": 9.248477647408288, + "grad_norm": 0.9384719133377075, + "learning_rate": 7.515223525917126e-07, + "loss": 0.0472, "step": 62270 }, { - "epoch": 4.624981434724491, - "grad_norm": 1.8974658250808716, - "learning_rate": 2.2501113916530524e-06, - "loss": 0.0486, + "epoch": 9.249962869448982, + "grad_norm": 1.3951371908187866, + "learning_rate": 7.500371305510175e-07, + "loss": 0.0632, "step": 62280 }, { - "epoch": 4.625724045744839, - "grad_norm": 2.0770936012268066, - "learning_rate": 2.245655725530967e-06, - "loss": 0.072, + "epoch": 9.251448091489678, + "grad_norm": 0.7329308390617371, + "learning_rate": 7.485519085103224e-07, + "loss": 0.0535, "step": 62290 }, { - "epoch": 4.626466656765187, - "grad_norm": 1.3229902982711792, - "learning_rate": 2.2412000594088816e-06, - "loss": 0.0599, + "epoch": 9.252933313530372, + "grad_norm": 1.5356649160385132, + "learning_rate": 7.470666864696273e-07, + "loss": 0.0516, "step": 62300 }, { - "epoch": 4.627209267785534, - "grad_norm": 0.6747040748596191, - "learning_rate": 2.2367443932867966e-06, - "loss": 0.0734, + "epoch": 9.254418535571068, + "grad_norm": 0.6318261623382568, + "learning_rate": 7.455814644289322e-07, + "loss": 0.0467, "step": 62310 }, { - "epoch": 4.627951878805882, - "grad_norm": 2.171114921569824, - "learning_rate": 2.232288727164711e-06, - "loss": 0.0809, + "epoch": 9.255903757611764, + "grad_norm": 0.6265151500701904, + "learning_rate": 7.440962423882372e-07, + "loss": 0.0382, "step": 62320 }, { - "epoch": 4.628694489826229, - "grad_norm": 1.1108207702636719, - "learning_rate": 2.2278330610426257e-06, - "loss": 0.0447, + "epoch": 9.257388979652458, + "grad_norm": 0.2857096195220947, + "learning_rate": 7.42611020347542e-07, + "loss": 0.055, "step": 62330 }, { - "epoch": 4.629437100846577, - "grad_norm": 1.5883184671401978, - "learning_rate": 2.2233773949205407e-06, - "loss": 0.0599, + "epoch": 9.258874201693153, + "grad_norm": 0.9067076444625854, + "learning_rate": 7.411257983068469e-07, + "loss": 0.067, "step": 62340 }, { - "epoch": 4.630179711866925, - "grad_norm": 1.2950087785720825, - "learning_rate": 2.2189217287984553e-06, - "loss": 0.0346, + "epoch": 9.260359423733847, + "grad_norm": 1.2431113719940186, + "learning_rate": 7.396405762661518e-07, + "loss": 0.0626, "step": 62350 }, { - "epoch": 4.630922322887272, - "grad_norm": 2.8710503578186035, - "learning_rate": 2.21446606267637e-06, - "loss": 0.0433, + "epoch": 9.261844645774543, + "grad_norm": 0.924633800983429, + "learning_rate": 7.381553542254567e-07, + "loss": 0.0455, "step": 62360 }, { - "epoch": 4.6316649339076195, - "grad_norm": 0.5784642696380615, - "learning_rate": 2.210010396554285e-06, - "loss": 0.0717, + "epoch": 9.263329867815239, + "grad_norm": 1.4786045551300049, + "learning_rate": 7.366701321847616e-07, + "loss": 0.0511, "step": 62370 }, { - "epoch": 4.6324075449279665, - "grad_norm": 1.163934350013733, - "learning_rate": 2.2055547304321994e-06, - "loss": 0.0449, + "epoch": 9.264815089855933, + "grad_norm": 0.7520620822906494, + "learning_rate": 7.351849101440665e-07, + "loss": 0.0499, "step": 62380 }, { - "epoch": 4.633150155948314, - "grad_norm": 2.270219564437866, - "learning_rate": 2.201099064310115e-06, - "loss": 0.0608, + "epoch": 9.266300311896629, + "grad_norm": 0.48528382182121277, + "learning_rate": 7.336996881033716e-07, + "loss": 0.0618, "step": 62390 }, { - "epoch": 4.633892766968662, - "grad_norm": 2.067028045654297, - "learning_rate": 2.1966433981880294e-06, - "loss": 0.0698, + "epoch": 9.267785533937325, + "grad_norm": 0.9447212219238281, + "learning_rate": 7.322144660626765e-07, + "loss": 0.0607, "step": 62400 }, { - "epoch": 4.634635377989009, - "grad_norm": 1.985038161277771, - "learning_rate": 2.192187732065944e-06, - "loss": 0.0819, + "epoch": 9.269270755978019, + "grad_norm": 1.028714895248413, + "learning_rate": 7.307292440219814e-07, + "loss": 0.0438, "step": 62410 }, { - "epoch": 4.635377989009357, - "grad_norm": 3.1525087356567383, - "learning_rate": 2.187732065943859e-06, - "loss": 0.0757, + "epoch": 9.270755978018714, + "grad_norm": 1.3869749307632446, + "learning_rate": 7.292440219812863e-07, + "loss": 0.054, "step": 62420 }, { - "epoch": 4.636120600029704, - "grad_norm": 2.7132487297058105, - "learning_rate": 2.1832763998217735e-06, - "loss": 0.046, + "epoch": 9.272241200059408, + "grad_norm": 1.0759178400039673, + "learning_rate": 7.277587999405912e-07, + "loss": 0.0597, "step": 62430 }, - { - "epoch": 4.636863211050052, - "grad_norm": 1.3874857425689697, - "learning_rate": 2.178820733699688e-06, - "loss": 0.0706, + { + "epoch": 9.273726422100104, + "grad_norm": 1.0529601573944092, + "learning_rate": 7.262735778998961e-07, + "loss": 0.0544, "step": 62440 }, { - "epoch": 4.6376058220704, - "grad_norm": 1.2080367803573608, - "learning_rate": 2.174365067577603e-06, - "loss": 0.0525, + "epoch": 9.2752116441408, + "grad_norm": 0.7281443476676941, + "learning_rate": 7.24788355859201e-07, + "loss": 0.0638, "step": 62450 }, { - "epoch": 4.638348433090747, - "grad_norm": 1.1241955757141113, - "learning_rate": 2.1699094014555177e-06, + "epoch": 9.276696866181494, + "grad_norm": 0.25542089343070984, + "learning_rate": 7.233031338185059e-07, "loss": 0.0514, "step": 62460 }, { - "epoch": 4.639091044111095, - "grad_norm": 1.7691078186035156, - "learning_rate": 2.1654537353334322e-06, - "loss": 0.0997, + "epoch": 9.27818208822219, + "grad_norm": 0.6407657265663147, + "learning_rate": 7.218179117778109e-07, + "loss": 0.0459, "step": 62470 }, { - "epoch": 4.639833655131442, - "grad_norm": 1.5121815204620361, - "learning_rate": 2.1609980692113472e-06, - "loss": 0.0648, + "epoch": 9.279667310262884, + "grad_norm": 0.9292405843734741, + "learning_rate": 7.203326897371157e-07, + "loss": 0.0568, "step": 62480 }, { - "epoch": 4.64057626615179, - "grad_norm": 0.8992467522621155, - "learning_rate": 2.156542403089262e-06, - "loss": 0.0425, + "epoch": 9.28115253230358, + "grad_norm": 0.6538282036781311, + "learning_rate": 7.188474676964207e-07, + "loss": 0.0626, "step": 62490 }, { - "epoch": 4.641318877172138, - "grad_norm": 0.8572467565536499, - "learning_rate": 2.1520867369671764e-06, - "loss": 0.064, + "epoch": 9.282637754344275, + "grad_norm": 0.7332746386528015, + "learning_rate": 7.173622456557255e-07, + "loss": 0.048, "step": 62500 }, { - "epoch": 4.642061488192485, - "grad_norm": 0.3243890106678009, - "learning_rate": 2.1476310708450914e-06, - "loss": 0.0469, + "epoch": 9.28412297638497, + "grad_norm": 0.6291193962097168, + "learning_rate": 7.158770236150305e-07, + "loss": 0.0618, "step": 62510 }, { - "epoch": 4.6428040992128325, - "grad_norm": 0.47468477487564087, - "learning_rate": 2.143175404723006e-06, - "loss": 0.0805, + "epoch": 9.285608198425665, + "grad_norm": 0.783424437046051, + "learning_rate": 7.143918015743354e-07, + "loss": 0.0675, "step": 62520 }, { - "epoch": 4.6435467102331796, - "grad_norm": 0.49177148938179016, - "learning_rate": 2.138719738600921e-06, - "loss": 0.0608, + "epoch": 9.287093420466359, + "grad_norm": 0.8970275521278381, + "learning_rate": 7.129065795336404e-07, + "loss": 0.0421, "step": 62530 }, { - "epoch": 4.6442893212535274, - "grad_norm": 0.7259443402290344, - "learning_rate": 2.134264072478836e-06, - "loss": 0.0658, + "epoch": 9.288578642507055, + "grad_norm": 1.1907296180725098, + "learning_rate": 7.114213574929453e-07, + "loss": 0.0624, "step": 62540 }, { - "epoch": 4.645031932273875, - "grad_norm": 1.287712812423706, - "learning_rate": 2.1298084063567505e-06, - "loss": 0.0708, + "epoch": 9.29006386454775, + "grad_norm": 1.3740684986114502, + "learning_rate": 7.099361354522502e-07, + "loss": 0.0378, "step": 62550 }, { - "epoch": 4.645774543294222, - "grad_norm": 0.7995294332504272, - "learning_rate": 2.125352740234665e-06, - "loss": 0.0502, + "epoch": 9.291549086588445, + "grad_norm": 0.9677320122718811, + "learning_rate": 7.084509134115551e-07, + "loss": 0.0375, "step": 62560 }, { - "epoch": 4.64651715431457, - "grad_norm": 0.879058837890625, - "learning_rate": 2.12089707411258e-06, - "loss": 0.0922, + "epoch": 9.29303430862914, + "grad_norm": 1.0171085596084595, + "learning_rate": 7.069656913708601e-07, + "loss": 0.045, "step": 62570 }, { - "epoch": 4.647259765334917, - "grad_norm": 1.0613620281219482, - "learning_rate": 2.1164414079904946e-06, - "loss": 0.0492, + "epoch": 9.294519530669834, + "grad_norm": 0.4692612588405609, + "learning_rate": 7.054804693301649e-07, + "loss": 0.0768, "step": 62580 }, { - "epoch": 4.648002376355265, - "grad_norm": 0.7447329759597778, - "learning_rate": 2.111985741868409e-06, - "loss": 0.0548, + "epoch": 9.29600475271053, + "grad_norm": 0.7565601468086243, + "learning_rate": 7.039952472894699e-07, + "loss": 0.0549, "step": 62590 }, { - "epoch": 4.648744987375613, - "grad_norm": 1.1669635772705078, - "learning_rate": 2.107530075746324e-06, - "loss": 0.062, + "epoch": 9.297489974751226, + "grad_norm": 0.8417346477508545, + "learning_rate": 7.025100252487747e-07, + "loss": 0.0611, "step": 62600 }, { - "epoch": 4.64948759839596, - "grad_norm": 0.3157268464565277, - "learning_rate": 2.1030744096242388e-06, - "loss": 0.0661, + "epoch": 9.29897519679192, + "grad_norm": 1.0898284912109375, + "learning_rate": 7.010248032080796e-07, + "loss": 0.0659, "step": 62610 }, { - "epoch": 4.650230209416308, - "grad_norm": 0.468450665473938, - "learning_rate": 2.0986187435021533e-06, - "loss": 0.0502, + "epoch": 9.300460418832616, + "grad_norm": 0.7627786993980408, + "learning_rate": 6.995395811673846e-07, + "loss": 0.0432, "step": 62620 }, { - "epoch": 4.650972820436655, - "grad_norm": 0.6913983821868896, - "learning_rate": 2.0941630773800683e-06, - "loss": 0.0732, + "epoch": 9.30194564087331, + "grad_norm": 1.2172629833221436, + "learning_rate": 6.980543591266894e-07, + "loss": 0.0724, "step": 62630 }, { - "epoch": 4.651715431457003, - "grad_norm": 0.5963155031204224, - "learning_rate": 2.089707411257983e-06, - "loss": 0.0588, + "epoch": 9.303430862914006, + "grad_norm": 0.7371120452880859, + "learning_rate": 6.965691370859944e-07, + "loss": 0.0722, "step": 62640 }, { - "epoch": 4.652458042477351, - "grad_norm": 0.6475550532341003, - "learning_rate": 2.0852517451358975e-06, - "loss": 0.0436, + "epoch": 9.304916084954701, + "grad_norm": 1.0806337594985962, + "learning_rate": 6.950839150452992e-07, + "loss": 0.0577, "step": 62650 }, { - "epoch": 4.653200653497698, - "grad_norm": 0.8035712242126465, - "learning_rate": 2.080796079013813e-06, - "loss": 0.0555, + "epoch": 9.306401306995395, + "grad_norm": 1.2978869676589966, + "learning_rate": 6.935986930046043e-07, + "loss": 0.0691, "step": 62660 }, { - "epoch": 4.653943264518046, - "grad_norm": 0.43951982259750366, - "learning_rate": 2.0763404128917275e-06, - "loss": 0.0567, + "epoch": 9.307886529036091, + "grad_norm": 1.1727228164672852, + "learning_rate": 6.921134709639093e-07, + "loss": 0.0521, "step": 62670 }, { - "epoch": 4.654685875538393, - "grad_norm": 2.8251731395721436, - "learning_rate": 2.071884746769642e-06, - "loss": 0.0697, + "epoch": 9.309371751076785, + "grad_norm": 0.4627057909965515, + "learning_rate": 6.906282489232141e-07, + "loss": 0.0561, "step": 62680 }, { - "epoch": 4.6554284865587405, - "grad_norm": 2.3607585430145264, - "learning_rate": 2.067429080647557e-06, - "loss": 0.0586, + "epoch": 9.310856973117481, + "grad_norm": 1.514148473739624, + "learning_rate": 6.89143026882519e-07, + "loss": 0.0635, "step": 62690 }, { - "epoch": 4.656171097579088, - "grad_norm": 1.1953966617584229, - "learning_rate": 2.0629734145254716e-06, - "loss": 0.0645, + "epoch": 9.312342195158177, + "grad_norm": 0.7269427180290222, + "learning_rate": 6.876578048418239e-07, + "loss": 0.0604, "step": 62700 }, { - "epoch": 4.656913708599435, - "grad_norm": 2.366037130355835, - "learning_rate": 2.058517748403386e-06, - "loss": 0.0635, + "epoch": 9.31382741719887, + "grad_norm": 0.46542301774024963, + "learning_rate": 6.861725828011288e-07, + "loss": 0.0647, "step": 62710 }, { - "epoch": 4.657656319619783, - "grad_norm": 0.9777756333351135, - "learning_rate": 2.054062082281301e-06, - "loss": 0.0642, + "epoch": 9.315312639239567, + "grad_norm": 0.5337091684341431, + "learning_rate": 6.846873607604338e-07, + "loss": 0.0572, "step": 62720 }, { - "epoch": 4.65839893064013, - "grad_norm": 2.669673442840576, - "learning_rate": 2.0496064161592157e-06, - "loss": 0.0608, + "epoch": 9.31679786128026, + "grad_norm": 0.7221681475639343, + "learning_rate": 6.832021387197386e-07, + "loss": 0.0467, "step": 62730 }, { - "epoch": 4.659141541660478, - "grad_norm": 2.5014798641204834, - "learning_rate": 2.0451507500371307e-06, - "loss": 0.0718, + "epoch": 9.318283083320956, + "grad_norm": 1.7959321737289429, + "learning_rate": 6.817169166790436e-07, + "loss": 0.0699, "step": 62740 }, { - "epoch": 4.659884152680826, - "grad_norm": 2.0952281951904297, - "learning_rate": 2.0406950839150453e-06, - "loss": 0.0731, + "epoch": 9.319768305361652, + "grad_norm": 0.5353294014930725, + "learning_rate": 6.802316946383484e-07, + "loss": 0.0429, "step": 62750 }, { - "epoch": 4.660626763701173, - "grad_norm": 1.361879587173462, - "learning_rate": 2.03623941779296e-06, - "loss": 0.0482, + "epoch": 9.321253527402346, + "grad_norm": 1.852454662322998, + "learning_rate": 6.787464725976534e-07, + "loss": 0.0428, "step": 62760 }, { - "epoch": 4.661369374721521, - "grad_norm": 0.5971656441688538, - "learning_rate": 2.031783751670875e-06, - "loss": 0.0543, + "epoch": 9.322738749443042, + "grad_norm": 1.3642737865447998, + "learning_rate": 6.772612505569583e-07, + "loss": 0.0585, "step": 62770 }, { - "epoch": 4.662111985741868, - "grad_norm": 2.2407009601593018, - "learning_rate": 2.0273280855487894e-06, - "loss": 0.0915, + "epoch": 9.324223971483736, + "grad_norm": 0.9288305044174194, + "learning_rate": 6.757760285162632e-07, + "loss": 0.0468, "step": 62780 }, { - "epoch": 4.662854596762216, - "grad_norm": 0.24493630230426788, - "learning_rate": 2.0228724194267044e-06, - "loss": 0.046, + "epoch": 9.325709193524432, + "grad_norm": 1.5143994092941284, + "learning_rate": 6.742908064755682e-07, + "loss": 0.0631, "step": 62790 }, { - "epoch": 4.663597207782564, - "grad_norm": 1.5833337306976318, - "learning_rate": 2.0184167533046194e-06, - "loss": 0.0539, + "epoch": 9.327194415565128, + "grad_norm": 0.7979211807250977, + "learning_rate": 6.728055844348731e-07, + "loss": 0.0545, "step": 62800 }, { - "epoch": 4.664339818802911, - "grad_norm": 0.7038244605064392, - "learning_rate": 2.013961087182534e-06, - "loss": 0.0618, + "epoch": 9.328679637605822, + "grad_norm": 1.252830982208252, + "learning_rate": 6.71320362394178e-07, + "loss": 0.0557, "step": 62810 }, { - "epoch": 4.665082429823259, - "grad_norm": 0.6795600652694702, - "learning_rate": 2.0095054210604486e-06, - "loss": 0.0821, + "epoch": 9.330164859646517, + "grad_norm": 1.5728176832199097, + "learning_rate": 6.69835140353483e-07, + "loss": 0.0506, "step": 62820 }, { - "epoch": 4.665825040843606, - "grad_norm": 1.3816779851913452, - "learning_rate": 2.0050497549383636e-06, - "loss": 0.047, + "epoch": 9.331650081687211, + "grad_norm": 1.2865973711013794, + "learning_rate": 6.683499183127878e-07, + "loss": 0.0716, "step": 62830 }, { - "epoch": 4.666567651863954, - "grad_norm": 3.097158193588257, - "learning_rate": 2.000594088816278e-06, - "loss": 0.0707, + "epoch": 9.333135303727907, + "grad_norm": 0.5197668075561523, + "learning_rate": 6.668646962720928e-07, + "loss": 0.052, "step": 62840 }, { - "epoch": 4.6673102628843015, - "grad_norm": 3.206883192062378, - "learning_rate": 1.9961384226941927e-06, - "loss": 0.0426, + "epoch": 9.334620525768603, + "grad_norm": 0.7080972194671631, + "learning_rate": 6.653794742313976e-07, + "loss": 0.0499, "step": 62850 }, { - "epoch": 4.6680528739046485, - "grad_norm": 3.3937978744506836, - "learning_rate": 1.9916827565721077e-06, - "loss": 0.0737, + "epoch": 9.336105747809297, + "grad_norm": 1.5883245468139648, + "learning_rate": 6.638942521907026e-07, + "loss": 0.0527, "step": 62860 }, { - "epoch": 4.668795484924996, - "grad_norm": 0.8779681921005249, - "learning_rate": 1.9872270904500223e-06, - "loss": 0.039, + "epoch": 9.337590969849993, + "grad_norm": 1.628940224647522, + "learning_rate": 6.624090301500075e-07, + "loss": 0.0491, "step": 62870 }, { - "epoch": 4.669538095945343, - "grad_norm": 2.1197397708892822, - "learning_rate": 1.982771424327937e-06, - "loss": 0.0465, + "epoch": 9.339076191890687, + "grad_norm": 0.41147395968437195, + "learning_rate": 6.609238081093123e-07, + "loss": 0.0546, "step": 62880 }, { - "epoch": 4.670280706965691, - "grad_norm": 3.1833298206329346, - "learning_rate": 1.978315758205852e-06, - "loss": 0.0657, + "epoch": 9.340561413931383, + "grad_norm": 0.7498815655708313, + "learning_rate": 6.594385860686173e-07, + "loss": 0.0733, "step": 62890 }, { - "epoch": 4.671023317986039, - "grad_norm": 0.6295514702796936, - "learning_rate": 1.9738600920837664e-06, - "loss": 0.0515, + "epoch": 9.342046635972078, + "grad_norm": 1.1569870710372925, + "learning_rate": 6.579533640279221e-07, + "loss": 0.053, "step": 62900 }, { - "epoch": 4.671765929006386, - "grad_norm": 0.9463853240013123, - "learning_rate": 1.969404425961681e-06, - "loss": 0.0732, + "epoch": 9.343531858012772, + "grad_norm": 1.0395070314407349, + "learning_rate": 6.564681419872271e-07, + "loss": 0.0523, "step": 62910 }, { - "epoch": 4.672508540026734, - "grad_norm": 2.7689971923828125, - "learning_rate": 1.9649487598395964e-06, - "loss": 0.0551, + "epoch": 9.345017080053468, + "grad_norm": 0.9898577928543091, + "learning_rate": 6.549829199465322e-07, + "loss": 0.0438, "step": 62920 }, { - "epoch": 4.673251151047081, - "grad_norm": 1.0542500019073486, - "learning_rate": 1.960493093717511e-06, - "loss": 0.078, + "epoch": 9.346502302094162, + "grad_norm": 1.5343103408813477, + "learning_rate": 6.53497697905837e-07, + "loss": 0.0516, "step": 62930 }, { - "epoch": 4.673993762067429, - "grad_norm": 6.436036586761475, - "learning_rate": 1.9560374275954255e-06, - "loss": 0.0641, + "epoch": 9.347987524134858, + "grad_norm": 0.42942705750465393, + "learning_rate": 6.52012475865142e-07, + "loss": 0.0493, "step": 62940 }, { - "epoch": 4.674736373087777, - "grad_norm": 0.5600406527519226, - "learning_rate": 1.9515817614733405e-06, - "loss": 0.0637, + "epoch": 9.349472746175554, + "grad_norm": 1.3621197938919067, + "learning_rate": 6.505272538244468e-07, + "loss": 0.061, "step": 62950 }, { - "epoch": 4.675478984108124, - "grad_norm": 1.567610263824463, - "learning_rate": 1.947126095351255e-06, - "loss": 0.0449, + "epoch": 9.350957968216248, + "grad_norm": 0.8141943216323853, + "learning_rate": 6.490420317837517e-07, + "loss": 0.041, "step": 62960 }, { - "epoch": 4.676221595128472, - "grad_norm": 2.6825263500213623, - "learning_rate": 1.9426704292291697e-06, - "loss": 0.0558, + "epoch": 9.352443190256944, + "grad_norm": 1.1711074113845825, + "learning_rate": 6.475568097430567e-07, + "loss": 0.0582, "step": 62970 }, { - "epoch": 4.676964206148819, - "grad_norm": 0.2970806658267975, - "learning_rate": 1.9382147631070847e-06, - "loss": 0.0522, + "epoch": 9.353928412297638, + "grad_norm": 1.1483805179595947, + "learning_rate": 6.460715877023615e-07, + "loss": 0.0732, "step": 62980 }, { - "epoch": 4.677706817169167, - "grad_norm": 1.4133620262145996, - "learning_rate": 1.9337590969849992e-06, - "loss": 0.0462, + "epoch": 9.355413634338333, + "grad_norm": 1.2621763944625854, + "learning_rate": 6.445863656616665e-07, + "loss": 0.0526, "step": 62990 }, { - "epoch": 4.678449428189515, - "grad_norm": 1.4297685623168945, - "learning_rate": 1.929303430862914e-06, - "loss": 0.0552, + "epoch": 9.35689885637903, + "grad_norm": 0.48527824878692627, + "learning_rate": 6.431011436209713e-07, + "loss": 0.0574, "step": 63000 }, { - "epoch": 4.679192039209862, - "grad_norm": 2.5894458293914795, - "learning_rate": 1.924847764740829e-06, - "loss": 0.0763, + "epoch": 9.358384078419723, + "grad_norm": 0.6406056880950928, + "learning_rate": 6.416159215802763e-07, + "loss": 0.0599, "step": 63010 }, { - "epoch": 4.6799346502302095, - "grad_norm": 2.202799081802368, - "learning_rate": 1.9203920986187434e-06, - "loss": 0.0616, + "epoch": 9.359869300460419, + "grad_norm": 1.0590063333511353, + "learning_rate": 6.401306995395812e-07, + "loss": 0.0686, "step": 63020 }, { - "epoch": 4.6806772612505565, - "grad_norm": 1.195757269859314, - "learning_rate": 1.915936432496658e-06, - "loss": 0.0802, + "epoch": 9.361354522501115, + "grad_norm": 1.2997311353683472, + "learning_rate": 6.386454774988861e-07, + "loss": 0.0505, "step": 63030 }, { - "epoch": 4.681419872270904, - "grad_norm": 0.8509438633918762, - "learning_rate": 1.911480766374573e-06, - "loss": 0.0413, + "epoch": 9.362839744541809, + "grad_norm": 0.8595489263534546, + "learning_rate": 6.37160255458191e-07, + "loss": 0.0302, "step": 63040 }, { - "epoch": 4.682162483291252, - "grad_norm": 2.7623627185821533, - "learning_rate": 1.907025100252488e-06, - "loss": 0.0487, + "epoch": 9.364324966582505, + "grad_norm": 0.8814943432807922, + "learning_rate": 6.35675033417496e-07, + "loss": 0.0401, "step": 63050 }, { - "epoch": 4.682905094311599, - "grad_norm": 1.9826782941818237, - "learning_rate": 1.9025694341304027e-06, - "loss": 0.0715, + "epoch": 9.365810188623199, + "grad_norm": 1.350519061088562, + "learning_rate": 6.341898113768009e-07, + "loss": 0.0576, "step": 63060 }, { - "epoch": 4.683647705331947, - "grad_norm": 0.5094510316848755, - "learning_rate": 1.8981137680083175e-06, - "loss": 0.0596, + "epoch": 9.367295410663894, + "grad_norm": 1.250366449356079, + "learning_rate": 6.327045893361059e-07, + "loss": 0.059, "step": 63070 }, { - "epoch": 4.684390316352294, - "grad_norm": 1.9838165044784546, - "learning_rate": 1.893658101886232e-06, - "loss": 0.0584, + "epoch": 9.36878063270459, + "grad_norm": 0.40602949261665344, + "learning_rate": 6.312193672954107e-07, + "loss": 0.0631, "step": 63080 }, { - "epoch": 4.685132927372642, - "grad_norm": 0.8991755247116089, - "learning_rate": 1.8892024357641469e-06, - "loss": 0.0556, + "epoch": 9.370265854745284, + "grad_norm": 1.4695994853973389, + "learning_rate": 6.297341452547157e-07, + "loss": 0.0604, "step": 63090 }, { - "epoch": 4.68587553839299, - "grad_norm": 1.3264687061309814, - "learning_rate": 1.8847467696420616e-06, - "loss": 0.0679, + "epoch": 9.37175107678598, + "grad_norm": 0.6759101748466492, + "learning_rate": 6.282489232140205e-07, + "loss": 0.0524, "step": 63100 }, { - "epoch": 4.686618149413337, - "grad_norm": 1.1737868785858154, - "learning_rate": 1.8802911035199762e-06, - "loss": 0.079, + "epoch": 9.373236298826674, + "grad_norm": 0.9469413757324219, + "learning_rate": 6.267637011733255e-07, + "loss": 0.0454, "step": 63110 }, { - "epoch": 4.687360760433685, - "grad_norm": 1.7225794792175293, - "learning_rate": 1.875835437397891e-06, - "loss": 0.0473, + "epoch": 9.37472152086737, + "grad_norm": 1.0243042707443237, + "learning_rate": 6.252784791326304e-07, + "loss": 0.0682, "step": 63120 }, { - "epoch": 4.688103371454032, - "grad_norm": 0.6222664713859558, - "learning_rate": 1.8713797712758058e-06, - "loss": 0.0525, + "epoch": 9.376206742908066, + "grad_norm": 1.182602047920227, + "learning_rate": 6.237932570919353e-07, + "loss": 0.0489, "step": 63130 }, { - "epoch": 4.68884598247438, - "grad_norm": 2.4344029426574707, - "learning_rate": 1.8669241051537206e-06, - "loss": 0.0497, + "epoch": 9.37769196494876, + "grad_norm": 0.5317026972770691, + "learning_rate": 6.223080350512402e-07, + "loss": 0.0523, "step": 63140 }, { - "epoch": 4.689588593494728, - "grad_norm": 0.5827934741973877, - "learning_rate": 1.8624684390316353e-06, - "loss": 0.0458, + "epoch": 9.379177186989455, + "grad_norm": 0.8718259930610657, + "learning_rate": 6.208228130105451e-07, + "loss": 0.0437, "step": 63150 }, { - "epoch": 4.690331204515075, - "grad_norm": 4.944514274597168, - "learning_rate": 1.8580127729095501e-06, - "loss": 0.0326, + "epoch": 9.38066240903015, + "grad_norm": 0.6692548394203186, + "learning_rate": 6.193375909698501e-07, + "loss": 0.0706, "step": 63160 }, { - "epoch": 4.691073815535423, - "grad_norm": 1.2443019151687622, - "learning_rate": 1.853557106787465e-06, - "loss": 0.0435, + "epoch": 9.382147631070845, + "grad_norm": 1.0512770414352417, + "learning_rate": 6.17852368929155e-07, + "loss": 0.0631, "step": 63170 }, { - "epoch": 4.6918164265557705, - "grad_norm": 0.31377652287483215, - "learning_rate": 1.8491014406653795e-06, - "loss": 0.0562, + "epoch": 9.383632853111541, + "grad_norm": 0.8435956835746765, + "learning_rate": 6.163671468884598e-07, + "loss": 0.0456, "step": 63180 }, { - "epoch": 4.6925590375761175, - "grad_norm": 1.3072824478149414, - "learning_rate": 1.8446457745432943e-06, - "loss": 0.0721, + "epoch": 9.385118075152235, + "grad_norm": 0.6097307801246643, + "learning_rate": 6.148819248477648e-07, + "loss": 0.052, "step": 63190 }, { - "epoch": 4.693301648596465, - "grad_norm": 1.9341398477554321, - "learning_rate": 1.840190108421209e-06, - "loss": 0.0605, + "epoch": 9.38660329719293, + "grad_norm": 0.8554158210754395, + "learning_rate": 6.133967028070696e-07, + "loss": 0.0448, "step": 63200 }, { - "epoch": 4.694044259616812, - "grad_norm": 0.47264960408210754, - "learning_rate": 1.8357344422991236e-06, - "loss": 0.0632, + "epoch": 9.388088519233625, + "grad_norm": 1.3168599605560303, + "learning_rate": 6.119114807663746e-07, + "loss": 0.0614, "step": 63210 }, { - "epoch": 4.69478687063716, - "grad_norm": 2.389784574508667, - "learning_rate": 1.8312787761770386e-06, - "loss": 0.0605, + "epoch": 9.38957374127432, + "grad_norm": 0.7756260633468628, + "learning_rate": 6.104262587256796e-07, + "loss": 0.0391, "step": 63220 }, { - "epoch": 4.695529481657508, - "grad_norm": 2.27815842628479, - "learning_rate": 1.8268231100549534e-06, - "loss": 0.0653, + "epoch": 9.391058963315016, + "grad_norm": 0.5791680216789246, + "learning_rate": 6.089410366849844e-07, + "loss": 0.052, "step": 63230 }, { - "epoch": 4.696272092677855, - "grad_norm": 1.2528233528137207, - "learning_rate": 1.822367443932868e-06, - "loss": 0.0568, + "epoch": 9.39254418535571, + "grad_norm": 1.4955168962478638, + "learning_rate": 6.074558146442894e-07, + "loss": 0.0472, "step": 63240 }, { - "epoch": 4.697014703698203, - "grad_norm": 2.5911924839019775, - "learning_rate": 1.8179117778107827e-06, - "loss": 0.0576, + "epoch": 9.394029407396406, + "grad_norm": 0.7317465543746948, + "learning_rate": 6.059705926035942e-07, + "loss": 0.0575, "step": 63250 }, { - "epoch": 4.69775731471855, - "grad_norm": 1.9090697765350342, - "learning_rate": 1.8134561116886975e-06, - "loss": 0.0368, + "epoch": 9.3955146294371, + "grad_norm": 0.9136133790016174, + "learning_rate": 6.044853705628992e-07, + "loss": 0.0588, "step": 63260 }, { - "epoch": 4.698499925738898, - "grad_norm": 0.29426848888397217, - "learning_rate": 1.809000445566612e-06, - "loss": 0.0387, + "epoch": 9.396999851477796, + "grad_norm": 0.40172865986824036, + "learning_rate": 6.030001485222041e-07, + "loss": 0.0531, "step": 63270 }, { - "epoch": 4.699242536759246, - "grad_norm": 0.4976974129676819, - "learning_rate": 1.804544779444527e-06, - "loss": 0.0605, + "epoch": 9.398485073518492, + "grad_norm": 1.070167899131775, + "learning_rate": 6.01514926481509e-07, + "loss": 0.0367, "step": 63280 }, { - "epoch": 4.699985147779593, - "grad_norm": 2.4460465908050537, - "learning_rate": 1.8000891133224419e-06, - "loss": 0.0946, + "epoch": 9.399970295559186, + "grad_norm": 0.7728586792945862, + "learning_rate": 6.00029704440814e-07, + "loss": 0.0466, "step": 63290 }, { - "epoch": 4.700727758799941, - "grad_norm": 1.7145735025405884, - "learning_rate": 1.7956334472003564e-06, - "loss": 0.1029, + "epoch": 9.401455517599882, + "grad_norm": 0.7134668231010437, + "learning_rate": 5.985444824001188e-07, + "loss": 0.0722, "step": 63300 }, { - "epoch": 4.701470369820288, - "grad_norm": 3.410822868347168, - "learning_rate": 1.7911777810782712e-06, - "loss": 0.0448, + "epoch": 9.402940739640576, + "grad_norm": 0.7128061056137085, + "learning_rate": 5.970592603594238e-07, + "loss": 0.0476, "step": 63310 }, { - "epoch": 4.702212980840636, - "grad_norm": 2.253828525543213, - "learning_rate": 1.786722114956186e-06, - "loss": 0.0709, + "epoch": 9.404425961681271, + "grad_norm": 0.4389706552028656, + "learning_rate": 5.955740383187287e-07, + "loss": 0.0683, "step": 63320 }, { - "epoch": 4.7029555918609836, - "grad_norm": 0.8834261894226074, - "learning_rate": 1.7822664488341008e-06, - "loss": 0.0709, + "epoch": 9.405911183721967, + "grad_norm": 0.2874282896518707, + "learning_rate": 5.940888162780336e-07, + "loss": 0.0556, "step": 63330 }, { - "epoch": 4.703698202881331, - "grad_norm": 0.5737817883491516, - "learning_rate": 1.7778107827120154e-06, - "loss": 0.0317, + "epoch": 9.407396405762661, + "grad_norm": 1.0555808544158936, + "learning_rate": 5.926035942373385e-07, + "loss": 0.0674, "step": 63340 }, { - "epoch": 4.7044408139016785, - "grad_norm": 2.2672598361968994, - "learning_rate": 1.7733551165899303e-06, - "loss": 0.0572, + "epoch": 9.408881627803357, + "grad_norm": 0.664207398891449, + "learning_rate": 5.911183721966434e-07, + "loss": 0.065, "step": 63350 }, { - "epoch": 4.7051834249220255, - "grad_norm": 0.7184361219406128, - "learning_rate": 1.7688994504678451e-06, - "loss": 0.0242, + "epoch": 9.410366849844051, + "grad_norm": 1.2207247018814087, + "learning_rate": 5.896331501559484e-07, + "loss": 0.0591, "step": 63360 }, { - "epoch": 4.705926035942373, - "grad_norm": 2.6737210750579834, - "learning_rate": 1.7644437843457597e-06, - "loss": 0.063, + "epoch": 9.411852071884747, + "grad_norm": 1.1976929903030396, + "learning_rate": 5.881479281152533e-07, + "loss": 0.054, "step": 63370 }, { - "epoch": 4.706668646962721, - "grad_norm": 1.496285080909729, - "learning_rate": 1.7599881182236745e-06, - "loss": 0.0321, + "epoch": 9.413337293925442, + "grad_norm": 1.1441959142684937, + "learning_rate": 5.866627060745582e-07, + "loss": 0.0623, "step": 63380 }, { - "epoch": 4.707411257983068, - "grad_norm": 1.187997579574585, - "learning_rate": 1.7555324521015893e-06, - "loss": 0.0588, + "epoch": 9.414822515966137, + "grad_norm": 1.0371273756027222, + "learning_rate": 5.851774840338631e-07, + "loss": 0.0463, "step": 63390 }, { - "epoch": 4.708153869003416, - "grad_norm": 0.6710416078567505, - "learning_rate": 1.7510767859795038e-06, - "loss": 0.04, + "epoch": 9.416307738006832, + "grad_norm": 0.6782839298248291, + "learning_rate": 5.83692261993168e-07, + "loss": 0.0476, "step": 63400 }, { - "epoch": 4.708896480023764, - "grad_norm": 1.344570279121399, - "learning_rate": 1.7466211198574186e-06, - "loss": 0.0616, + "epoch": 9.417792960047526, + "grad_norm": 0.9542140960693359, + "learning_rate": 5.822070399524729e-07, + "loss": 0.0718, "step": 63410 }, { - "epoch": 4.709639091044111, - "grad_norm": 1.7704460620880127, - "learning_rate": 1.7421654537353336e-06, - "loss": 0.0577, + "epoch": 9.419278182088222, + "grad_norm": 0.2834306061267853, + "learning_rate": 5.807218179117779e-07, + "loss": 0.0473, "step": 63420 }, { - "epoch": 4.710381702064459, - "grad_norm": 1.4288161993026733, - "learning_rate": 1.7377097876132482e-06, - "loss": 0.0438, + "epoch": 9.420763404128918, + "grad_norm": 1.0381821393966675, + "learning_rate": 5.792365958710828e-07, + "loss": 0.0549, "step": 63430 }, { - "epoch": 4.711124313084806, - "grad_norm": 0.8680292367935181, - "learning_rate": 1.733254121491163e-06, - "loss": 0.0569, + "epoch": 9.422248626169612, + "grad_norm": 1.2295236587524414, + "learning_rate": 5.777513738303877e-07, + "loss": 0.035, "step": 63440 }, { - "epoch": 4.711866924105154, - "grad_norm": 0.42400112748146057, - "learning_rate": 1.7287984553690777e-06, - "loss": 0.0361, + "epoch": 9.423733848210308, + "grad_norm": 0.9140399098396301, + "learning_rate": 5.762661517896925e-07, + "loss": 0.0577, "step": 63450 }, { - "epoch": 4.712609535125502, - "grad_norm": 0.3633495271205902, - "learning_rate": 1.7243427892469923e-06, - "loss": 0.0364, + "epoch": 9.425219070251002, + "grad_norm": 1.0174657106399536, + "learning_rate": 5.747809297489975e-07, + "loss": 0.073, "step": 63460 }, { - "epoch": 4.713352146145849, - "grad_norm": 0.9491689205169678, - "learning_rate": 1.719887123124907e-06, - "loss": 0.0824, + "epoch": 9.426704292291697, + "grad_norm": 1.6030808687210083, + "learning_rate": 5.732957077083024e-07, + "loss": 0.0436, "step": 63470 }, { - "epoch": 4.714094757166197, - "grad_norm": 2.717560052871704, - "learning_rate": 1.715431457002822e-06, - "loss": 0.0632, + "epoch": 9.428189514332393, + "grad_norm": 0.6991294622421265, + "learning_rate": 5.718104856676074e-07, + "loss": 0.0492, "step": 63480 }, { - "epoch": 4.714837368186544, - "grad_norm": 2.0803704261779785, - "learning_rate": 1.7109757908807369e-06, - "loss": 0.05, + "epoch": 9.429674736373087, + "grad_norm": 0.5950497388839722, + "learning_rate": 5.703252636269123e-07, + "loss": 0.0499, "step": 63490 }, { - "epoch": 4.7155799792068915, - "grad_norm": 1.6950163841247559, - "learning_rate": 1.7065201247586514e-06, - "loss": 0.0636, + "epoch": 9.431159958413783, + "grad_norm": 0.7248765826225281, + "learning_rate": 5.688400415862171e-07, + "loss": 0.0499, "step": 63500 }, { - "epoch": 4.716322590227239, - "grad_norm": 0.8007744550704956, - "learning_rate": 1.7020644586365662e-06, - "loss": 0.0632, + "epoch": 9.432645180454477, + "grad_norm": 1.8232178688049316, + "learning_rate": 5.673548195455221e-07, + "loss": 0.0502, "step": 63510 }, { - "epoch": 4.717065201247586, - "grad_norm": 0.830875039100647, - "learning_rate": 1.697608792514481e-06, - "loss": 0.0272, + "epoch": 9.434130402495173, + "grad_norm": 1.7857370376586914, + "learning_rate": 5.65869597504827e-07, + "loss": 0.0504, "step": 63520 }, { - "epoch": 4.717807812267934, - "grad_norm": 0.48793575167655945, - "learning_rate": 1.6931531263923956e-06, - "loss": 0.043, + "epoch": 9.435615624535869, + "grad_norm": 0.7211408615112305, + "learning_rate": 5.643843754641319e-07, + "loss": 0.0528, "step": 63530 }, { - "epoch": 4.718550423288281, - "grad_norm": 1.9636634588241577, - "learning_rate": 1.6886974602703104e-06, - "loss": 0.0448, + "epoch": 9.437100846576563, + "grad_norm": 0.6421229243278503, + "learning_rate": 5.628991534234368e-07, + "loss": 0.0655, "step": 63540 }, { - "epoch": 4.719293034308629, - "grad_norm": 1.2597278356552124, - "learning_rate": 1.6842417941482254e-06, - "loss": 0.0393, + "epoch": 9.438586068617258, + "grad_norm": 1.601430892944336, + "learning_rate": 5.614139313827418e-07, + "loss": 0.0567, "step": 63550 }, { - "epoch": 4.720035645328977, - "grad_norm": 1.140265941619873, - "learning_rate": 1.67978612802614e-06, - "loss": 0.0668, + "epoch": 9.440071290657954, + "grad_norm": 2.0657436847686768, + "learning_rate": 5.599287093420467e-07, + "loss": 0.0594, "step": 63560 }, { - "epoch": 4.720778256349324, - "grad_norm": 1.5204119682312012, - "learning_rate": 1.6753304619040547e-06, - "loss": 0.0372, + "epoch": 9.441556512698648, + "grad_norm": 0.7642673254013062, + "learning_rate": 5.584434873013516e-07, + "loss": 0.0572, "step": 63570 }, { - "epoch": 4.721520867369672, - "grad_norm": 1.4788594245910645, - "learning_rate": 1.6708747957819695e-06, - "loss": 0.0902, + "epoch": 9.443041734739344, + "grad_norm": 1.3112720251083374, + "learning_rate": 5.569582652606565e-07, + "loss": 0.0479, "step": 63580 }, { - "epoch": 4.722263478390019, - "grad_norm": 1.6881049871444702, - "learning_rate": 1.666419129659884e-06, - "loss": 0.0413, + "epoch": 9.444526956780038, + "grad_norm": 0.7265917062759399, + "learning_rate": 5.554730432199614e-07, + "loss": 0.0459, "step": 63590 }, { - "epoch": 4.723006089410367, - "grad_norm": 2.068535327911377, - "learning_rate": 1.6619634635377988e-06, - "loss": 0.0732, + "epoch": 9.446012178820734, + "grad_norm": 0.47517985105514526, + "learning_rate": 5.539878211792664e-07, + "loss": 0.0498, "step": 63600 }, { - "epoch": 4.723748700430715, - "grad_norm": 2.630728006362915, - "learning_rate": 1.6575077974157138e-06, - "loss": 0.0431, + "epoch": 9.44749740086143, + "grad_norm": 1.4434775114059448, + "learning_rate": 5.525025991385713e-07, + "loss": 0.0661, "step": 63610 }, { - "epoch": 4.724491311451062, - "grad_norm": 0.42184069752693176, - "learning_rate": 1.6530521312936284e-06, - "loss": 0.0364, + "epoch": 9.448982622902124, + "grad_norm": 0.9332127571105957, + "learning_rate": 5.510173770978762e-07, + "loss": 0.0528, "step": 63620 }, { - "epoch": 4.72523392247141, - "grad_norm": 2.413302421569824, - "learning_rate": 1.6485964651715432e-06, - "loss": 0.0655, + "epoch": 9.45046784494282, + "grad_norm": 0.766245424747467, + "learning_rate": 5.495321550571811e-07, + "loss": 0.058, "step": 63630 }, { - "epoch": 4.725976533491757, - "grad_norm": 0.9263471364974976, - "learning_rate": 1.644140799049458e-06, - "loss": 0.0456, + "epoch": 9.451953066983513, + "grad_norm": 1.0345425605773926, + "learning_rate": 5.48046933016486e-07, + "loss": 0.0583, "step": 63640 }, { - "epoch": 4.726719144512105, - "grad_norm": 1.1987792253494263, - "learning_rate": 1.6396851329273726e-06, - "loss": 0.0405, + "epoch": 9.45343828902421, + "grad_norm": 1.2403125762939453, + "learning_rate": 5.46561710975791e-07, + "loss": 0.0651, "step": 63650 }, { - "epoch": 4.7274617555324525, - "grad_norm": 3.2439706325531006, - "learning_rate": 1.6352294668052873e-06, - "loss": 0.0617, + "epoch": 9.454923511064905, + "grad_norm": 0.4796169102191925, + "learning_rate": 5.450764889350958e-07, + "loss": 0.0456, "step": 63660 }, { - "epoch": 4.7282043665527995, - "grad_norm": 0.45291805267333984, - "learning_rate": 1.6307738006832021e-06, - "loss": 0.0573, + "epoch": 9.456408733105599, + "grad_norm": 1.10849928855896, + "learning_rate": 5.435912668944008e-07, + "loss": 0.042, "step": 63670 }, { - "epoch": 4.728946977573147, - "grad_norm": 0.45011815428733826, - "learning_rate": 1.6263181345611171e-06, - "loss": 0.0522, + "epoch": 9.457893955146295, + "grad_norm": 0.5190598368644714, + "learning_rate": 5.421060448537057e-07, + "loss": 0.0615, "step": 63680 }, { - "epoch": 4.729689588593494, - "grad_norm": 0.6326998472213745, - "learning_rate": 1.6218624684390317e-06, - "loss": 0.0621, + "epoch": 9.459379177186989, + "grad_norm": 0.47407037019729614, + "learning_rate": 5.406208228130106e-07, + "loss": 0.0595, "step": 63690 }, { - "epoch": 4.730432199613842, - "grad_norm": 0.5624150633811951, - "learning_rate": 1.6174068023169465e-06, - "loss": 0.0394, + "epoch": 9.460864399227685, + "grad_norm": 0.6373763084411621, + "learning_rate": 5.391356007723156e-07, + "loss": 0.0505, "step": 63700 }, { - "epoch": 4.73117481063419, - "grad_norm": 0.9579383134841919, - "learning_rate": 1.6129511361948612e-06, - "loss": 0.0856, + "epoch": 9.46234962126838, + "grad_norm": 0.38864976167678833, + "learning_rate": 5.376503787316204e-07, + "loss": 0.0415, "step": 63710 }, { - "epoch": 4.731917421654537, - "grad_norm": 1.3412039279937744, - "learning_rate": 1.6084954700727758e-06, - "loss": 0.0584, + "epoch": 9.463834843309074, + "grad_norm": 0.8846045136451721, + "learning_rate": 5.361651566909253e-07, + "loss": 0.0429, "step": 63720 }, { - "epoch": 4.732660032674885, - "grad_norm": 2.0105788707733154, - "learning_rate": 1.6040398039506906e-06, - "loss": 0.0573, + "epoch": 9.46532006534977, + "grad_norm": 0.519731342792511, + "learning_rate": 5.346799346502302e-07, + "loss": 0.0482, "step": 63730 }, { - "epoch": 4.733402643695232, - "grad_norm": 1.5913349390029907, - "learning_rate": 1.5995841378286056e-06, - "loss": 0.0634, + "epoch": 9.466805287390464, + "grad_norm": 0.6602308750152588, + "learning_rate": 5.331947126095352e-07, + "loss": 0.0555, "step": 63740 }, { - "epoch": 4.73414525471558, - "grad_norm": 0.57387375831604, - "learning_rate": 1.5951284717065202e-06, - "loss": 0.037, + "epoch": 9.46829050943116, + "grad_norm": 1.1158581972122192, + "learning_rate": 5.317094905688402e-07, + "loss": 0.0616, "step": 63750 }, { - "epoch": 4.734887865735928, - "grad_norm": 0.6416048407554626, - "learning_rate": 1.590672805584435e-06, - "loss": 0.0565, + "epoch": 9.469775731471856, + "grad_norm": 0.9060409069061279, + "learning_rate": 5.30224268528145e-07, + "loss": 0.0524, "step": 63760 }, { - "epoch": 4.735630476756275, - "grad_norm": 0.8845180869102478, - "learning_rate": 1.5862171394623497e-06, - "loss": 0.0461, + "epoch": 9.47126095351255, + "grad_norm": 0.7163268327713013, + "learning_rate": 5.287390464874499e-07, + "loss": 0.0528, "step": 63770 }, { - "epoch": 4.736373087776623, - "grad_norm": 2.478346347808838, - "learning_rate": 1.5817614733402643e-06, - "loss": 0.0446, + "epoch": 9.472746175553246, + "grad_norm": 1.3203110694885254, + "learning_rate": 5.272538244467548e-07, + "loss": 0.0545, "step": 63780 }, { - "epoch": 4.73711569879697, - "grad_norm": 0.9896259307861328, - "learning_rate": 1.577305807218179e-06, - "loss": 0.085, + "epoch": 9.47423139759394, + "grad_norm": 0.7185261249542236, + "learning_rate": 5.257686024060597e-07, + "loss": 0.0551, "step": 63790 }, { - "epoch": 4.737858309817318, - "grad_norm": 1.2818790674209595, - "learning_rate": 1.5728501410960939e-06, - "loss": 0.0503, + "epoch": 9.475716619634635, + "grad_norm": 0.6173855662345886, + "learning_rate": 5.242833803653647e-07, + "loss": 0.0676, "step": 63800 }, { - "epoch": 4.738600920837666, - "grad_norm": 1.4775575399398804, - "learning_rate": 1.5683944749740086e-06, - "loss": 0.0388, + "epoch": 9.477201841675331, + "grad_norm": 0.670606255531311, + "learning_rate": 5.227981583246696e-07, + "loss": 0.0675, "step": 63810 }, { - "epoch": 4.739343531858013, - "grad_norm": 0.6616837978363037, - "learning_rate": 1.5639388088519234e-06, - "loss": 0.0482, + "epoch": 9.478687063716025, + "grad_norm": 0.4557764530181885, + "learning_rate": 5.213129362839745e-07, + "loss": 0.0418, "step": 63820 }, { - "epoch": 4.7400861428783605, - "grad_norm": 0.6766378879547119, - "learning_rate": 1.5594831427298382e-06, - "loss": 0.0487, + "epoch": 9.480172285756721, + "grad_norm": 0.7182461023330688, + "learning_rate": 5.198277142432794e-07, + "loss": 0.0516, "step": 63830 }, { - "epoch": 4.7408287538987075, - "grad_norm": 4.399589538574219, - "learning_rate": 1.555027476607753e-06, - "loss": 0.0676, + "epoch": 9.481657507797415, + "grad_norm": 0.5253933668136597, + "learning_rate": 5.183424922025843e-07, + "loss": 0.0571, "step": 63840 }, { - "epoch": 4.741571364919055, - "grad_norm": 0.8377204537391663, - "learning_rate": 1.5505718104856676e-06, - "loss": 0.0473, + "epoch": 9.48314272983811, + "grad_norm": 1.8607404232025146, + "learning_rate": 5.168572701618893e-07, + "loss": 0.0472, "step": 63850 }, { - "epoch": 4.742313975939403, - "grad_norm": 0.36241453886032104, - "learning_rate": 1.5461161443635823e-06, - "loss": 0.0365, + "epoch": 9.484627951878807, + "grad_norm": 0.8381548523902893, + "learning_rate": 5.153720481211941e-07, + "loss": 0.0459, "step": 63860 }, { - "epoch": 4.74305658695975, - "grad_norm": 3.0188138484954834, - "learning_rate": 1.5416604782414971e-06, - "loss": 0.0621, + "epoch": 9.4861131739195, + "grad_norm": 0.6967155933380127, + "learning_rate": 5.138868260804991e-07, + "loss": 0.0492, "step": 63870 }, { - "epoch": 4.743799197980098, - "grad_norm": 1.3264600038528442, - "learning_rate": 1.537204812119412e-06, - "loss": 0.0571, + "epoch": 9.487598395960196, + "grad_norm": 0.9393335580825806, + "learning_rate": 5.12401604039804e-07, + "loss": 0.0537, "step": 63880 }, { - "epoch": 4.744541809000445, - "grad_norm": 2.425934076309204, - "learning_rate": 1.5327491459973267e-06, - "loss": 0.0562, + "epoch": 9.48908361800089, + "grad_norm": 0.7347944974899292, + "learning_rate": 5.109163819991089e-07, + "loss": 0.0366, "step": 63890 }, { - "epoch": 4.745284420020793, - "grad_norm": 1.697583794593811, - "learning_rate": 1.5282934798752415e-06, - "loss": 0.0656, + "epoch": 9.490568840041586, + "grad_norm": 1.2542600631713867, + "learning_rate": 5.094311599584139e-07, + "loss": 0.0487, "step": 63900 }, { - "epoch": 4.746027031041141, - "grad_norm": 1.6972935199737549, - "learning_rate": 1.523837813753156e-06, - "loss": 0.0846, + "epoch": 9.492054062082282, + "grad_norm": 0.42937129735946655, + "learning_rate": 5.079459379177187e-07, + "loss": 0.0671, "step": 63910 }, { - "epoch": 4.746769642061488, - "grad_norm": 0.5245199203491211, - "learning_rate": 1.5193821476310708e-06, - "loss": 0.048, + "epoch": 9.493539284122976, + "grad_norm": 0.6828808784484863, + "learning_rate": 5.064607158770237e-07, + "loss": 0.0384, "step": 63920 }, { - "epoch": 4.747512253081836, - "grad_norm": 1.2065707445144653, - "learning_rate": 1.5149264815089856e-06, - "loss": 0.0582, + "epoch": 9.495024506163672, + "grad_norm": 0.7077220678329468, + "learning_rate": 5.049754938363285e-07, + "loss": 0.0607, "step": 63930 }, { - "epoch": 4.748254864102183, - "grad_norm": 2.5891146659851074, - "learning_rate": 1.5104708153869004e-06, - "loss": 0.0675, + "epoch": 9.496509728204366, + "grad_norm": 0.7866410613059998, + "learning_rate": 5.034902717956335e-07, + "loss": 0.0626, "step": 63940 }, { - "epoch": 4.748997475122531, - "grad_norm": 1.8636987209320068, - "learning_rate": 1.5060151492648152e-06, - "loss": 0.0364, + "epoch": 9.497994950245062, + "grad_norm": 0.7980449795722961, + "learning_rate": 5.020050497549385e-07, + "loss": 0.0471, "step": 63950 }, { - "epoch": 4.749740086142879, - "grad_norm": 0.759530246257782, - "learning_rate": 1.50155948314273e-06, - "loss": 0.0409, + "epoch": 9.499480172285757, + "grad_norm": 1.281341552734375, + "learning_rate": 5.005198277142433e-07, + "loss": 0.0486, "step": 63960 }, { - "epoch": 4.750482697163226, - "grad_norm": 1.0763570070266724, - "learning_rate": 1.4971038170206445e-06, - "loss": 0.0513, + "epoch": 9.500965394326451, + "grad_norm": 0.6784372329711914, + "learning_rate": 4.990346056735483e-07, + "loss": 0.0428, "step": 63970 }, { - "epoch": 4.751225308183574, - "grad_norm": 0.6795147657394409, - "learning_rate": 1.4926481508985593e-06, - "loss": 0.0513, + "epoch": 9.502450616367147, + "grad_norm": 1.3833361864089966, + "learning_rate": 4.975493836328531e-07, + "loss": 0.0495, "step": 63980 }, { - "epoch": 4.751967919203921, - "grad_norm": 1.966013789176941, - "learning_rate": 1.488192484776474e-06, - "loss": 0.078, + "epoch": 9.503935838407841, + "grad_norm": 1.5026179552078247, + "learning_rate": 4.96064161592158e-07, + "loss": 0.0517, "step": 63990 }, { - "epoch": 4.7527105302242685, - "grad_norm": 1.7441941499710083, - "learning_rate": 1.4837368186543889e-06, - "loss": 0.0454, + "epoch": 9.505421060448537, + "grad_norm": 0.9454336762428284, + "learning_rate": 4.94578939551463e-07, + "loss": 0.0392, "step": 64000 }, { - "epoch": 4.753453141244616, - "grad_norm": 0.6811540722846985, - "learning_rate": 1.4792811525323037e-06, - "loss": 0.0395, + "epoch": 9.506906282489233, + "grad_norm": 0.583394467830658, + "learning_rate": 4.930937175107679e-07, + "loss": 0.0385, "step": 64010 }, { - "epoch": 4.754195752264963, - "grad_norm": 0.5677528381347656, - "learning_rate": 1.4748254864102184e-06, - "loss": 0.0369, + "epoch": 9.508391504529927, + "grad_norm": 1.3831298351287842, + "learning_rate": 4.916084954700729e-07, + "loss": 0.0557, "step": 64020 }, { - "epoch": 4.754938363285311, - "grad_norm": 0.3127375841140747, - "learning_rate": 1.4703698202881332e-06, - "loss": 0.0519, + "epoch": 9.509876726570623, + "grad_norm": 1.121113896369934, + "learning_rate": 4.901232734293777e-07, + "loss": 0.0477, "step": 64030 }, - { - "epoch": 4.755680974305658, - "grad_norm": 1.2747459411621094, - "learning_rate": 1.4659141541660478e-06, - "loss": 0.0507, + { + "epoch": 9.511361948611317, + "grad_norm": 0.5695465207099915, + "learning_rate": 4.886380513886826e-07, + "loss": 0.0412, "step": 64040 }, { - "epoch": 4.756423585326006, - "grad_norm": 1.9824333190917969, - "learning_rate": 1.4614584880439626e-06, - "loss": 0.0485, + "epoch": 9.512847170652012, + "grad_norm": 0.8473941683769226, + "learning_rate": 4.871528293479876e-07, + "loss": 0.0427, "step": 64050 }, { - "epoch": 4.757166196346354, - "grad_norm": 0.8089881539344788, - "learning_rate": 1.4570028219218774e-06, - "loss": 0.0495, + "epoch": 9.514332392692708, + "grad_norm": 0.5661032795906067, + "learning_rate": 4.856676073072924e-07, + "loss": 0.0598, "step": 64060 }, { - "epoch": 4.757908807366701, - "grad_norm": 0.6729184985160828, - "learning_rate": 1.4525471557997921e-06, - "loss": 0.0291, + "epoch": 9.515817614733402, + "grad_norm": 0.9760502576828003, + "learning_rate": 4.841823852665974e-07, + "loss": 0.0531, "step": 64070 }, { - "epoch": 4.758651418387049, - "grad_norm": 1.3998851776123047, - "learning_rate": 1.448091489677707e-06, - "loss": 0.0749, + "epoch": 9.517302836774098, + "grad_norm": 0.8279736042022705, + "learning_rate": 4.826971632259023e-07, + "loss": 0.0628, "step": 64080 }, { - "epoch": 4.759394029407396, - "grad_norm": 1.4854328632354736, - "learning_rate": 1.4436358235556217e-06, - "loss": 0.0579, + "epoch": 9.518788058814792, + "grad_norm": 1.024143934249878, + "learning_rate": 4.812119411852072e-07, + "loss": 0.0577, "step": 64090 }, { - "epoch": 4.760136640427744, - "grad_norm": 0.500187873840332, - "learning_rate": 1.4391801574335363e-06, - "loss": 0.0491, + "epoch": 9.520273280855488, + "grad_norm": 1.033553123474121, + "learning_rate": 4.797267191445122e-07, + "loss": 0.0648, "step": 64100 }, { - "epoch": 4.760879251448092, - "grad_norm": 2.0688111782073975, - "learning_rate": 1.434724491311451e-06, - "loss": 0.0442, + "epoch": 9.521758502896184, + "grad_norm": 0.7202749848365784, + "learning_rate": 4.78241497103817e-07, + "loss": 0.0499, "step": 64110 }, { - "epoch": 4.761621862468439, - "grad_norm": 0.8344265222549438, - "learning_rate": 1.4302688251893658e-06, - "loss": 0.0608, + "epoch": 9.523243724936878, + "grad_norm": 0.8777990937232971, + "learning_rate": 4.7675627506312193e-07, + "loss": 0.0526, "step": 64120 }, { - "epoch": 4.762364473488787, - "grad_norm": 0.7157622575759888, - "learning_rate": 1.4258131590672804e-06, - "loss": 0.0717, + "epoch": 9.524728946977573, + "grad_norm": 0.8504054546356201, + "learning_rate": 4.7527105302242684e-07, + "loss": 0.0525, "step": 64130 }, { - "epoch": 4.763107084509134, - "grad_norm": 2.2929582595825195, - "learning_rate": 1.4213574929451954e-06, - "loss": 0.0681, + "epoch": 9.526214169018267, + "grad_norm": 0.585706889629364, + "learning_rate": 4.7378583098173186e-07, + "loss": 0.0651, "step": 64140 }, { - "epoch": 4.763849695529482, - "grad_norm": 1.3758022785186768, - "learning_rate": 1.4169018268231102e-06, - "loss": 0.0542, + "epoch": 9.527699391058963, + "grad_norm": 0.19927702844142914, + "learning_rate": 4.7230060894103677e-07, + "loss": 0.0597, "step": 64150 }, { - "epoch": 4.7645923065498295, - "grad_norm": 1.4729808568954468, - "learning_rate": 1.4124461607010248e-06, - "loss": 0.0578, + "epoch": 9.529184613099659, + "grad_norm": 0.7638365626335144, + "learning_rate": 4.708153869003416e-07, + "loss": 0.0416, "step": 64160 }, { - "epoch": 4.7653349175701765, - "grad_norm": 3.680039882659912, - "learning_rate": 1.4079904945789395e-06, - "loss": 0.0774, + "epoch": 9.530669835140353, + "grad_norm": 0.7551804780960083, + "learning_rate": 4.6933016485964653e-07, + "loss": 0.0654, "step": 64170 }, { - "epoch": 4.766077528590524, - "grad_norm": 0.58192378282547, - "learning_rate": 1.4035348284568543e-06, - "loss": 0.0501, + "epoch": 9.532155057181049, + "grad_norm": 1.1386343240737915, + "learning_rate": 4.6784494281895144e-07, + "loss": 0.0447, "step": 64180 }, { - "epoch": 4.766820139610871, - "grad_norm": 0.3413379192352295, - "learning_rate": 1.3990791623347691e-06, - "loss": 0.0271, + "epoch": 9.533640279221743, + "grad_norm": 1.1652734279632568, + "learning_rate": 4.6635972077825635e-07, + "loss": 0.0648, "step": 64190 }, { - "epoch": 4.767562750631219, - "grad_norm": 1.4650648832321167, - "learning_rate": 1.3946234962126837e-06, - "loss": 0.0451, + "epoch": 9.535125501262439, + "grad_norm": 0.6755314469337463, + "learning_rate": 4.6487449873756126e-07, + "loss": 0.0594, "step": 64200 }, { - "epoch": 4.768305361651567, - "grad_norm": 0.2223232537508011, - "learning_rate": 1.3901678300905987e-06, - "loss": 0.0705, + "epoch": 9.536610723303134, + "grad_norm": 0.9879747033119202, + "learning_rate": 4.633892766968662e-07, + "loss": 0.042, "step": 64210 }, { - "epoch": 4.769047972671914, - "grad_norm": 3.3965070247650146, - "learning_rate": 1.3857121639685135e-06, - "loss": 0.0701, + "epoch": 9.538095945343828, + "grad_norm": 0.3094334900379181, + "learning_rate": 4.6190405465617113e-07, + "loss": 0.0528, "step": 64220 }, { - "epoch": 4.769790583692262, - "grad_norm": 1.7762930393218994, - "learning_rate": 1.381256497846428e-06, - "loss": 0.0888, + "epoch": 9.539581167384524, + "grad_norm": 1.2739931344985962, + "learning_rate": 4.6041883261547604e-07, + "loss": 0.0582, "step": 64230 }, { - "epoch": 4.770533194712609, - "grad_norm": 0.9293942451477051, - "learning_rate": 1.3768008317243428e-06, - "loss": 0.0653, + "epoch": 9.54106638942522, + "grad_norm": 1.297001838684082, + "learning_rate": 4.5893361057478095e-07, + "loss": 0.0574, "step": 64240 }, { - "epoch": 4.771275805732957, - "grad_norm": 1.2319601774215698, - "learning_rate": 1.3723451656022576e-06, - "loss": 0.0603, + "epoch": 9.542551611465914, + "grad_norm": 1.0937061309814453, + "learning_rate": 4.5744838853408586e-07, + "loss": 0.068, "step": 64250 }, { - "epoch": 4.772018416753305, - "grad_norm": 0.7350060343742371, - "learning_rate": 1.3678894994801722e-06, - "loss": 0.0535, + "epoch": 9.54403683350661, + "grad_norm": 1.086991548538208, + "learning_rate": 4.559631664933908e-07, + "loss": 0.048, "step": 64260 }, { - "epoch": 4.772761027773652, - "grad_norm": 0.3854759633541107, - "learning_rate": 1.3634338333580872e-06, - "loss": 0.041, + "epoch": 9.545522055547304, + "grad_norm": 0.9579164385795593, + "learning_rate": 4.5447794445269574e-07, + "loss": 0.0561, "step": 64270 }, { - "epoch": 4.773503638794, - "grad_norm": 1.7290388345718384, - "learning_rate": 1.358978167236002e-06, - "loss": 0.0524, + "epoch": 9.547007277588, + "grad_norm": 1.18247389793396, + "learning_rate": 4.5299272241200065e-07, + "loss": 0.0608, "step": 64280 }, { - "epoch": 4.774246249814347, - "grad_norm": 1.4731237888336182, - "learning_rate": 1.3545225011139165e-06, - "loss": 0.0818, + "epoch": 9.548492499628695, + "grad_norm": 1.123586654663086, + "learning_rate": 4.5150750037130556e-07, + "loss": 0.0636, "step": 64290 }, { - "epoch": 4.774988860834695, - "grad_norm": 1.8884638547897339, - "learning_rate": 1.3500668349918313e-06, - "loss": 0.0443, + "epoch": 9.54997772166939, + "grad_norm": 0.5111974477767944, + "learning_rate": 4.5002227833061047e-07, + "loss": 0.0587, "step": 64300 }, { - "epoch": 4.7757314718550425, - "grad_norm": 1.970607042312622, - "learning_rate": 1.345611168869746e-06, - "loss": 0.0766, + "epoch": 9.551462943710085, + "grad_norm": 0.7695721387863159, + "learning_rate": 4.485370562899154e-07, + "loss": 0.0662, "step": 64310 }, { - "epoch": 4.7764740828753895, - "grad_norm": 0.458402544260025, - "learning_rate": 1.3411555027476606e-06, - "loss": 0.067, + "epoch": 9.552948165750779, + "grad_norm": 0.7634204626083374, + "learning_rate": 4.470518342492203e-07, + "loss": 0.0388, "step": 64320 }, { - "epoch": 4.777216693895737, - "grad_norm": 1.3864773511886597, - "learning_rate": 1.3366998366255754e-06, - "loss": 0.0479, + "epoch": 9.554433387791475, + "grad_norm": 1.0317970514297485, + "learning_rate": 4.455666122085252e-07, + "loss": 0.0553, "step": 64330 }, { - "epoch": 4.777959304916085, - "grad_norm": 3.7125790119171143, - "learning_rate": 1.3322441705034904e-06, - "loss": 0.088, + "epoch": 9.55591860983217, + "grad_norm": 0.6158688068389893, + "learning_rate": 4.4408139016783016e-07, + "loss": 0.0465, "step": 64340 }, { - "epoch": 4.778701915936432, - "grad_norm": 0.38081493973731995, - "learning_rate": 1.3277885043814052e-06, - "loss": 0.0677, + "epoch": 9.557403831872865, + "grad_norm": 1.1406865119934082, + "learning_rate": 4.4259616812713507e-07, + "loss": 0.0548, "step": 64350 }, { - "epoch": 4.77944452695678, - "grad_norm": 1.2474788427352905, - "learning_rate": 1.3233328382593198e-06, - "loss": 0.0681, + "epoch": 9.55888905391356, + "grad_norm": 0.5441305041313171, + "learning_rate": 4.4111094608644e-07, + "loss": 0.0503, "step": 64360 }, { - "epoch": 4.780187137977127, - "grad_norm": 0.9167222380638123, - "learning_rate": 1.3188771721372346e-06, - "loss": 0.0884, + "epoch": 9.560374275954254, + "grad_norm": 1.8708332777023315, + "learning_rate": 4.396257240457449e-07, + "loss": 0.0773, "step": 64370 }, { - "epoch": 4.780929748997475, - "grad_norm": 1.0856194496154785, - "learning_rate": 1.3144215060151493e-06, - "loss": 0.0562, + "epoch": 9.56185949799495, + "grad_norm": 0.9381576776504517, + "learning_rate": 4.381405020050498e-07, + "loss": 0.0469, "step": 64380 }, { - "epoch": 4.781672360017823, - "grad_norm": 1.6708109378814697, - "learning_rate": 1.309965839893064e-06, - "loss": 0.1026, + "epoch": 9.563344720035646, + "grad_norm": 1.1940032243728638, + "learning_rate": 4.3665527996435465e-07, + "loss": 0.0534, "step": 64390 }, { - "epoch": 4.78241497103817, - "grad_norm": 1.4255729913711548, - "learning_rate": 1.305510173770979e-06, - "loss": 0.064, + "epoch": 9.56482994207634, + "grad_norm": 0.8630006313323975, + "learning_rate": 4.3517005792365967e-07, + "loss": 0.0506, "step": 64400 }, { - "epoch": 4.783157582058518, - "grad_norm": 0.8202357292175293, - "learning_rate": 1.3010545076488937e-06, - "loss": 0.0501, + "epoch": 9.566315164117036, + "grad_norm": 0.6426660418510437, + "learning_rate": 4.336848358829646e-07, + "loss": 0.0576, "step": 64410 }, { - "epoch": 4.783900193078865, - "grad_norm": 1.9395742416381836, - "learning_rate": 1.2965988415268083e-06, - "loss": 0.0641, + "epoch": 9.56780038615773, + "grad_norm": 0.9737311005592346, + "learning_rate": 4.321996138422695e-07, + "loss": 0.0561, "step": 64420 }, { - "epoch": 4.784642804099213, - "grad_norm": 0.4814720153808594, - "learning_rate": 1.292143175404723e-06, - "loss": 0.0567, + "epoch": 9.569285608198426, + "grad_norm": 1.9430110454559326, + "learning_rate": 4.3071439180157435e-07, + "loss": 0.0543, "step": 64430 }, { - "epoch": 4.785385415119561, - "grad_norm": 1.5214014053344727, - "learning_rate": 1.2876875092826378e-06, - "loss": 0.0261, + "epoch": 9.570770830239121, + "grad_norm": 0.870077908039093, + "learning_rate": 4.2922916976087926e-07, + "loss": 0.0476, "step": 64440 }, { - "epoch": 4.786128026139908, - "grad_norm": 1.4035849571228027, - "learning_rate": 1.2832318431605524e-06, - "loss": 0.0424, + "epoch": 9.572256052279815, + "grad_norm": 0.4826265871524811, + "learning_rate": 4.2774394772018417e-07, + "loss": 0.057, "step": 64450 }, { - "epoch": 4.786870637160256, - "grad_norm": 1.3969433307647705, - "learning_rate": 1.2787761770384672e-06, - "loss": 0.0411, + "epoch": 9.573741274320511, + "grad_norm": 0.300932377576828, + "learning_rate": 4.262587256794891e-07, + "loss": 0.0556, "step": 64460 }, { - "epoch": 4.787613248180603, - "grad_norm": 0.7165231704711914, - "learning_rate": 1.2743205109163822e-06, - "loss": 0.0555, + "epoch": 9.575226496361205, + "grad_norm": 1.122602105140686, + "learning_rate": 4.2477350363879404e-07, + "loss": 0.0619, "step": 64470 }, { - "epoch": 4.7883558592009505, - "grad_norm": 2.941373825073242, - "learning_rate": 1.2698648447942967e-06, - "loss": 0.0755, + "epoch": 9.576711718401901, + "grad_norm": 0.3875451982021332, + "learning_rate": 4.2328828159809895e-07, + "loss": 0.0709, "step": 64480 }, { - "epoch": 4.789098470221298, - "grad_norm": 0.6511389017105103, - "learning_rate": 1.2654091786722115e-06, - "loss": 0.0216, + "epoch": 9.578196940442597, + "grad_norm": 1.1015067100524902, + "learning_rate": 4.2180305955740386e-07, + "loss": 0.0514, "step": 64490 }, { - "epoch": 4.789841081241645, - "grad_norm": 0.8992854952812195, - "learning_rate": 1.2609535125501263e-06, - "loss": 0.0711, + "epoch": 9.57968216248329, + "grad_norm": 0.6941787600517273, + "learning_rate": 4.2031783751670877e-07, + "loss": 0.0719, "step": 64500 }, { - "epoch": 4.790583692261993, - "grad_norm": 1.678450107574463, - "learning_rate": 1.256497846428041e-06, - "loss": 0.0658, + "epoch": 9.581167384523987, + "grad_norm": 0.47489145398139954, + "learning_rate": 4.188326154760137e-07, + "loss": 0.0503, "step": 64510 }, { - "epoch": 4.79132630328234, - "grad_norm": 1.1826390027999878, - "learning_rate": 1.2520421803059557e-06, - "loss": 0.041, + "epoch": 9.58265260656468, + "grad_norm": 1.3649030923843384, + "learning_rate": 4.173473934353186e-07, + "loss": 0.0766, "step": 64520 }, { - "epoch": 4.792068914302688, - "grad_norm": 1.712506651878357, - "learning_rate": 1.2475865141838704e-06, - "loss": 0.0582, + "epoch": 9.584137828605376, + "grad_norm": 0.6902500987052917, + "learning_rate": 4.158621713946235e-07, + "loss": 0.0496, "step": 64530 }, { - "epoch": 4.792811525323036, - "grad_norm": 1.0903819799423218, - "learning_rate": 1.2431308480617854e-06, - "loss": 0.0657, + "epoch": 9.585623050646072, + "grad_norm": 0.4888577163219452, + "learning_rate": 4.1437694935392846e-07, + "loss": 0.0328, "step": 64540 }, { - "epoch": 4.793554136343383, - "grad_norm": 1.190956950187683, - "learning_rate": 1.2386751819397e-06, - "loss": 0.0733, + "epoch": 9.587108272686766, + "grad_norm": 0.9694914221763611, + "learning_rate": 4.1289172731323337e-07, + "loss": 0.0476, "step": 64550 }, { - "epoch": 4.794296747363731, - "grad_norm": 3.9811456203460693, - "learning_rate": 1.2342195158176148e-06, - "loss": 0.0726, + "epoch": 9.588593494727462, + "grad_norm": 0.3271615207195282, + "learning_rate": 4.114065052725383e-07, + "loss": 0.0367, "step": 64560 }, { - "epoch": 4.795039358384079, - "grad_norm": 0.5482442378997803, - "learning_rate": 1.2297638496955296e-06, - "loss": 0.0801, + "epoch": 9.590078716768156, + "grad_norm": 1.1771557331085205, + "learning_rate": 4.099212832318432e-07, + "loss": 0.0502, "step": 64570 }, { - "epoch": 4.795781969404426, - "grad_norm": 0.27074581384658813, - "learning_rate": 1.2253081835734441e-06, - "loss": 0.0592, + "epoch": 9.591563938808852, + "grad_norm": 0.8279902935028076, + "learning_rate": 4.084360611911481e-07, + "loss": 0.0617, "step": 64580 }, { - "epoch": 4.796524580424774, - "grad_norm": 1.4098689556121826, - "learning_rate": 1.220852517451359e-06, - "loss": 0.0702, + "epoch": 9.593049160849548, + "grad_norm": 1.2852143049240112, + "learning_rate": 4.06950839150453e-07, + "loss": 0.0519, "step": 64590 }, { - "epoch": 4.797267191445121, - "grad_norm": 1.838658332824707, - "learning_rate": 1.216396851329274e-06, - "loss": 0.0521, + "epoch": 9.594534382890242, + "grad_norm": 0.7560584545135498, + "learning_rate": 4.0546561710975797e-07, + "loss": 0.0485, "step": 64600 }, { - "epoch": 4.798009802465469, - "grad_norm": 1.1644221544265747, - "learning_rate": 1.2119411852071885e-06, - "loss": 0.0518, + "epoch": 9.596019604930937, + "grad_norm": 0.2690260112285614, + "learning_rate": 4.039803950690629e-07, + "loss": 0.0352, "step": 64610 }, { - "epoch": 4.798752413485817, - "grad_norm": 1.8020392656326294, - "learning_rate": 1.2074855190851033e-06, - "loss": 0.0675, + "epoch": 9.597504826971631, + "grad_norm": 1.46587073802948, + "learning_rate": 4.024951730283678e-07, + "loss": 0.0555, "step": 64620 }, { - "epoch": 4.799495024506164, - "grad_norm": 1.1796648502349854, - "learning_rate": 1.203029852963018e-06, - "loss": 0.0479, + "epoch": 9.598990049012327, + "grad_norm": 0.5200058817863464, + "learning_rate": 4.010099509876727e-07, + "loss": 0.0481, "step": 64630 }, { - "epoch": 4.8002376355265115, - "grad_norm": 2.24526047706604, - "learning_rate": 1.1985741868409326e-06, - "loss": 0.0374, + "epoch": 9.600475271053023, + "grad_norm": 1.343431830406189, + "learning_rate": 3.995247289469776e-07, + "loss": 0.0496, "step": 64640 }, { - "epoch": 4.8009802465468585, - "grad_norm": 0.3404271900653839, - "learning_rate": 1.1941185207188474e-06, - "loss": 0.0318, + "epoch": 9.601960493093717, + "grad_norm": 0.7409623861312866, + "learning_rate": 3.980395069062825e-07, + "loss": 0.0438, "step": 64650 }, { - "epoch": 4.801722857567206, - "grad_norm": 0.40079450607299805, - "learning_rate": 1.1896628545967622e-06, - "loss": 0.0397, + "epoch": 9.603445715134413, + "grad_norm": 1.2803033590316772, + "learning_rate": 3.965542848655874e-07, + "loss": 0.0542, "step": 64660 }, { - "epoch": 4.802465468587554, - "grad_norm": 1.3549119234085083, - "learning_rate": 1.185207188474677e-06, - "loss": 0.0597, + "epoch": 9.604930937175109, + "grad_norm": 0.5689689517021179, + "learning_rate": 3.950690628248924e-07, + "loss": 0.0488, "step": 64670 }, { - "epoch": 4.803208079607901, - "grad_norm": 0.6947437524795532, - "learning_rate": 1.1807515223525918e-06, - "loss": 0.0653, + "epoch": 9.606416159215803, + "grad_norm": 0.8184802532196045, + "learning_rate": 3.935838407841973e-07, + "loss": 0.0418, "step": 64680 }, { - "epoch": 4.803950690628249, - "grad_norm": 0.9724387526512146, - "learning_rate": 1.1762958562305065e-06, - "loss": 0.0544, + "epoch": 9.607901381256498, + "grad_norm": 0.913433849811554, + "learning_rate": 3.920986187435022e-07, + "loss": 0.041, "step": 64690 }, { - "epoch": 4.804693301648596, - "grad_norm": 0.9956182241439819, - "learning_rate": 1.1718401901084213e-06, - "loss": 0.0393, + "epoch": 9.609386603297192, + "grad_norm": 0.21632570028305054, + "learning_rate": 3.9061339670280707e-07, + "loss": 0.0534, "step": 64700 }, { - "epoch": 4.805435912668944, - "grad_norm": 0.9624569416046143, - "learning_rate": 1.1673845239863359e-06, - "loss": 0.0534, + "epoch": 9.610871825337888, + "grad_norm": 1.04236900806427, + "learning_rate": 3.89128174662112e-07, + "loss": 0.0563, "step": 64710 }, { - "epoch": 4.806178523689292, - "grad_norm": 2.9609172344207764, - "learning_rate": 1.1629288578642507e-06, - "loss": 0.054, + "epoch": 9.612357047378584, + "grad_norm": 1.2642252445220947, + "learning_rate": 3.876429526214169e-07, + "loss": 0.0603, "step": 64720 }, { - "epoch": 4.806921134709639, - "grad_norm": 1.744933009147644, - "learning_rate": 1.1584731917421657e-06, - "loss": 0.0621, + "epoch": 9.613842269419278, + "grad_norm": 1.1171890497207642, + "learning_rate": 3.861577305807219e-07, + "loss": 0.0575, "step": 64730 }, { - "epoch": 4.807663745729987, - "grad_norm": 0.723945677280426, - "learning_rate": 1.1540175256200802e-06, - "loss": 0.042, + "epoch": 9.615327491459974, + "grad_norm": 0.5049655437469482, + "learning_rate": 3.8467250854002676e-07, + "loss": 0.0386, "step": 64740 }, { - "epoch": 4.808406356750334, - "grad_norm": 1.1456420421600342, - "learning_rate": 1.149561859497995e-06, - "loss": 0.0395, + "epoch": 9.616812713500668, + "grad_norm": 0.8653619885444641, + "learning_rate": 3.8318728649933167e-07, + "loss": 0.0452, "step": 64750 }, { - "epoch": 4.809148967770682, - "grad_norm": 2.3802542686462402, - "learning_rate": 1.1451061933759098e-06, - "loss": 0.0922, + "epoch": 9.618297935541364, + "grad_norm": 1.2991448640823364, + "learning_rate": 3.817020644586366e-07, + "loss": 0.039, "step": 64760 }, { - "epoch": 4.80989157879103, - "grad_norm": 0.3124806880950928, - "learning_rate": 1.1406505272538244e-06, - "loss": 0.0576, + "epoch": 9.61978315758206, + "grad_norm": 0.8051328063011169, + "learning_rate": 3.802168424179415e-07, + "loss": 0.0538, "step": 64770 }, { - "epoch": 4.810634189811377, - "grad_norm": 0.9946483373641968, - "learning_rate": 1.1361948611317392e-06, - "loss": 0.0601, + "epoch": 9.621268379622753, + "grad_norm": 0.7863057255744934, + "learning_rate": 3.787316203772464e-07, + "loss": 0.0519, "step": 64780 }, { - "epoch": 4.811376800831725, - "grad_norm": 2.7371938228607178, - "learning_rate": 1.131739195009654e-06, - "loss": 0.0527, + "epoch": 9.62275360166345, + "grad_norm": 0.9942373633384705, + "learning_rate": 3.772463983365513e-07, + "loss": 0.0544, "step": 64790 }, { - "epoch": 4.812119411852072, - "grad_norm": 1.2650768756866455, - "learning_rate": 1.1272835288875687e-06, - "loss": 0.0535, + "epoch": 9.624238823704143, + "grad_norm": 0.5974944829940796, + "learning_rate": 3.757611762958563e-07, + "loss": 0.0437, "step": 64800 }, { - "epoch": 4.8128620228724195, - "grad_norm": 1.1405729055404663, - "learning_rate": 1.1228278627654835e-06, - "loss": 0.0361, + "epoch": 9.625724045744839, + "grad_norm": 1.216723084449768, + "learning_rate": 3.742759542551612e-07, + "loss": 0.0553, "step": 64810 }, { - "epoch": 4.813604633892767, - "grad_norm": 1.991651177406311, - "learning_rate": 1.1183721966433983e-06, - "loss": 0.0721, + "epoch": 9.627209267785535, + "grad_norm": 0.6732010841369629, + "learning_rate": 3.727907322144661e-07, + "loss": 0.0553, "step": 64820 }, { - "epoch": 4.814347244913114, - "grad_norm": 2.725224018096924, - "learning_rate": 1.1139165305213129e-06, - "loss": 0.0446, + "epoch": 9.628694489826229, + "grad_norm": 0.4387213885784149, + "learning_rate": 3.71305510173771e-07, + "loss": 0.0401, "step": 64830 }, { - "epoch": 4.815089855933462, - "grad_norm": 0.7116602063179016, - "learning_rate": 1.1094608643992276e-06, - "loss": 0.0459, + "epoch": 9.630179711866925, + "grad_norm": 1.1491668224334717, + "learning_rate": 3.698202881330759e-07, + "loss": 0.0447, "step": 64840 }, { - "epoch": 4.815832466953809, - "grad_norm": 1.70388925075531, - "learning_rate": 1.1050051982771424e-06, - "loss": 0.07, + "epoch": 9.631664933907619, + "grad_norm": 0.705153226852417, + "learning_rate": 3.683350660923808e-07, + "loss": 0.0477, "step": 64850 }, { - "epoch": 4.816575077974157, - "grad_norm": 0.45246338844299316, - "learning_rate": 1.1005495321550574e-06, - "loss": 0.0477, + "epoch": 9.633150155948314, + "grad_norm": 2.005906581878662, + "learning_rate": 3.668498440516858e-07, + "loss": 0.0698, "step": 64860 }, { - "epoch": 4.817317688994505, - "grad_norm": 2.529362678527832, - "learning_rate": 1.096093866032972e-06, - "loss": 0.0773, + "epoch": 9.63463537798901, + "grad_norm": 0.6410548686981201, + "learning_rate": 3.653646220109907e-07, + "loss": 0.0496, "step": 64870 }, { - "epoch": 4.818060300014852, - "grad_norm": 2.60267972946167, - "learning_rate": 1.0916381999108868e-06, - "loss": 0.0683, + "epoch": 9.636120600029704, + "grad_norm": 0.34218358993530273, + "learning_rate": 3.638793999702956e-07, + "loss": 0.0576, "step": 64880 }, { - "epoch": 4.8188029110352, - "grad_norm": 2.9916200637817383, - "learning_rate": 1.0871825337888016e-06, - "loss": 0.0413, + "epoch": 9.6376058220704, + "grad_norm": 0.3363538682460785, + "learning_rate": 3.623941779296005e-07, + "loss": 0.0474, "step": 64890 }, { - "epoch": 4.819545522055547, - "grad_norm": 1.2470331192016602, - "learning_rate": 1.0827268676667161e-06, - "loss": 0.0669, + "epoch": 9.639091044111094, + "grad_norm": 1.0393438339233398, + "learning_rate": 3.6090895588890543e-07, + "loss": 0.0597, "step": 64900 }, { - "epoch": 4.820288133075895, - "grad_norm": 1.2445175647735596, - "learning_rate": 1.078271201544631e-06, - "loss": 0.0672, + "epoch": 9.64057626615179, + "grad_norm": 0.566709578037262, + "learning_rate": 3.5942373384821034e-07, + "loss": 0.0523, "step": 64910 }, { - "epoch": 4.821030744096243, - "grad_norm": 0.964076042175293, - "learning_rate": 1.0738155354225457e-06, - "loss": 0.0642, + "epoch": 9.642061488192486, + "grad_norm": 1.6289558410644531, + "learning_rate": 3.5793851180751525e-07, + "loss": 0.055, "step": 64920 }, { - "epoch": 4.82177335511659, - "grad_norm": 0.8829526901245117, - "learning_rate": 1.0693598693004605e-06, - "loss": 0.0695, + "epoch": 9.64354671023318, + "grad_norm": 0.6053251624107361, + "learning_rate": 3.564532897668202e-07, + "loss": 0.0396, "step": 64930 }, { - "epoch": 4.822515966136938, - "grad_norm": 1.5874435901641846, - "learning_rate": 1.0649042031783753e-06, - "loss": 0.052, + "epoch": 9.645031932273875, + "grad_norm": 3.9865386486053467, + "learning_rate": 3.549680677261251e-07, + "loss": 0.0496, "step": 64940 }, { - "epoch": 4.823258577157285, - "grad_norm": 1.9532781839370728, - "learning_rate": 1.06044853705629e-06, - "loss": 0.0823, + "epoch": 9.64651715431457, + "grad_norm": 1.3346593379974365, + "learning_rate": 3.5348284568543003e-07, + "loss": 0.0708, "step": 64950 }, { - "epoch": 4.824001188177633, - "grad_norm": 1.2501765489578247, - "learning_rate": 1.0559928709342046e-06, - "loss": 0.076, + "epoch": 9.648002376355265, + "grad_norm": 0.7871192097663879, + "learning_rate": 3.5199762364473494e-07, + "loss": 0.0437, "step": 64960 }, { - "epoch": 4.8247437991979805, - "grad_norm": 1.186736822128296, - "learning_rate": 1.0515372048121194e-06, - "loss": 0.0524, + "epoch": 9.649487598395961, + "grad_norm": 0.5941735506057739, + "learning_rate": 3.505124016040398e-07, + "loss": 0.0626, "step": 64970 }, { - "epoch": 4.8254864102183275, - "grad_norm": 1.532196044921875, - "learning_rate": 1.0470815386900342e-06, - "loss": 0.0546, + "epoch": 9.650972820436655, + "grad_norm": 1.1441556215286255, + "learning_rate": 3.490271795633447e-07, + "loss": 0.0843, "step": 64980 }, { - "epoch": 4.826229021238675, - "grad_norm": 0.5501788854598999, - "learning_rate": 1.0426258725679487e-06, - "loss": 0.0407, + "epoch": 9.65245804247735, + "grad_norm": 0.8148675560951233, + "learning_rate": 3.475419575226496e-07, + "loss": 0.0501, "step": 64990 }, { - "epoch": 4.826971632259022, - "grad_norm": 0.5295414328575134, - "learning_rate": 1.0381702064458637e-06, - "loss": 0.0698, + "epoch": 9.653943264518045, + "grad_norm": 1.1982941627502441, + "learning_rate": 3.4605673548195463e-07, + "loss": 0.0516, "step": 65000 }, { - "epoch": 4.82771424327937, - "grad_norm": 2.7445316314697266, - "learning_rate": 1.0337145403237785e-06, - "loss": 0.0864, + "epoch": 9.65542848655874, + "grad_norm": 0.9795641303062439, + "learning_rate": 3.445715134412595e-07, + "loss": 0.0561, "step": 65010 }, { - "epoch": 4.828456854299718, - "grad_norm": 2.2121715545654297, - "learning_rate": 1.029258874201693e-06, - "loss": 0.0606, + "epoch": 9.656913708599436, + "grad_norm": 2.1101455688476562, + "learning_rate": 3.430862914005644e-07, + "loss": 0.0602, "step": 65020 }, { - "epoch": 4.829199465320065, - "grad_norm": 1.5435631275177002, - "learning_rate": 1.0248032080796079e-06, - "loss": 0.0572, + "epoch": 9.65839893064013, + "grad_norm": 0.5057744979858398, + "learning_rate": 3.416010693598693e-07, + "loss": 0.0575, "step": 65030 }, { - "epoch": 4.829942076340413, - "grad_norm": 1.4504753351211548, - "learning_rate": 1.0203475419575227e-06, - "loss": 0.0387, + "epoch": 9.659884152680826, + "grad_norm": 0.9282684326171875, + "learning_rate": 3.401158473191742e-07, + "loss": 0.05, "step": 65040 }, { - "epoch": 4.83068468736076, - "grad_norm": 2.8633036613464355, - "learning_rate": 1.0158918758354374e-06, - "loss": 0.0621, + "epoch": 9.66136937472152, + "grad_norm": 1.1378172636032104, + "learning_rate": 3.3863062527847913e-07, + "loss": 0.06, "step": 65050 }, { - "epoch": 4.831427298381108, - "grad_norm": 2.3440101146698, - "learning_rate": 1.0114362097133522e-06, - "loss": 0.0797, + "epoch": 9.662854596762216, + "grad_norm": 0.2000456601381302, + "learning_rate": 3.371454032377841e-07, + "loss": 0.0675, "step": 65060 }, { - "epoch": 4.832169909401456, - "grad_norm": 0.962746262550354, - "learning_rate": 1.006980543591267e-06, - "loss": 0.0636, + "epoch": 9.664339818802912, + "grad_norm": 0.8669431805610657, + "learning_rate": 3.35660181197089e-07, + "loss": 0.0415, "step": 65070 }, { - "epoch": 4.832912520421803, - "grad_norm": 0.8248947262763977, - "learning_rate": 1.0025248774691818e-06, - "loss": 0.0721, + "epoch": 9.665825040843606, + "grad_norm": 0.8875764012336731, + "learning_rate": 3.341749591563939e-07, + "loss": 0.0588, "step": 65080 }, { - "epoch": 4.833655131442151, - "grad_norm": 1.0095185041427612, - "learning_rate": 9.980692113470964e-07, - "loss": 0.036, + "epoch": 9.667310262884302, + "grad_norm": 1.0086852312088013, + "learning_rate": 3.326897371156988e-07, + "loss": 0.046, "step": 65090 }, { - "epoch": 4.834397742462498, - "grad_norm": 1.019490122795105, - "learning_rate": 9.936135452250111e-07, - "loss": 0.0576, + "epoch": 9.668795484924996, + "grad_norm": 0.4355565309524536, + "learning_rate": 3.3120451507500373e-07, + "loss": 0.0575, "step": 65100 }, { - "epoch": 4.835140353482846, - "grad_norm": 0.30174383521080017, - "learning_rate": 9.89157879102926e-07, - "loss": 0.0483, + "epoch": 9.670280706965691, + "grad_norm": 0.9262057542800903, + "learning_rate": 3.2971929303430864e-07, + "loss": 0.0591, "step": 65110 }, { - "epoch": 4.8358829645031935, - "grad_norm": 1.3941235542297363, - "learning_rate": 9.847022129808405e-07, - "loss": 0.0886, + "epoch": 9.671765929006387, + "grad_norm": 1.1087294816970825, + "learning_rate": 3.2823407099361355e-07, + "loss": 0.056, "step": 65120 }, { - "epoch": 4.8366255755235406, - "grad_norm": 2.1527700424194336, - "learning_rate": 9.802465468587555e-07, - "loss": 0.039, + "epoch": 9.673251151047081, + "grad_norm": 0.8267140984535217, + "learning_rate": 3.267488489529185e-07, + "loss": 0.0586, "step": 65130 }, { - "epoch": 4.8373681865438884, - "grad_norm": 1.5553241968154907, - "learning_rate": 9.757908807366703e-07, - "loss": 0.0417, + "epoch": 9.674736373087777, + "grad_norm": 0.963013768196106, + "learning_rate": 3.252636269122234e-07, + "loss": 0.0535, "step": 65140 }, { - "epoch": 4.8381107975642355, - "grad_norm": 1.5481144189834595, - "learning_rate": 9.713352146145848e-07, - "loss": 0.0638, + "epoch": 9.676221595128471, + "grad_norm": 0.8553949594497681, + "learning_rate": 3.2377840487152833e-07, + "loss": 0.0629, "step": 65150 }, { - "epoch": 4.838853408584583, - "grad_norm": 2.373396873474121, - "learning_rate": 9.668795484924996e-07, - "loss": 0.0551, + "epoch": 9.677706817169167, + "grad_norm": 0.4256061613559723, + "learning_rate": 3.2229318283083324e-07, + "loss": 0.0439, "step": 65160 }, { - "epoch": 4.839596019604931, - "grad_norm": 1.2677528858184814, - "learning_rate": 9.624238823704144e-07, - "loss": 0.0624, + "epoch": 9.679192039209862, + "grad_norm": 0.9870197176933289, + "learning_rate": 3.2080796079013815e-07, + "loss": 0.0854, "step": 65170 }, { - "epoch": 4.840338630625278, - "grad_norm": 3.198784589767456, - "learning_rate": 9.57968216248329e-07, - "loss": 0.0483, + "epoch": 9.680677261250556, + "grad_norm": 1.0996114015579224, + "learning_rate": 3.1932273874944306e-07, + "loss": 0.058, "step": 65180 }, { - "epoch": 4.841081241645626, - "grad_norm": 0.36906376481056213, - "learning_rate": 9.53512550126244e-07, - "loss": 0.0587, + "epoch": 9.682162483291252, + "grad_norm": 0.41394004225730896, + "learning_rate": 3.17837516708748e-07, + "loss": 0.0468, "step": 65190 }, { - "epoch": 4.841823852665973, - "grad_norm": 1.4597629308700562, - "learning_rate": 9.490568840041587e-07, - "loss": 0.0588, + "epoch": 9.683647705331946, + "grad_norm": 0.959151029586792, + "learning_rate": 3.1635229466805293e-07, + "loss": 0.0532, "step": 65200 }, { - "epoch": 4.842566463686321, - "grad_norm": 1.0740894079208374, - "learning_rate": 9.446012178820734e-07, - "loss": 0.0312, + "epoch": 9.685132927372642, + "grad_norm": 0.5184232592582703, + "learning_rate": 3.1486707262735784e-07, + "loss": 0.0641, "step": 65210 }, { - "epoch": 4.843309074706669, - "grad_norm": 2.1893422603607178, - "learning_rate": 9.401455517599881e-07, - "loss": 0.088, + "epoch": 9.686618149413338, + "grad_norm": 2.521256923675537, + "learning_rate": 3.1338185058666275e-07, + "loss": 0.0403, "step": 65220 }, { - "epoch": 4.844051685727016, - "grad_norm": 2.3881919384002686, - "learning_rate": 9.356898856379029e-07, - "loss": 0.0389, + "epoch": 9.688103371454032, + "grad_norm": 0.7861289978027344, + "learning_rate": 3.1189662854596766e-07, + "loss": 0.0553, "step": 65230 }, { - "epoch": 4.844794296747364, - "grad_norm": 0.9090191721916199, - "learning_rate": 9.312342195158177e-07, - "loss": 0.0574, + "epoch": 9.689588593494728, + "grad_norm": 1.0370820760726929, + "learning_rate": 3.1041140650527257e-07, + "loss": 0.0509, "step": 65240 }, { - "epoch": 4.845536907767711, - "grad_norm": 2.5130159854888916, - "learning_rate": 9.267785533937324e-07, - "loss": 0.0519, + "epoch": 9.691073815535422, + "grad_norm": 0.29477956891059875, + "learning_rate": 3.089261844645775e-07, + "loss": 0.0426, "step": 65250 }, { - "epoch": 4.846279518788059, - "grad_norm": 0.5649994611740112, - "learning_rate": 9.223228872716471e-07, - "loss": 0.0314, + "epoch": 9.692559037576117, + "grad_norm": 1.420774221420288, + "learning_rate": 3.074409624238824e-07, + "loss": 0.0638, "step": 65260 }, { - "epoch": 4.847022129808407, - "grad_norm": 1.371864914894104, - "learning_rate": 9.178672211495618e-07, - "loss": 0.0652, + "epoch": 9.694044259616813, + "grad_norm": 1.197407603263855, + "learning_rate": 3.059557403831873e-07, + "loss": 0.0515, "step": 65270 }, { - "epoch": 4.847764740828754, - "grad_norm": 0.3888418972492218, - "learning_rate": 9.134115550274767e-07, - "loss": 0.0693, + "epoch": 9.695529481657507, + "grad_norm": 0.4081036150455475, + "learning_rate": 3.044705183424922e-07, + "loss": 0.0447, "step": 65280 }, { - "epoch": 4.8485073518491015, - "grad_norm": 2.402367115020752, - "learning_rate": 9.089558889053914e-07, - "loss": 0.0578, + "epoch": 9.697014703698203, + "grad_norm": 0.3299735188484192, + "learning_rate": 3.029852963017971e-07, + "loss": 0.0448, "step": 65290 }, { - "epoch": 4.8492499628694485, - "grad_norm": 3.145559310913086, - "learning_rate": 9.04500222783306e-07, - "loss": 0.0581, + "epoch": 9.698499925738897, + "grad_norm": 0.8112266063690186, + "learning_rate": 3.0150007426110203e-07, + "loss": 0.0547, "step": 65300 }, { - "epoch": 4.849992573889796, - "grad_norm": 0.6334661245346069, - "learning_rate": 9.000445566612209e-07, - "loss": 0.07, + "epoch": 9.699985147779593, + "grad_norm": 0.9002323150634766, + "learning_rate": 3.00014852220407e-07, + "loss": 0.0586, "step": 65310 }, { - "epoch": 4.850735184910144, - "grad_norm": 1.6239862442016602, - "learning_rate": 8.955888905391356e-07, - "loss": 0.0437, + "epoch": 9.701470369820289, + "grad_norm": 0.8222044110298157, + "learning_rate": 2.985296301797119e-07, + "loss": 0.0458, "step": 65320 }, { - "epoch": 4.851477795930491, - "grad_norm": 2.169163942337036, - "learning_rate": 8.911332244170504e-07, - "loss": 0.0579, + "epoch": 9.702955591860983, + "grad_norm": 0.6172053813934326, + "learning_rate": 2.970444081390168e-07, + "loss": 0.0619, "step": 65330 }, { - "epoch": 4.852220406950839, - "grad_norm": 0.9393569827079773, - "learning_rate": 8.866775582949652e-07, - "loss": 0.0424, + "epoch": 9.704440813901678, + "grad_norm": 1.5769612789154053, + "learning_rate": 2.955591860983217e-07, + "loss": 0.0713, "step": 65340 }, { - "epoch": 4.852963017971186, - "grad_norm": 0.3016482889652252, - "learning_rate": 8.822218921728799e-07, - "loss": 0.0443, + "epoch": 9.705926035942372, + "grad_norm": 0.9784587621688843, + "learning_rate": 2.9407396405762663e-07, + "loss": 0.0641, "step": 65350 }, { - "epoch": 4.853705628991534, - "grad_norm": 0.676237165927887, - "learning_rate": 8.777662260507946e-07, - "loss": 0.0511, + "epoch": 9.707411257983068, + "grad_norm": 1.5220768451690674, + "learning_rate": 2.9258874201693154e-07, + "loss": 0.0516, "step": 65360 }, { - "epoch": 4.854448240011882, - "grad_norm": 0.3696301579475403, - "learning_rate": 8.733105599287093e-07, - "loss": 0.0598, + "epoch": 9.708896480023764, + "grad_norm": 1.2147810459136963, + "learning_rate": 2.9110351997623645e-07, + "loss": 0.0451, "step": 65370 }, { - "epoch": 4.855190851032229, - "grad_norm": 4.8189921379089355, - "learning_rate": 8.688548938066241e-07, - "loss": 0.0632, + "epoch": 9.710381702064458, + "grad_norm": 1.1519248485565186, + "learning_rate": 2.896182979355414e-07, + "loss": 0.0766, "step": 65380 }, { - "epoch": 4.855933462052577, - "grad_norm": 0.5955801010131836, - "learning_rate": 8.643992276845389e-07, - "loss": 0.029, + "epoch": 9.711866924105154, + "grad_norm": 0.8308933973312378, + "learning_rate": 2.881330758948463e-07, + "loss": 0.0446, "step": 65390 }, { - "epoch": 4.856676073072924, - "grad_norm": 1.868445634841919, - "learning_rate": 8.599435615624536e-07, - "loss": 0.0496, + "epoch": 9.71335214614585, + "grad_norm": 0.7178075909614563, + "learning_rate": 2.866478538541512e-07, + "loss": 0.0647, "step": 65400 }, { - "epoch": 4.857418684093272, - "grad_norm": 0.6771336197853088, - "learning_rate": 8.554878954403684e-07, - "loss": 0.0484, + "epoch": 9.714837368186544, + "grad_norm": 0.5174522399902344, + "learning_rate": 2.8516263181345615e-07, + "loss": 0.0402, "step": 65410 }, { - "epoch": 4.85816129511362, - "grad_norm": 1.803080677986145, - "learning_rate": 8.510322293182831e-07, - "loss": 0.0599, + "epoch": 9.71632259022724, + "grad_norm": 0.8876258730888367, + "learning_rate": 2.8367740977276106e-07, + "loss": 0.0617, "step": 65420 }, { - "epoch": 4.858903906133967, - "grad_norm": 2.175628900527954, - "learning_rate": 8.465765631961978e-07, - "loss": 0.0448, + "epoch": 9.717807812267933, + "grad_norm": 0.8110934495925903, + "learning_rate": 2.8219218773206597e-07, + "loss": 0.0741, "step": 65430 }, { - "epoch": 4.859646517154315, - "grad_norm": 1.489719033241272, - "learning_rate": 8.421208970741127e-07, - "loss": 0.0435, + "epoch": 9.71929303430863, + "grad_norm": 0.5553524494171143, + "learning_rate": 2.807069656913709e-07, + "loss": 0.0438, "step": 65440 }, { - "epoch": 4.860389128174662, - "grad_norm": 0.7035216689109802, - "learning_rate": 8.376652309520274e-07, - "loss": 0.0551, + "epoch": 9.720778256349325, + "grad_norm": 0.6501131057739258, + "learning_rate": 2.792217436506758e-07, + "loss": 0.067, "step": 65450 }, { - "epoch": 4.8611317391950095, - "grad_norm": 1.251814603805542, - "learning_rate": 8.33209564829942e-07, - "loss": 0.0537, + "epoch": 9.722263478390019, + "grad_norm": 1.176830530166626, + "learning_rate": 2.777365216099807e-07, + "loss": 0.0548, "step": 65460 }, { - "epoch": 4.861874350215357, - "grad_norm": 0.5528387427330017, - "learning_rate": 8.287538987078569e-07, - "loss": 0.0571, + "epoch": 9.723748700430715, + "grad_norm": 0.9840707182884216, + "learning_rate": 2.7625129956928566e-07, + "loss": 0.0546, "step": 65470 }, { - "epoch": 4.862616961235704, - "grad_norm": 2.852384328842163, - "learning_rate": 8.242982325857716e-07, - "loss": 0.0791, + "epoch": 9.725233922471409, + "grad_norm": 0.5249770283699036, + "learning_rate": 2.7476607752859057e-07, + "loss": 0.0547, "step": 65480 }, { - "epoch": 4.863359572256052, - "grad_norm": 1.3060245513916016, - "learning_rate": 8.198425664636863e-07, - "loss": 0.0547, + "epoch": 9.726719144512105, + "grad_norm": 1.2236920595169067, + "learning_rate": 2.732808554878955e-07, + "loss": 0.0397, "step": 65490 }, { - "epoch": 4.8641021832764, - "grad_norm": 2.287452220916748, - "learning_rate": 8.153869003416011e-07, - "loss": 0.056, + "epoch": 9.7282043665528, + "grad_norm": 0.892269492149353, + "learning_rate": 2.717956334472004e-07, + "loss": 0.0631, "step": 65500 }, { - "epoch": 4.864844794296747, - "grad_norm": 1.873404860496521, - "learning_rate": 8.109312342195158e-07, - "loss": 0.0859, + "epoch": 9.729689588593494, + "grad_norm": 1.4101309776306152, + "learning_rate": 2.703104114065053e-07, + "loss": 0.0468, "step": 65510 }, { - "epoch": 4.865587405317095, - "grad_norm": 1.0122098922729492, - "learning_rate": 8.064755680974306e-07, - "loss": 0.0531, + "epoch": 9.73117481063419, + "grad_norm": 0.9848302006721497, + "learning_rate": 2.688251893658102e-07, + "loss": 0.0629, "step": 65520 }, { - "epoch": 4.866330016337442, - "grad_norm": 1.163620948791504, - "learning_rate": 8.020199019753453e-07, - "loss": 0.0633, + "epoch": 9.732660032674884, + "grad_norm": 0.7715422511100769, + "learning_rate": 2.673399673251151e-07, + "loss": 0.0412, "step": 65530 }, { - "epoch": 4.86707262735779, - "grad_norm": 2.736693859100342, - "learning_rate": 7.975642358532601e-07, - "loss": 0.0622, + "epoch": 9.73414525471558, + "grad_norm": 0.9916948080062866, + "learning_rate": 2.658547452844201e-07, + "loss": 0.0535, "step": 65540 }, { - "epoch": 4.867815238378138, - "grad_norm": 0.43964171409606934, - "learning_rate": 7.931085697311749e-07, - "loss": 0.027, + "epoch": 9.735630476756276, + "grad_norm": 1.1795718669891357, + "learning_rate": 2.6436952324372494e-07, + "loss": 0.0432, "step": 65550 }, { - "epoch": 4.868557849398485, - "grad_norm": 0.9539849162101746, - "learning_rate": 7.886529036090895e-07, - "loss": 0.043, + "epoch": 9.73711569879697, + "grad_norm": 1.2389646768569946, + "learning_rate": 2.6288430120302985e-07, + "loss": 0.0651, "step": 65560 }, { - "epoch": 4.869300460418833, - "grad_norm": 0.3715505003929138, - "learning_rate": 7.841972374870043e-07, - "loss": 0.0631, + "epoch": 9.738600920837666, + "grad_norm": 0.48417583107948303, + "learning_rate": 2.613990791623348e-07, + "loss": 0.0469, "step": 65570 }, { - "epoch": 4.87004307143918, - "grad_norm": 1.210249423980713, - "learning_rate": 7.797415713649191e-07, - "loss": 0.0706, + "epoch": 9.74008614287836, + "grad_norm": 1.2999908924102783, + "learning_rate": 2.599138571216397e-07, + "loss": 0.059, "step": 65580 }, { - "epoch": 4.870785682459528, - "grad_norm": 2.480217456817627, - "learning_rate": 7.752859052428338e-07, - "loss": 0.0576, + "epoch": 9.741571364919055, + "grad_norm": 0.9606696367263794, + "learning_rate": 2.5842863508094463e-07, + "loss": 0.032, "step": 65590 }, { - "epoch": 4.871528293479876, - "grad_norm": 2.4984142780303955, - "learning_rate": 7.708302391207486e-07, - "loss": 0.0893, + "epoch": 9.743056586959751, + "grad_norm": 0.2228945791721344, + "learning_rate": 2.5694341304024954e-07, + "loss": 0.0369, "step": 65600 }, { - "epoch": 4.872270904500223, - "grad_norm": 0.5741029381752014, - "learning_rate": 7.663745729986633e-07, - "loss": 0.057, + "epoch": 9.744541809000445, + "grad_norm": 0.3582620918750763, + "learning_rate": 2.5545819099955445e-07, + "loss": 0.05, "step": 65610 }, { - "epoch": 4.8730135155205705, - "grad_norm": 2.359872341156006, - "learning_rate": 7.61918906876578e-07, - "loss": 0.0451, + "epoch": 9.746027031041141, + "grad_norm": 0.6840639710426331, + "learning_rate": 2.5397296895885936e-07, + "loss": 0.0369, "step": 65620 }, { - "epoch": 4.8737561265409175, - "grad_norm": 1.7714812755584717, - "learning_rate": 7.574632407544928e-07, - "loss": 0.083, + "epoch": 9.747512253081835, + "grad_norm": 0.572365403175354, + "learning_rate": 2.5248774691816427e-07, + "loss": 0.0536, "step": 65630 }, { - "epoch": 4.874498737561265, - "grad_norm": 3.7563037872314453, - "learning_rate": 7.530075746324076e-07, - "loss": 0.0763, + "epoch": 9.74899747512253, + "grad_norm": 0.2343834489583969, + "learning_rate": 2.5100252487746923e-07, + "loss": 0.0403, "step": 65640 }, { - "epoch": 4.875241348581613, - "grad_norm": 0.9975684285163879, - "learning_rate": 7.485519085103223e-07, - "loss": 0.0359, + "epoch": 9.750482697163227, + "grad_norm": 1.2876397371292114, + "learning_rate": 2.4951730283677414e-07, + "loss": 0.0535, "step": 65650 }, { - "epoch": 4.87598395960196, - "grad_norm": 1.6247881650924683, - "learning_rate": 7.44096242388237e-07, - "loss": 0.0349, + "epoch": 9.75196791920392, + "grad_norm": 1.232550859451294, + "learning_rate": 2.48032080796079e-07, + "loss": 0.0626, "step": 65660 }, { - "epoch": 4.876726570622308, - "grad_norm": 2.389453887939453, - "learning_rate": 7.396405762661518e-07, - "loss": 0.0537, + "epoch": 9.753453141244616, + "grad_norm": 0.9929244518280029, + "learning_rate": 2.4654685875538396e-07, + "loss": 0.0824, "step": 65670 }, { - "epoch": 4.877469181642655, - "grad_norm": 2.3572044372558594, - "learning_rate": 7.351849101440666e-07, - "loss": 0.0356, + "epoch": 9.75493836328531, + "grad_norm": 1.197312593460083, + "learning_rate": 2.4506163671468887e-07, + "loss": 0.0761, "step": 65680 }, { - "epoch": 4.878211792663003, - "grad_norm": 2.2939834594726562, - "learning_rate": 7.307292440219813e-07, - "loss": 0.0741, + "epoch": 9.756423585326006, + "grad_norm": 0.8419313430786133, + "learning_rate": 2.435764146739938e-07, + "loss": 0.0489, "step": 65690 }, { - "epoch": 4.878954403683351, - "grad_norm": 1.0709495544433594, - "learning_rate": 7.262735778998961e-07, - "loss": 0.0741, + "epoch": 9.757908807366702, + "grad_norm": 2.1786866188049316, + "learning_rate": 2.420911926332987e-07, + "loss": 0.0524, "step": 65700 }, { - "epoch": 4.879697014703698, - "grad_norm": 1.768277645111084, - "learning_rate": 7.218179117778109e-07, - "loss": 0.0616, + "epoch": 9.759394029407396, + "grad_norm": 1.1716639995574951, + "learning_rate": 2.406059705926036e-07, + "loss": 0.0562, "step": 65710 }, { - "epoch": 4.880439625724046, - "grad_norm": 1.0159682035446167, - "learning_rate": 7.173622456557255e-07, - "loss": 0.0614, + "epoch": 9.760879251448092, + "grad_norm": 0.7020483613014221, + "learning_rate": 2.391207485519085e-07, + "loss": 0.0517, "step": 65720 }, { - "epoch": 4.881182236744394, - "grad_norm": 0.6093847751617432, - "learning_rate": 7.129065795336402e-07, - "loss": 0.0293, + "epoch": 9.762364473488786, + "grad_norm": 0.8520447015762329, + "learning_rate": 2.3763552651121342e-07, + "loss": 0.0715, "step": 65730 }, { - "epoch": 4.881924847764741, - "grad_norm": 0.5181077718734741, - "learning_rate": 7.084509134115551e-07, - "loss": 0.0546, + "epoch": 9.763849695529482, + "grad_norm": 1.2511392831802368, + "learning_rate": 2.3615030447051838e-07, + "loss": 0.0545, "step": 65740 }, { - "epoch": 4.882667458785089, - "grad_norm": 0.47307220101356506, - "learning_rate": 7.039952472894698e-07, - "loss": 0.0434, + "epoch": 9.765334917570177, + "grad_norm": 0.5448188185691833, + "learning_rate": 2.3466508242982327e-07, + "loss": 0.0412, "step": 65750 }, { - "epoch": 4.883410069805436, - "grad_norm": 1.3281551599502563, - "learning_rate": 6.995395811673846e-07, - "loss": 0.0622, + "epoch": 9.766820139610871, + "grad_norm": 0.8606187105178833, + "learning_rate": 2.3317986038912818e-07, + "loss": 0.0619, "step": 65760 }, { - "epoch": 4.884152680825784, - "grad_norm": 1.8027498722076416, - "learning_rate": 6.950839150452993e-07, - "loss": 0.057, + "epoch": 9.768305361651567, + "grad_norm": 0.7856817245483398, + "learning_rate": 2.316946383484331e-07, + "loss": 0.0375, "step": 65770 }, { - "epoch": 4.8848952918461315, - "grad_norm": 1.0578224658966064, - "learning_rate": 6.90628248923214e-07, - "loss": 0.0633, + "epoch": 9.769790583692261, + "grad_norm": 0.7301252484321594, + "learning_rate": 2.3020941630773802e-07, + "loss": 0.0466, "step": 65780 }, { - "epoch": 4.8856379028664785, - "grad_norm": 1.2955671548843384, - "learning_rate": 6.861725828011288e-07, - "loss": 0.0574, + "epoch": 9.771275805732957, + "grad_norm": 0.7444620132446289, + "learning_rate": 2.2872419426704293e-07, + "loss": 0.0461, "step": 65790 }, { - "epoch": 4.886380513886826, - "grad_norm": 2.2496156692504883, - "learning_rate": 6.817169166790436e-07, - "loss": 0.0629, + "epoch": 9.772761027773653, + "grad_norm": 0.6654163599014282, + "learning_rate": 2.2723897222634787e-07, + "loss": 0.0624, "step": 65800 }, { - "epoch": 4.887123124907173, - "grad_norm": 1.6367658376693726, - "learning_rate": 6.772612505569583e-07, - "loss": 0.0392, + "epoch": 9.774246249814347, + "grad_norm": 1.2745815515518188, + "learning_rate": 2.2575375018565278e-07, + "loss": 0.0666, "step": 65810 }, { - "epoch": 4.887865735927521, - "grad_norm": 0.9300660490989685, - "learning_rate": 6.72805584434873e-07, - "loss": 0.0769, + "epoch": 9.775731471855043, + "grad_norm": 0.8714974522590637, + "learning_rate": 2.242685281449577e-07, + "loss": 0.0559, "step": 65820 }, { - "epoch": 4.888608346947869, - "grad_norm": 0.5818589329719543, - "learning_rate": 6.683499183127877e-07, - "loss": 0.0558, + "epoch": 9.777216693895738, + "grad_norm": 1.2431142330169678, + "learning_rate": 2.227833061042626e-07, + "loss": 0.0488, "step": 65830 }, { - "epoch": 4.889350957968216, - "grad_norm": 2.5883166790008545, - "learning_rate": 6.638942521907026e-07, - "loss": 0.0754, + "epoch": 9.778701915936432, + "grad_norm": 0.9246008396148682, + "learning_rate": 2.2129808406356753e-07, + "loss": 0.0514, "step": 65840 }, { - "epoch": 4.890093568988564, - "grad_norm": 2.193958282470703, - "learning_rate": 6.594385860686173e-07, - "loss": 0.0583, + "epoch": 9.780187137977128, + "grad_norm": 1.1826361417770386, + "learning_rate": 2.1981286202287244e-07, + "loss": 0.0516, "step": 65850 }, { - "epoch": 4.890836180008911, - "grad_norm": 1.7549993991851807, - "learning_rate": 6.54982919946532e-07, - "loss": 0.0608, + "epoch": 9.781672360017822, + "grad_norm": 1.0376538038253784, + "learning_rate": 2.1832763998217733e-07, + "loss": 0.0471, "step": 65860 }, { - "epoch": 4.891578791029259, - "grad_norm": 1.5535106658935547, - "learning_rate": 6.505272538244468e-07, - "loss": 0.0469, + "epoch": 9.783157582058518, + "grad_norm": 0.5390947461128235, + "learning_rate": 2.168424179414823e-07, + "loss": 0.0471, "step": 65870 }, { - "epoch": 4.892321402049607, - "grad_norm": 4.540996074676514, - "learning_rate": 6.460715877023615e-07, - "loss": 0.0707, + "epoch": 9.784642804099214, + "grad_norm": 0.7411609292030334, + "learning_rate": 2.1535719590078717e-07, + "loss": 0.0486, "step": 65880 }, { - "epoch": 4.893064013069954, - "grad_norm": 0.570190966129303, - "learning_rate": 6.416159215802762e-07, - "loss": 0.0562, + "epoch": 9.786128026139908, + "grad_norm": 0.964026153087616, + "learning_rate": 2.1387197386009208e-07, + "loss": 0.045, "step": 65890 }, { - "epoch": 4.893806624090302, - "grad_norm": 2.189643621444702, - "learning_rate": 6.371602554581911e-07, - "loss": 0.0557, + "epoch": 9.787613248180604, + "grad_norm": 0.2791236340999603, + "learning_rate": 2.1238675181939702e-07, + "loss": 0.0456, "step": 65900 }, { - "epoch": 4.894549235110649, - "grad_norm": 1.114089846611023, - "learning_rate": 6.327045893361058e-07, - "loss": 0.0915, + "epoch": 9.789098470221298, + "grad_norm": 0.9963831305503845, + "learning_rate": 2.1090152977870193e-07, + "loss": 0.0638, "step": 65910 }, { - "epoch": 4.895291846130997, - "grad_norm": 1.200714349746704, - "learning_rate": 6.282489232140205e-07, - "loss": 0.037, + "epoch": 9.790583692261993, + "grad_norm": 0.5477802157402039, + "learning_rate": 2.0941630773800684e-07, + "loss": 0.0476, "step": 65920 }, { - "epoch": 4.8960344571513446, - "grad_norm": 1.7422734498977661, - "learning_rate": 6.237932570919352e-07, - "loss": 0.0929, + "epoch": 9.792068914302689, + "grad_norm": 0.26147234439849854, + "learning_rate": 2.0793108569731175e-07, + "loss": 0.0572, "step": 65930 }, { - "epoch": 4.896777068171692, - "grad_norm": 1.0818594694137573, - "learning_rate": 6.1933759096985e-07, - "loss": 0.0399, + "epoch": 9.793554136343383, + "grad_norm": 1.2986868619918823, + "learning_rate": 2.0644586365661669e-07, + "loss": 0.0911, "step": 65940 }, { - "epoch": 4.8975196791920395, - "grad_norm": 0.6319842338562012, - "learning_rate": 6.148819248477648e-07, - "loss": 0.0436, + "epoch": 9.795039358384079, + "grad_norm": 0.6861764788627625, + "learning_rate": 2.049606416159216e-07, + "loss": 0.0651, "step": 65950 }, { - "epoch": 4.8982622902123865, - "grad_norm": 1.0906407833099365, - "learning_rate": 6.104262587256795e-07, - "loss": 0.0474, + "epoch": 9.796524580424773, + "grad_norm": 1.7995854616165161, + "learning_rate": 2.034754195752265e-07, + "loss": 0.0426, "step": 65960 }, { - "epoch": 4.899004901232734, - "grad_norm": 0.6139366626739502, - "learning_rate": 6.059705926035942e-07, - "loss": 0.052, + "epoch": 9.798009802465469, + "grad_norm": 1.2532118558883667, + "learning_rate": 2.0199019753453144e-07, + "loss": 0.0476, "step": 65970 }, { - "epoch": 4.899747512253082, - "grad_norm": 1.4248707294464111, - "learning_rate": 6.01514926481509e-07, - "loss": 0.0459, + "epoch": 9.799495024506164, + "grad_norm": 0.5201395153999329, + "learning_rate": 2.0050497549383635e-07, + "loss": 0.0392, "step": 65980 }, { - "epoch": 4.900490123273429, - "grad_norm": 0.5179479122161865, - "learning_rate": 5.970592603594237e-07, - "loss": 0.0582, + "epoch": 9.800980246546859, + "grad_norm": 0.6813002228736877, + "learning_rate": 1.9901975345314126e-07, + "loss": 0.0565, "step": 65990 }, { - "epoch": 4.901232734293777, - "grad_norm": 4.057821273803711, - "learning_rate": 5.926035942373385e-07, - "loss": 0.0822, + "epoch": 9.802465468587554, + "grad_norm": 1.2058467864990234, + "learning_rate": 1.975345314124462e-07, + "loss": 0.0479, "step": 66000 }, { - "epoch": 4.901975345314124, - "grad_norm": 0.7307072877883911, - "learning_rate": 5.881479281152533e-07, - "loss": 0.0771, + "epoch": 9.803950690628248, + "grad_norm": 1.339169979095459, + "learning_rate": 1.960493093717511e-07, + "loss": 0.0657, "step": 66010 }, { - "epoch": 4.902717956334472, - "grad_norm": 1.7318345308303833, - "learning_rate": 5.836922619931679e-07, - "loss": 0.0519, + "epoch": 9.805435912668944, + "grad_norm": 1.0724490880966187, + "learning_rate": 1.94564087331056e-07, + "loss": 0.0589, "step": 66020 }, { - "epoch": 4.90346056735482, - "grad_norm": 0.3162935972213745, - "learning_rate": 5.792365958710828e-07, - "loss": 0.0516, + "epoch": 9.80692113470964, + "grad_norm": 1.7160189151763916, + "learning_rate": 1.9307886529036095e-07, + "loss": 0.0549, "step": 66030 }, { - "epoch": 4.904203178375167, - "grad_norm": 1.487998127937317, - "learning_rate": 5.747809297489975e-07, - "loss": 0.0676, + "epoch": 9.808406356750334, + "grad_norm": 0.39401769638061523, + "learning_rate": 1.9159364324966584e-07, + "loss": 0.0379, "step": 66040 }, { - "epoch": 4.904945789395515, - "grad_norm": 1.0163052082061768, - "learning_rate": 5.703252636269122e-07, - "loss": 0.0582, + "epoch": 9.80989157879103, + "grad_norm": 1.2391009330749512, + "learning_rate": 1.9010842120897075e-07, + "loss": 0.043, "step": 66050 }, { - "epoch": 4.905688400415862, - "grad_norm": 2.9083852767944336, - "learning_rate": 5.65869597504827e-07, - "loss": 0.0487, + "epoch": 9.811376800831724, + "grad_norm": 1.3258800506591797, + "learning_rate": 1.8862319916827566e-07, + "loss": 0.0543, "step": 66060 }, { - "epoch": 4.90643101143621, - "grad_norm": 1.4686026573181152, - "learning_rate": 5.614139313827418e-07, - "loss": 0.0548, + "epoch": 9.81286202287242, + "grad_norm": 1.50371515750885, + "learning_rate": 1.871379771275806e-07, + "loss": 0.0575, "step": 66070 }, { - "epoch": 4.907173622456558, - "grad_norm": 2.3733696937561035, - "learning_rate": 5.569582652606564e-07, - "loss": 0.0789, + "epoch": 9.814347244913115, + "grad_norm": 1.2874075174331665, + "learning_rate": 1.856527550868855e-07, + "loss": 0.0517, "step": 66080 }, { - "epoch": 4.907916233476905, - "grad_norm": 0.551871120929718, - "learning_rate": 5.525025991385712e-07, - "loss": 0.044, + "epoch": 9.81583246695381, + "grad_norm": 0.9409539699554443, + "learning_rate": 1.841675330461904e-07, + "loss": 0.0681, "step": 66090 }, { - "epoch": 4.9086588444972525, - "grad_norm": 0.9300947189331055, - "learning_rate": 5.48046933016486e-07, - "loss": 0.0611, + "epoch": 9.817317688994505, + "grad_norm": 1.2464892864227295, + "learning_rate": 1.8268231100549535e-07, + "loss": 0.0658, "step": 66100 }, { - "epoch": 4.9094014555175995, - "grad_norm": 0.5513383746147156, - "learning_rate": 5.435912668944008e-07, - "loss": 0.0764, + "epoch": 9.818802911035199, + "grad_norm": 0.7039570212364197, + "learning_rate": 1.8119708896480026e-07, + "loss": 0.0434, "step": 66110 }, { - "epoch": 4.910144066537947, - "grad_norm": 1.1287389993667603, - "learning_rate": 5.391356007723155e-07, - "loss": 0.0416, + "epoch": 9.820288133075895, + "grad_norm": 0.4767332375049591, + "learning_rate": 1.7971186692410517e-07, + "loss": 0.0494, "step": 66120 }, { - "epoch": 4.910886677558295, - "grad_norm": 0.9969607591629028, - "learning_rate": 5.346799346502302e-07, - "loss": 0.0528, + "epoch": 9.82177335511659, + "grad_norm": 1.12473464012146, + "learning_rate": 1.782266448834101e-07, + "loss": 0.0671, "step": 66130 }, { - "epoch": 4.911629288578642, - "grad_norm": 1.272377371788025, - "learning_rate": 5.30224268528145e-07, - "loss": 0.0528, + "epoch": 9.823258577157285, + "grad_norm": 1.3426387310028076, + "learning_rate": 1.7674142284271501e-07, + "loss": 0.0428, "step": 66140 }, { - "epoch": 4.91237189959899, - "grad_norm": 0.5094819068908691, - "learning_rate": 5.257686024060597e-07, - "loss": 0.0744, + "epoch": 9.82474379919798, + "grad_norm": 1.6674333810806274, + "learning_rate": 1.752562008020199e-07, + "loss": 0.0468, "step": 66150 }, { - "epoch": 4.913114510619337, - "grad_norm": 1.5360733270645142, - "learning_rate": 5.213129362839744e-07, - "loss": 0.0431, + "epoch": 9.826229021238674, + "grad_norm": 0.9544386863708496, + "learning_rate": 1.737709787613248e-07, + "loss": 0.0414, "step": 66160 }, { - "epoch": 4.913857121639685, - "grad_norm": 2.1293842792510986, - "learning_rate": 5.168572701618893e-07, - "loss": 0.0791, + "epoch": 9.82771424327937, + "grad_norm": 0.5816496014595032, + "learning_rate": 1.7228575672062974e-07, + "loss": 0.0463, "step": 66170 }, { - "epoch": 4.914599732660033, - "grad_norm": 2.370560646057129, - "learning_rate": 5.124016040398039e-07, - "loss": 0.0784, + "epoch": 9.829199465320066, + "grad_norm": 0.7869675755500793, + "learning_rate": 1.7080053467993465e-07, + "loss": 0.0566, "step": 66180 }, { - "epoch": 4.91534234368038, - "grad_norm": 0.7912160158157349, - "learning_rate": 5.079459379177187e-07, - "loss": 0.0877, + "epoch": 9.83068468736076, + "grad_norm": 1.1577743291854858, + "learning_rate": 1.6931531263923956e-07, + "loss": 0.0625, "step": 66190 }, { - "epoch": 4.916084954700728, - "grad_norm": 2.006499767303467, - "learning_rate": 5.034902717956335e-07, - "loss": 0.0906, + "epoch": 9.832169909401456, + "grad_norm": 0.6714699268341064, + "learning_rate": 1.678300905985445e-07, + "loss": 0.0618, "step": 66200 }, { - "epoch": 4.916827565721075, - "grad_norm": 2.1745078563690186, - "learning_rate": 4.990346056735482e-07, - "loss": 0.0558, + "epoch": 9.83365513144215, + "grad_norm": 0.582687258720398, + "learning_rate": 1.663448685578494e-07, + "loss": 0.0516, "step": 66210 }, { - "epoch": 4.917570176741423, - "grad_norm": 1.09943425655365, - "learning_rate": 4.94578939551463e-07, - "loss": 0.041, + "epoch": 9.835140353482846, + "grad_norm": 1.4580570459365845, + "learning_rate": 1.6485964651715432e-07, + "loss": 0.0445, "step": 66220 }, { - "epoch": 4.918312787761771, - "grad_norm": 1.0442047119140625, - "learning_rate": 4.901232734293777e-07, - "loss": 0.0659, + "epoch": 9.836625575523541, + "grad_norm": 1.1883230209350586, + "learning_rate": 1.6337442447645926e-07, + "loss": 0.0511, "step": 66230 }, { - "epoch": 4.919055398782118, - "grad_norm": 4.601158618927002, - "learning_rate": 4.856676073072924e-07, - "loss": 0.0584, + "epoch": 9.838110797564235, + "grad_norm": 0.6525833606719971, + "learning_rate": 1.6188920243576417e-07, + "loss": 0.0499, "step": 66240 }, { - "epoch": 4.919798009802466, - "grad_norm": 0.6347880363464355, - "learning_rate": 4.812119411852072e-07, - "loss": 0.0551, + "epoch": 9.839596019604931, + "grad_norm": 1.5722192525863647, + "learning_rate": 1.6040398039506908e-07, + "loss": 0.0646, "step": 66250 }, { - "epoch": 4.920540620822813, - "grad_norm": 1.1013524532318115, - "learning_rate": 4.76756275063122e-07, - "loss": 0.0558, + "epoch": 9.841081241645625, + "grad_norm": 0.9194024205207825, + "learning_rate": 1.58918758354374e-07, + "loss": 0.0496, "step": 66260 }, { - "epoch": 4.9212832318431605, - "grad_norm": 1.1002392768859863, - "learning_rate": 4.723006089410367e-07, - "loss": 0.0531, + "epoch": 9.842566463686321, + "grad_norm": 0.7903993129730225, + "learning_rate": 1.5743353631367892e-07, + "loss": 0.0674, "step": 66270 }, { - "epoch": 4.922025842863508, - "grad_norm": 1.1361026763916016, - "learning_rate": 4.6784494281895144e-07, - "loss": 0.0227, + "epoch": 9.844051685727017, + "grad_norm": 1.3717644214630127, + "learning_rate": 1.5594831427298383e-07, + "loss": 0.0655, "step": 66280 }, { - "epoch": 4.922768453883855, - "grad_norm": 0.28624916076660156, - "learning_rate": 4.633892766968662e-07, - "loss": 0.0618, + "epoch": 9.84553690776771, + "grad_norm": 0.5210042595863342, + "learning_rate": 1.5446309223228874e-07, + "loss": 0.0614, "step": 66290 }, { - "epoch": 4.923511064904203, - "grad_norm": 0.3221977651119232, - "learning_rate": 4.589336105747809e-07, - "loss": 0.0617, + "epoch": 9.847022129808407, + "grad_norm": 1.4641269445419312, + "learning_rate": 1.5297787019159365e-07, + "loss": 0.0477, "step": 66300 }, { - "epoch": 4.92425367592455, - "grad_norm": 2.581843137741089, - "learning_rate": 4.544779444526957e-07, - "loss": 0.046, + "epoch": 9.8485073518491, + "grad_norm": 1.273045539855957, + "learning_rate": 1.5149264815089856e-07, + "loss": 0.0508, "step": 66310 }, { - "epoch": 4.924996286944898, - "grad_norm": 1.0967390537261963, - "learning_rate": 4.5002227833061047e-07, - "loss": 0.0466, + "epoch": 9.849992573889796, + "grad_norm": 0.6448570489883423, + "learning_rate": 1.500074261102035e-07, + "loss": 0.0559, "step": 66320 }, { - "epoch": 4.925738897965246, - "grad_norm": 2.719881296157837, - "learning_rate": 4.455666122085252e-07, - "loss": 0.0515, + "epoch": 9.851477795930492, + "grad_norm": 0.7389506697654724, + "learning_rate": 1.485222040695084e-07, + "loss": 0.0489, "step": 66330 }, { - "epoch": 4.926481508985593, - "grad_norm": 1.5159376859664917, - "learning_rate": 4.411109460864399e-07, - "loss": 0.0527, + "epoch": 9.852963017971186, + "grad_norm": 0.46983444690704346, + "learning_rate": 1.4703698202881332e-07, + "loss": 0.0475, "step": 66340 }, { - "epoch": 4.927224120005941, - "grad_norm": 1.2129600048065186, - "learning_rate": 4.3665527996435465e-07, - "loss": 0.0918, + "epoch": 9.854448240011882, + "grad_norm": 0.7253286242485046, + "learning_rate": 1.4555175998811823e-07, + "loss": 0.055, "step": 66350 }, { - "epoch": 4.927966731026288, - "grad_norm": 3.8259670734405518, - "learning_rate": 4.3219961384226944e-07, - "loss": 0.0785, + "epoch": 9.855933462052576, + "grad_norm": 0.9931745529174805, + "learning_rate": 1.4406653794742314e-07, + "loss": 0.0323, "step": 66360 }, { - "epoch": 4.928709342046636, - "grad_norm": 0.3441977798938751, - "learning_rate": 4.277439477201842e-07, - "loss": 0.0542, + "epoch": 9.857418684093272, + "grad_norm": 0.8444429636001587, + "learning_rate": 1.4258131590672807e-07, + "loss": 0.0501, "step": 66370 }, { - "epoch": 4.929451953066984, - "grad_norm": 0.8108426332473755, - "learning_rate": 4.232882815980989e-07, - "loss": 0.0517, + "epoch": 9.858903906133968, + "grad_norm": 1.6108314990997314, + "learning_rate": 1.4109609386603298e-07, + "loss": 0.0533, "step": 66380 }, { - "epoch": 4.930194564087331, - "grad_norm": 3.1225900650024414, - "learning_rate": 4.188326154760137e-07, - "loss": 0.0525, + "epoch": 9.860389128174662, + "grad_norm": 1.9255098104476929, + "learning_rate": 1.396108718253379e-07, + "loss": 0.04, "step": 66390 }, { - "epoch": 4.930937175107679, - "grad_norm": 2.4837539196014404, - "learning_rate": 4.1437694935392846e-07, - "loss": 0.0445, + "epoch": 9.861874350215357, + "grad_norm": 0.5105013847351074, + "learning_rate": 1.3812564978464283e-07, + "loss": 0.0402, "step": 66400 }, { - "epoch": 4.931679786128026, - "grad_norm": 0.961150050163269, - "learning_rate": 4.0992128323184314e-07, - "loss": 0.0613, + "epoch": 9.863359572256051, + "grad_norm": 0.756027340888977, + "learning_rate": 1.3664042774394774e-07, + "loss": 0.0375, "step": 66410 }, { - "epoch": 4.932422397148374, - "grad_norm": 0.4817768931388855, - "learning_rate": 4.054656171097579e-07, - "loss": 0.0348, + "epoch": 9.864844794296747, + "grad_norm": 0.6383519172668457, + "learning_rate": 1.3515520570325265e-07, + "loss": 0.0508, "step": 66420 }, { - "epoch": 4.9331650081687215, - "grad_norm": 3.031409502029419, - "learning_rate": 4.0100995098767265e-07, - "loss": 0.0548, + "epoch": 9.866330016337443, + "grad_norm": 1.1964900493621826, + "learning_rate": 1.3366998366255756e-07, + "loss": 0.0611, "step": 66430 }, { - "epoch": 4.9339076191890685, - "grad_norm": 0.8737612366676331, - "learning_rate": 3.9655428486558743e-07, - "loss": 0.0305, + "epoch": 9.867815238378137, + "grad_norm": 1.084910273551941, + "learning_rate": 1.3218476162186247e-07, + "loss": 0.0555, "step": 66440 }, { - "epoch": 4.934650230209416, - "grad_norm": 1.2402093410491943, - "learning_rate": 3.9209861874350216e-07, - "loss": 0.0571, + "epoch": 9.869300460418833, + "grad_norm": 1.2565182447433472, + "learning_rate": 1.306995395811674e-07, + "loss": 0.0492, "step": 66450 }, { - "epoch": 4.935392841229763, - "grad_norm": 1.8115334510803223, - "learning_rate": 3.876429526214169e-07, - "loss": 0.037, + "epoch": 9.870785682459527, + "grad_norm": 0.6368108987808228, + "learning_rate": 1.2921431754047231e-07, + "loss": 0.047, "step": 66460 }, { - "epoch": 4.936135452250111, - "grad_norm": 0.969933271408081, - "learning_rate": 3.8318728649933167e-07, - "loss": 0.0609, + "epoch": 9.872270904500223, + "grad_norm": 0.6076331734657288, + "learning_rate": 1.2772909549977722e-07, + "loss": 0.0576, "step": 66470 }, { - "epoch": 4.936878063270459, - "grad_norm": 1.2570370435714722, - "learning_rate": 3.787316203772464e-07, - "loss": 0.0489, + "epoch": 9.873756126540918, + "grad_norm": 0.9494715929031372, + "learning_rate": 1.2624387345908213e-07, + "loss": 0.0573, "step": 66480 }, { - "epoch": 4.937620674290806, - "grad_norm": 0.9259024262428284, - "learning_rate": 3.7427595425516113e-07, - "loss": 0.0624, + "epoch": 9.875241348581612, + "grad_norm": 1.1798850297927856, + "learning_rate": 1.2475865141838707e-07, + "loss": 0.0504, "step": 66490 }, { - "epoch": 4.938363285311154, - "grad_norm": 1.2812902927398682, - "learning_rate": 3.698202881330759e-07, - "loss": 0.0582, + "epoch": 9.876726570622308, + "grad_norm": 1.0333198308944702, + "learning_rate": 1.2327342937769198e-07, + "loss": 0.0344, "step": 66500 }, { - "epoch": 4.939105896331501, - "grad_norm": 1.1236120462417603, - "learning_rate": 3.6536462201099064e-07, - "loss": 0.0588, + "epoch": 9.878211792663002, + "grad_norm": 0.6598351001739502, + "learning_rate": 1.217882073369969e-07, + "loss": 0.068, "step": 66510 }, { - "epoch": 4.939848507351849, - "grad_norm": 2.4268791675567627, - "learning_rate": 3.6090895588890543e-07, - "loss": 0.0463, + "epoch": 9.879697014703698, + "grad_norm": 1.9670634269714355, + "learning_rate": 1.203029852963018e-07, + "loss": 0.0524, "step": 66520 }, { - "epoch": 4.940591118372197, - "grad_norm": 1.1556020975112915, - "learning_rate": 3.564532897668201e-07, - "loss": 0.0474, + "epoch": 9.881182236744394, + "grad_norm": 0.5597050786018372, + "learning_rate": 1.1881776325560671e-07, + "loss": 0.0365, "step": 66530 }, { - "epoch": 4.941333729392544, - "grad_norm": 0.47208043932914734, - "learning_rate": 3.519976236447349e-07, + "epoch": 9.882667458785088, + "grad_norm": 0.2909705638885498, + "learning_rate": 1.1733254121491163e-07, "loss": 0.054, "step": 66540 }, { - "epoch": 4.942076340412892, - "grad_norm": 2.0012338161468506, - "learning_rate": 3.4754195752264967e-07, - "loss": 0.0446, + "epoch": 9.884152680825784, + "grad_norm": 1.0502877235412598, + "learning_rate": 1.1584731917421656e-07, + "loss": 0.0515, "step": 66550 }, { - "epoch": 4.942818951433239, - "grad_norm": 6.049137592315674, - "learning_rate": 3.430862914005644e-07, - "loss": 0.0627, + "epoch": 9.88563790286648, + "grad_norm": 1.1342244148254395, + "learning_rate": 1.1436209713352147e-07, + "loss": 0.0417, "step": 66560 }, { - "epoch": 4.943561562453587, - "grad_norm": 1.165906548500061, - "learning_rate": 3.3863062527847913e-07, - "loss": 0.067, + "epoch": 9.887123124907173, + "grad_norm": 0.6780197024345398, + "learning_rate": 1.1287687509282639e-07, + "loss": 0.0581, "step": 66570 }, { - "epoch": 4.944304173473935, - "grad_norm": 2.933342218399048, - "learning_rate": 3.3417495915639386e-07, - "loss": 0.0588, + "epoch": 9.88860834694787, + "grad_norm": 0.6926343441009521, + "learning_rate": 1.113916530521313e-07, + "loss": 0.0429, "step": 66580 }, { - "epoch": 4.945046784494282, - "grad_norm": 0.5011084675788879, - "learning_rate": 3.2971929303430864e-07, - "loss": 0.0507, + "epoch": 9.890093568988563, + "grad_norm": 1.3560590744018555, + "learning_rate": 1.0990643101143622e-07, + "loss": 0.0805, "step": 66590 }, { - "epoch": 4.9457893955146295, - "grad_norm": 0.8596967458724976, - "learning_rate": 3.252636269122234e-07, - "loss": 0.0707, + "epoch": 9.891578791029259, + "grad_norm": 0.8795716166496277, + "learning_rate": 1.0842120897074115e-07, + "loss": 0.0564, "step": 66600 }, { - "epoch": 4.9465320065349765, - "grad_norm": 2.5871288776397705, - "learning_rate": 3.208079607901381e-07, - "loss": 0.0587, + "epoch": 9.893064013069955, + "grad_norm": 0.9842681288719177, + "learning_rate": 1.0693598693004604e-07, + "loss": 0.0617, "step": 66610 }, { - "epoch": 4.947274617555324, - "grad_norm": 0.5067526698112488, - "learning_rate": 3.163522946680529e-07, - "loss": 0.0519, + "epoch": 9.894549235110649, + "grad_norm": 0.8339792490005493, + "learning_rate": 1.0545076488935096e-07, + "loss": 0.064, "step": 66620 }, { - "epoch": 4.948017228575672, - "grad_norm": 0.439694344997406, - "learning_rate": 3.118966285459676e-07, - "loss": 0.0436, + "epoch": 9.896034457151345, + "grad_norm": 0.9595751762390137, + "learning_rate": 1.0396554284865587e-07, + "loss": 0.056, "step": 66630 }, { - "epoch": 4.948759839596019, - "grad_norm": 2.3128857612609863, - "learning_rate": 3.074409624238824e-07, - "loss": 0.0436, + "epoch": 9.897519679192039, + "grad_norm": 1.2626688480377197, + "learning_rate": 1.024803208079608e-07, + "loss": 0.0718, "step": 66640 }, { - "epoch": 4.949502450616367, - "grad_norm": 1.841361403465271, - "learning_rate": 3.029852963017971e-07, - "loss": 0.0492, + "epoch": 9.899004901232734, + "grad_norm": 1.0672602653503418, + "learning_rate": 1.0099509876726572e-07, + "loss": 0.0753, "step": 66650 }, { - "epoch": 4.950245061636715, - "grad_norm": 1.778883457183838, - "learning_rate": 2.9852963017971185e-07, - "loss": 0.038, + "epoch": 9.90049012327343, + "grad_norm": 0.8560225963592529, + "learning_rate": 9.950987672657063e-08, + "loss": 0.0573, "step": 66660 }, { - "epoch": 4.950987672657062, - "grad_norm": 1.8489772081375122, - "learning_rate": 2.9407396405762663e-07, - "loss": 0.0729, + "epoch": 9.901975345314124, + "grad_norm": 1.9473977088928223, + "learning_rate": 9.802465468587555e-08, + "loss": 0.0463, "step": 66670 }, { - "epoch": 4.95173028367741, - "grad_norm": 2.0135104656219482, - "learning_rate": 2.896182979355414e-07, - "loss": 0.0562, + "epoch": 9.90346056735482, + "grad_norm": 0.4703368544578552, + "learning_rate": 9.653943264518048e-08, + "loss": 0.0461, "step": 66680 }, { - "epoch": 4.952472894697757, - "grad_norm": 0.24336954951286316, - "learning_rate": 2.851626318134561e-07, - "loss": 0.0569, + "epoch": 9.904945789395514, + "grad_norm": 1.5709819793701172, + "learning_rate": 9.505421060448537e-08, + "loss": 0.0431, "step": 66690 }, { - "epoch": 4.953215505718105, - "grad_norm": 0.4141107201576233, - "learning_rate": 2.807069656913709e-07, - "loss": 0.0319, + "epoch": 9.90643101143621, + "grad_norm": 1.1744906902313232, + "learning_rate": 9.35689885637903e-08, + "loss": 0.0594, "step": 66700 }, { - "epoch": 4.953958116738453, - "grad_norm": 0.26303309202194214, - "learning_rate": 2.762512995692856e-07, - "loss": 0.0629, + "epoch": 9.907916233476906, + "grad_norm": 1.490570068359375, + "learning_rate": 9.20837665230952e-08, + "loss": 0.0701, "step": 66710 }, { - "epoch": 4.9547007277588, - "grad_norm": 0.37212514877319336, - "learning_rate": 2.717956334472004e-07, - "loss": 0.0382, + "epoch": 9.9094014555176, + "grad_norm": 0.3112143278121948, + "learning_rate": 9.059854448240013e-08, + "loss": 0.0532, "step": 66720 }, { - "epoch": 4.955443338779148, - "grad_norm": 2.371757745742798, - "learning_rate": 2.673399673251151e-07, - "loss": 0.093, + "epoch": 9.910886677558295, + "grad_norm": 0.5256125330924988, + "learning_rate": 8.911332244170505e-08, + "loss": 0.0612, "step": 66730 }, { - "epoch": 4.956185949799495, - "grad_norm": 2.2808210849761963, - "learning_rate": 2.6288430120302985e-07, - "loss": 0.0723, + "epoch": 9.91237189959899, + "grad_norm": 0.4340265989303589, + "learning_rate": 8.762810040100995e-08, + "loss": 0.0495, "step": 66740 }, { - "epoch": 4.956928560819843, - "grad_norm": 2.8832616806030273, - "learning_rate": 2.5842863508094463e-07, - "loss": 0.0392, + "epoch": 9.913857121639685, + "grad_norm": 1.5605214834213257, + "learning_rate": 8.614287836031487e-08, + "loss": 0.0502, "step": 66750 }, { - "epoch": 4.9576711718401905, - "grad_norm": 0.5029025077819824, - "learning_rate": 2.5397296895885936e-07, - "loss": 0.0486, + "epoch": 9.915342343680381, + "grad_norm": 0.9525371193885803, + "learning_rate": 8.465765631961978e-08, + "loss": 0.0493, "step": 66760 }, { - "epoch": 4.9584137828605375, - "grad_norm": 2.287649154663086, - "learning_rate": 2.495173028367741e-07, - "loss": 0.0458, + "epoch": 9.916827565721075, + "grad_norm": 1.05837881565094, + "learning_rate": 8.31724342789247e-08, + "loss": 0.061, "step": 66770 }, { - "epoch": 4.959156393880885, - "grad_norm": 2.1999528408050537, - "learning_rate": 2.4506163671468887e-07, - "loss": 0.0567, + "epoch": 9.91831278776177, + "grad_norm": 1.485148549079895, + "learning_rate": 8.168721223822963e-08, + "loss": 0.058, "step": 66780 }, { - "epoch": 4.959899004901232, - "grad_norm": 3.0611720085144043, - "learning_rate": 2.406059705926036e-07, - "loss": 0.0937, + "epoch": 9.919798009802465, + "grad_norm": 0.5239309072494507, + "learning_rate": 8.020199019753454e-08, + "loss": 0.0506, "step": 66790 }, { - "epoch": 4.96064161592158, - "grad_norm": 2.270627737045288, - "learning_rate": 2.3615030447051836e-07, - "loss": 0.0653, + "epoch": 9.92128323184316, + "grad_norm": 0.8405706286430359, + "learning_rate": 7.871676815683946e-08, + "loss": 0.0613, "step": 66800 }, { - "epoch": 4.961384226941928, - "grad_norm": 0.9296445250511169, - "learning_rate": 2.316946383484331e-07, - "loss": 0.0645, + "epoch": 9.922768453883856, + "grad_norm": 0.7404711246490479, + "learning_rate": 7.723154611614437e-08, + "loss": 0.0368, "step": 66810 }, { - "epoch": 4.962126837962275, - "grad_norm": 3.700183868408203, - "learning_rate": 2.2723897222634784e-07, - "loss": 0.0964, + "epoch": 9.92425367592455, + "grad_norm": 0.8041830658912659, + "learning_rate": 7.574632407544928e-08, + "loss": 0.0613, "step": 66820 }, { - "epoch": 4.962869448982623, - "grad_norm": 1.576913595199585, - "learning_rate": 2.227833061042626e-07, - "loss": 0.0419, + "epoch": 9.925738897965246, + "grad_norm": 0.6040468215942383, + "learning_rate": 7.42611020347542e-08, + "loss": 0.0516, "step": 66830 }, { - "epoch": 4.96361206000297, - "grad_norm": 3.2986557483673096, - "learning_rate": 2.1832763998217733e-07, - "loss": 0.0588, + "epoch": 9.92722412000594, + "grad_norm": 0.5927002429962158, + "learning_rate": 7.277587999405911e-08, + "loss": 0.0505, "step": 66840 }, { - "epoch": 4.964354671023318, - "grad_norm": 1.3685272932052612, - "learning_rate": 2.138719738600921e-07, - "loss": 0.0492, + "epoch": 9.928709342046636, + "grad_norm": 0.4314495921134949, + "learning_rate": 7.129065795336404e-08, + "loss": 0.0552, "step": 66850 }, { - "epoch": 4.965097282043666, - "grad_norm": 0.872087836265564, - "learning_rate": 2.0941630773800684e-07, - "loss": 0.0318, + "epoch": 9.930194564087332, + "grad_norm": 0.20560452342033386, + "learning_rate": 6.980543591266895e-08, + "loss": 0.0408, "step": 66860 }, { - "epoch": 4.965839893064013, - "grad_norm": 0.4379057288169861, - "learning_rate": 2.0496064161592157e-07, - "loss": 0.0529, + "epoch": 9.931679786128026, + "grad_norm": 0.6640859246253967, + "learning_rate": 6.832021387197387e-08, + "loss": 0.0317, "step": 66870 }, { - "epoch": 4.966582504084361, - "grad_norm": 1.2920769453048706, - "learning_rate": 2.0050497549383632e-07, - "loss": 0.0887, + "epoch": 9.933165008168721, + "grad_norm": 1.0623282194137573, + "learning_rate": 6.683499183127878e-08, + "loss": 0.0501, "step": 66880 }, { - "epoch": 4.967325115104709, - "grad_norm": 1.0842275619506836, - "learning_rate": 1.9604930937175108e-07, - "loss": 0.0444, + "epoch": 9.934650230209416, + "grad_norm": 0.9650203585624695, + "learning_rate": 6.53497697905837e-08, + "loss": 0.06, "step": 66890 }, { - "epoch": 4.968067726125056, - "grad_norm": 1.154530644416809, - "learning_rate": 1.9159364324966584e-07, - "loss": 0.0532, + "epoch": 9.936135452250111, + "grad_norm": 0.383526474237442, + "learning_rate": 6.386454774988861e-08, + "loss": 0.0483, "step": 66900 }, { - "epoch": 4.9688103371454035, - "grad_norm": 0.3080504834651947, - "learning_rate": 1.8713797712758057e-07, - "loss": 0.0344, + "epoch": 9.937620674290807, + "grad_norm": 0.9319167137145996, + "learning_rate": 6.237932570919354e-08, + "loss": 0.0456, "step": 66910 }, { - "epoch": 4.9695529481657505, - "grad_norm": 2.1118993759155273, - "learning_rate": 1.8268231100549532e-07, - "loss": 0.063, + "epoch": 9.939105896331501, + "grad_norm": 1.202764630317688, + "learning_rate": 6.089410366849845e-08, + "loss": 0.0466, "step": 66920 }, { - "epoch": 4.9702955591860984, - "grad_norm": 3.921952486038208, - "learning_rate": 1.7822664488341005e-07, - "loss": 0.0543, + "epoch": 9.940591118372197, + "grad_norm": 1.169226050376892, + "learning_rate": 5.9408881627803355e-08, + "loss": 0.0744, "step": 66930 }, { - "epoch": 4.971038170206446, - "grad_norm": 1.0275546312332153, - "learning_rate": 1.7377097876132483e-07, - "loss": 0.0334, + "epoch": 9.942076340412891, + "grad_norm": 1.2179229259490967, + "learning_rate": 5.792365958710828e-08, + "loss": 0.0421, "step": 66940 }, { - "epoch": 4.971780781226793, - "grad_norm": 3.7615277767181396, - "learning_rate": 1.6931531263923956e-07, - "loss": 0.0489, + "epoch": 9.943561562453587, + "grad_norm": 0.5202845335006714, + "learning_rate": 5.6438437546413195e-08, + "loss": 0.0482, "step": 66950 }, { - "epoch": 4.972523392247141, - "grad_norm": 2.601187229156494, - "learning_rate": 1.6485964651715432e-07, - "loss": 0.0696, + "epoch": 9.945046784494282, + "grad_norm": 1.294660210609436, + "learning_rate": 5.495321550571811e-08, + "loss": 0.0549, "step": 66960 }, { - "epoch": 4.973266003267488, - "grad_norm": 2.8414504528045654, - "learning_rate": 1.6040398039506905e-07, - "loss": 0.0683, + "epoch": 9.946532006534976, + "grad_norm": 0.7863031029701233, + "learning_rate": 5.346799346502302e-08, + "loss": 0.0339, "step": 66970 }, { - "epoch": 4.974008614287836, - "grad_norm": 0.3260228633880615, - "learning_rate": 1.559483142729838e-07, - "loss": 0.0238, + "epoch": 9.948017228575672, + "grad_norm": 0.7727733254432678, + "learning_rate": 5.198277142432794e-08, + "loss": 0.0492, "step": 66980 }, { - "epoch": 4.974751225308184, - "grad_norm": 0.6493045091629028, - "learning_rate": 1.5149264815089856e-07, - "loss": 0.0541, + "epoch": 9.949502450616368, + "grad_norm": 0.6926112771034241, + "learning_rate": 5.049754938363286e-08, + "loss": 0.0485, "step": 66990 }, { - "epoch": 4.975493836328531, - "grad_norm": 1.2345525026321411, - "learning_rate": 1.4703698202881332e-07, - "loss": 0.0544, + "epoch": 9.950987672657062, + "grad_norm": 0.7342759370803833, + "learning_rate": 4.901232734293778e-08, + "loss": 0.053, "step": 67000 }, { - "epoch": 4.976236447348879, - "grad_norm": 1.9710088968276978, - "learning_rate": 1.4258131590672805e-07, - "loss": 0.0466, + "epoch": 9.952472894697758, + "grad_norm": 0.6734324097633362, + "learning_rate": 4.7527105302242687e-08, + "loss": 0.0588, "step": 67010 }, { - "epoch": 4.976979058369226, - "grad_norm": 0.2602188289165497, - "learning_rate": 1.381256497846428e-07, - "loss": 0.0513, + "epoch": 9.953958116738452, + "grad_norm": 0.7823164463043213, + "learning_rate": 4.60418832615476e-08, + "loss": 0.0371, "step": 67020 }, { - "epoch": 4.977721669389574, - "grad_norm": 2.1740708351135254, - "learning_rate": 1.3366998366255756e-07, - "loss": 0.0645, + "epoch": 9.955443338779148, + "grad_norm": 0.833852231502533, + "learning_rate": 4.4556661220852526e-08, + "loss": 0.0368, "step": 67030 }, { - "epoch": 4.978464280409922, - "grad_norm": 2.4682114124298096, - "learning_rate": 1.2921431754047231e-07, - "loss": 0.0687, + "epoch": 9.956928560819843, + "grad_norm": 0.5555514097213745, + "learning_rate": 4.3071439180157436e-08, + "loss": 0.0685, "step": 67040 }, { - "epoch": 4.979206891430269, - "grad_norm": 0.23082710802555084, - "learning_rate": 1.2475865141838704e-07, - "loss": 0.0425, + "epoch": 9.958413782860537, + "grad_norm": 0.9500750303268433, + "learning_rate": 4.158621713946235e-08, + "loss": 0.0422, "step": 67050 }, { - "epoch": 4.979949502450617, - "grad_norm": 0.2882836163043976, - "learning_rate": 1.203029852963018e-07, - "loss": 0.0423, + "epoch": 9.959899004901233, + "grad_norm": 0.4888717830181122, + "learning_rate": 4.010099509876727e-08, + "loss": 0.0566, "step": 67060 }, { - "epoch": 4.980692113470964, - "grad_norm": 1.1012338399887085, - "learning_rate": 1.1584731917421656e-07, - "loss": 0.0695, + "epoch": 9.961384226941927, + "grad_norm": 0.9437296986579895, + "learning_rate": 3.8615773058072185e-08, + "loss": 0.0503, "step": 67070 }, { - "epoch": 4.9814347244913115, - "grad_norm": 1.7664963006973267, - "learning_rate": 1.113916530521313e-07, - "loss": 0.051, + "epoch": 9.962869448982623, + "grad_norm": 0.8006777763366699, + "learning_rate": 3.71305510173771e-08, + "loss": 0.0775, "step": 67080 }, { - "epoch": 4.982177335511659, - "grad_norm": 3.3410887718200684, - "learning_rate": 1.0693598693004605e-07, - "loss": 0.0547, + "epoch": 9.964354671023319, + "grad_norm": 0.37248167395591736, + "learning_rate": 3.564532897668202e-08, + "loss": 0.0616, "step": 67090 }, { - "epoch": 4.982919946532006, - "grad_norm": 1.7740800380706787, - "learning_rate": 1.0248032080796078e-07, - "loss": 0.0407, + "epoch": 9.965839893064013, + "grad_norm": 1.2399548292160034, + "learning_rate": 3.4160106935986935e-08, + "loss": 0.0531, "step": 67100 }, { - "epoch": 4.983662557552354, - "grad_norm": 1.093974232673645, - "learning_rate": 9.802465468587554e-08, - "loss": 0.0456, + "epoch": 9.967325115104709, + "grad_norm": 1.3255739212036133, + "learning_rate": 3.267488489529185e-08, + "loss": 0.0571, "step": 67110 }, { - "epoch": 4.984405168572701, - "grad_norm": 1.4569469690322876, - "learning_rate": 9.356898856379028e-08, - "loss": 0.0691, + "epoch": 9.968810337145403, + "grad_norm": 0.7814356684684753, + "learning_rate": 3.118966285459677e-08, + "loss": 0.0501, "step": 67120 }, { - "epoch": 4.985147779593049, - "grad_norm": 2.364649534225464, - "learning_rate": 8.911332244170503e-08, - "loss": 0.0769, + "epoch": 9.970295559186098, + "grad_norm": 0.7996913194656372, + "learning_rate": 2.9704440813901678e-08, + "loss": 0.0431, "step": 67130 }, { - "epoch": 4.985890390613397, - "grad_norm": 0.6497974991798401, - "learning_rate": 8.465765631961978e-08, - "loss": 0.0882, + "epoch": 9.971780781226794, + "grad_norm": 0.25158068537712097, + "learning_rate": 2.8219218773206597e-08, + "loss": 0.042, "step": 67140 }, { - "epoch": 4.986633001633744, - "grad_norm": 1.16084623336792, - "learning_rate": 8.020199019753452e-08, - "loss": 0.0376, + "epoch": 9.973266003267488, + "grad_norm": 0.4499381482601166, + "learning_rate": 2.673399673251151e-08, + "loss": 0.0465, "step": 67150 }, { - "epoch": 4.987375612654092, - "grad_norm": 2.3974056243896484, - "learning_rate": 7.574632407544928e-08, - "loss": 0.03, + "epoch": 9.974751225308184, + "grad_norm": 1.553957462310791, + "learning_rate": 2.524877469181643e-08, + "loss": 0.0501, "step": 67160 }, { - "epoch": 4.988118223674439, - "grad_norm": 0.8052327036857605, - "learning_rate": 7.129065795336402e-08, - "loss": 0.0542, + "epoch": 9.976236447348878, + "grad_norm": 0.17447112500667572, + "learning_rate": 2.3763552651121343e-08, + "loss": 0.0504, "step": 67170 }, { - "epoch": 4.988860834694787, - "grad_norm": 0.5777415037155151, - "learning_rate": 6.683499183127878e-08, - "loss": 0.0494, + "epoch": 9.977721669389574, + "grad_norm": 0.9022453427314758, + "learning_rate": 2.2278330610426263e-08, + "loss": 0.0535, "step": 67180 }, { - "epoch": 4.989603445715135, - "grad_norm": 1.4238885641098022, - "learning_rate": 6.237932570919352e-08, - "loss": 0.0537, + "epoch": 9.97920689143027, + "grad_norm": 1.0982049703598022, + "learning_rate": 2.0793108569731176e-08, + "loss": 0.0615, "step": 67190 }, { - "epoch": 4.990346056735482, - "grad_norm": 3.352360963821411, - "learning_rate": 5.792365958710828e-08, - "loss": 0.0523, + "epoch": 9.980692113470964, + "grad_norm": 0.504771888256073, + "learning_rate": 1.9307886529036093e-08, + "loss": 0.0462, "step": 67200 }, { - "epoch": 4.99108866775583, - "grad_norm": 1.1126995086669922, - "learning_rate": 5.346799346502303e-08, - "loss": 0.05, + "epoch": 9.98217733551166, + "grad_norm": 0.8339830636978149, + "learning_rate": 1.782266448834101e-08, + "loss": 0.06, "step": 67210 }, { - "epoch": 4.991831278776177, - "grad_norm": 1.2887812852859497, - "learning_rate": 4.901232734293777e-08, - "loss": 0.0496, + "epoch": 9.983662557552353, + "grad_norm": 0.9843574166297913, + "learning_rate": 1.6337442447645926e-08, + "loss": 0.0567, "step": 67220 }, { - "epoch": 4.992573889796525, - "grad_norm": 4.443135738372803, - "learning_rate": 4.455666122085251e-08, - "loss": 0.0526, + "epoch": 9.98514777959305, + "grad_norm": 1.6802273988723755, + "learning_rate": 1.4852220406950839e-08, + "loss": 0.0608, "step": 67230 }, { - "epoch": 4.9933165008168725, - "grad_norm": 1.4799822568893433, - "learning_rate": 4.010099509876726e-08, - "loss": 0.0601, + "epoch": 9.986633001633745, + "grad_norm": 0.8817097544670105, + "learning_rate": 1.3366998366255755e-08, + "loss": 0.0537, "step": 67240 }, { - "epoch": 4.9940591118372195, - "grad_norm": 0.5479670166969299, - "learning_rate": 3.564532897668201e-08, - "loss": 0.0607, + "epoch": 9.988118223674439, + "grad_norm": 0.6457740664482117, + "learning_rate": 1.1881776325560672e-08, + "loss": 0.0749, "step": 67250 }, { - "epoch": 4.994801722857567, - "grad_norm": 2.0314948558807373, - "learning_rate": 3.118966285459676e-08, - "loss": 0.0603, + "epoch": 9.989603445715135, + "grad_norm": 1.4911023378372192, + "learning_rate": 1.0396554284865588e-08, + "loss": 0.0494, "step": 67260 }, { - "epoch": 4.995544333877914, - "grad_norm": 0.9696687459945679, - "learning_rate": 2.6733996732511514e-08, - "loss": 0.0572, + "epoch": 9.991088667755829, + "grad_norm": 0.6926244497299194, + "learning_rate": 8.911332244170505e-09, + "loss": 0.047, "step": 67270 }, { - "epoch": 4.996286944898262, - "grad_norm": 1.398507833480835, - "learning_rate": 2.2278330610426256e-08, - "loss": 0.0886, + "epoch": 9.992573889796525, + "grad_norm": 1.2885057926177979, + "learning_rate": 7.426110203475419e-09, + "loss": 0.0553, "step": 67280 }, { - "epoch": 4.99702955591861, - "grad_norm": 1.3897038698196411, - "learning_rate": 1.7822664488341006e-08, - "loss": 0.0742, + "epoch": 9.99405911183722, + "grad_norm": 0.38584211468696594, + "learning_rate": 5.940888162780336e-09, + "loss": 0.052, "step": 67290 }, { - "epoch": 4.997772166938957, - "grad_norm": 1.0769445896148682, - "learning_rate": 1.3366998366255757e-08, - "loss": 0.0252, + "epoch": 9.995544333877914, + "grad_norm": 0.6076240539550781, + "learning_rate": 4.455666122085252e-09, + "loss": 0.0729, "step": 67300 }, { - "epoch": 4.998514777959305, - "grad_norm": 2.8286406993865967, - "learning_rate": 8.911332244170503e-09, - "loss": 0.05, + "epoch": 9.99702955591861, + "grad_norm": 0.7656409740447998, + "learning_rate": 2.970444081390168e-09, + "loss": 0.067, "step": 67310 }, { - "epoch": 4.999257388979652, - "grad_norm": 2.435650110244751, - "learning_rate": 4.4556661220852515e-09, - "loss": 0.0417, + "epoch": 9.998514777959304, + "grad_norm": 0.22936700284481049, + "learning_rate": 1.485222040695084e-09, + "loss": 0.0667, "step": 67320 }, { - "epoch": 5.0, - "grad_norm": 6.435393333435059, + "epoch": 10.0, + "grad_norm": 0.6690473556518555, "learning_rate": 0.0, - "loss": 0.0683, + "loss": 0.0389, "step": 67330 }, { - "epoch": 5.0, - "eval_f1": 0.0, - "eval_loss": 0.05167969688773155, - "eval_runtime": 794.3074, - "eval_samples_per_second": 47.864, - "eval_steps_per_second": 2.993, + "epoch": 10.0, + "eval_accuracy": 0.49727767695099817, + "eval_loss": 0.05480470508337021, + "eval_runtime": 204.2655, + "eval_samples_per_second": 186.125, + "eval_steps_per_second": 5.821, "step": 67330 }, { - "epoch": 5.0, + "epoch": 10.0, "step": 67330, - "total_flos": 8.34383324330106e+19, - "train_loss": 0.0714715420367653, - "train_runtime": 35752.3153, - "train_samples_per_second": 30.13, - "train_steps_per_second": 1.883 + "total_flos": 1.6694802102985576e+20, + "train_loss": 0.0, + "train_runtime": 0.0767, + "train_samples_per_second": 28073613.463, + "train_steps_per_second": 877361.502 } ], "logging_steps": 10, "max_steps": 67330, "num_input_tokens_seen": 0, - "num_train_epochs": 5, + "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { @@ -47211,8 +47256,8 @@ "attributes": {} } }, - "total_flos": 8.34383324330106e+19, - "train_batch_size": 16, + "total_flos": 1.6694802102985576e+20, + "train_batch_size": 32, "trial_name": null, "trial_params": null }