{ "best_metric": 0.05480470508337021, "best_model_checkpoint": "./default_model/checkpoint-67330", "epoch": 10.0, "eval_steps": 500, "global_step": 67330, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0014852220406950838, "grad_norm": 1.9820189476013184, "learning_rate": 9.998514777959307e-06, "loss": 0.3847, "step": 10 }, { "epoch": 0.0029704440813901676, "grad_norm": 1.3927024602890015, "learning_rate": 9.99702955591861e-06, "loss": 0.2215, "step": 20 }, { "epoch": 0.004455666122085252, "grad_norm": 1.5467994213104248, "learning_rate": 9.995544333877916e-06, "loss": 0.1743, "step": 30 }, { "epoch": 0.005940888162780335, "grad_norm": 2.4779608249664307, "learning_rate": 9.99405911183722e-06, "loss": 0.1726, "step": 40 }, { "epoch": 0.007426110203475419, "grad_norm": 1.2798831462860107, "learning_rate": 9.992573889796525e-06, "loss": 0.1409, "step": 50 }, { "epoch": 0.008911332244170504, "grad_norm": 1.459090232849121, "learning_rate": 9.99108866775583e-06, "loss": 0.1564, "step": 60 }, { "epoch": 0.010396554284865587, "grad_norm": 1.5025994777679443, "learning_rate": 9.989603445715135e-06, "loss": 0.148, "step": 70 }, { "epoch": 0.01188177632556067, "grad_norm": 0.8423486948013306, "learning_rate": 9.98811822367444e-06, "loss": 0.1185, "step": 80 }, { "epoch": 0.013366998366255755, "grad_norm": 2.3561830520629883, "learning_rate": 9.986633001633746e-06, "loss": 0.1449, "step": 90 }, { "epoch": 0.014852220406950839, "grad_norm": 1.7730501890182495, "learning_rate": 9.98514777959305e-06, "loss": 0.1161, "step": 100 }, { "epoch": 0.016337442447645924, "grad_norm": 4.120682716369629, "learning_rate": 9.983662557552355e-06, "loss": 0.1291, "step": 110 }, { "epoch": 0.01782266448834101, "grad_norm": 1.2402255535125732, "learning_rate": 9.98217733551166e-06, "loss": 0.1095, "step": 120 }, { "epoch": 0.019307886529036093, "grad_norm": 1.8307442665100098, "learning_rate": 9.980692113470965e-06, "loss": 0.118, "step": 130 }, { "epoch": 0.020793108569731173, "grad_norm": 2.220118761062622, "learning_rate": 9.97920689143027e-06, "loss": 0.1311, "step": 140 }, { "epoch": 0.022278330610426257, "grad_norm": 1.129960060119629, "learning_rate": 9.977721669389576e-06, "loss": 0.1183, "step": 150 }, { "epoch": 0.02376355265112134, "grad_norm": 1.3577617406845093, "learning_rate": 9.976236447348879e-06, "loss": 0.1153, "step": 160 }, { "epoch": 0.025248774691816425, "grad_norm": 2.288347005844116, "learning_rate": 9.974751225308185e-06, "loss": 0.1126, "step": 170 }, { "epoch": 0.02673399673251151, "grad_norm": 1.5640208721160889, "learning_rate": 9.973266003267489e-06, "loss": 0.1199, "step": 180 }, { "epoch": 0.028219218773206593, "grad_norm": 1.3363808393478394, "learning_rate": 9.971780781226794e-06, "loss": 0.109, "step": 190 }, { "epoch": 0.029704440813901677, "grad_norm": 1.5698567628860474, "learning_rate": 9.9702955591861e-06, "loss": 0.121, "step": 200 }, { "epoch": 0.03118966285459676, "grad_norm": 1.7361061573028564, "learning_rate": 9.968810337145404e-06, "loss": 0.1346, "step": 210 }, { "epoch": 0.03267488489529185, "grad_norm": 1.4597039222717285, "learning_rate": 9.967325115104709e-06, "loss": 0.1177, "step": 220 }, { "epoch": 0.03416010693598693, "grad_norm": 0.9901100397109985, "learning_rate": 9.965839893064015e-06, "loss": 0.1083, "step": 230 }, { "epoch": 0.03564532897668202, "grad_norm": 1.2590155601501465, "learning_rate": 9.964354671023319e-06, "loss": 0.1275, "step": 240 }, { "epoch": 0.0371305510173771, "grad_norm": 1.403277039527893, "learning_rate": 9.962869448982624e-06, "loss": 0.125, "step": 250 }, { "epoch": 0.038615773058072185, "grad_norm": 0.7854731678962708, "learning_rate": 9.961384226941928e-06, "loss": 0.1176, "step": 260 }, { "epoch": 0.040100995098767266, "grad_norm": 1.5352309942245483, "learning_rate": 9.959899004901234e-06, "loss": 0.1202, "step": 270 }, { "epoch": 0.041586217139462346, "grad_norm": 2.2147843837738037, "learning_rate": 9.958413782860539e-06, "loss": 0.1113, "step": 280 }, { "epoch": 0.043071439180157434, "grad_norm": 2.406499147415161, "learning_rate": 9.956928560819843e-06, "loss": 0.1182, "step": 290 }, { "epoch": 0.044556661220852514, "grad_norm": 0.9340627193450928, "learning_rate": 9.955443338779149e-06, "loss": 0.1318, "step": 300 }, { "epoch": 0.0460418832615476, "grad_norm": 1.3280363082885742, "learning_rate": 9.953958116738453e-06, "loss": 0.1009, "step": 310 }, { "epoch": 0.04752710530224268, "grad_norm": 2.685227394104004, "learning_rate": 9.952472894697758e-06, "loss": 0.1327, "step": 320 }, { "epoch": 0.04901232734293777, "grad_norm": 2.1112072467803955, "learning_rate": 9.950987672657062e-06, "loss": 0.1405, "step": 330 }, { "epoch": 0.05049754938363285, "grad_norm": 1.7051947116851807, "learning_rate": 9.949502450616368e-06, "loss": 0.1002, "step": 340 }, { "epoch": 0.05198277142432794, "grad_norm": 1.2203819751739502, "learning_rate": 9.948017228575673e-06, "loss": 0.1308, "step": 350 }, { "epoch": 0.05346799346502302, "grad_norm": 1.344948172569275, "learning_rate": 9.946532006534977e-06, "loss": 0.1053, "step": 360 }, { "epoch": 0.054953215505718106, "grad_norm": 1.8618378639221191, "learning_rate": 9.945046784494282e-06, "loss": 0.1331, "step": 370 }, { "epoch": 0.05643843754641319, "grad_norm": 1.7152793407440186, "learning_rate": 9.943561562453588e-06, "loss": 0.1065, "step": 380 }, { "epoch": 0.057923659587108274, "grad_norm": 1.1750344038009644, "learning_rate": 9.942076340412892e-06, "loss": 0.1122, "step": 390 }, { "epoch": 0.059408881627803355, "grad_norm": 1.1157976388931274, "learning_rate": 9.940591118372197e-06, "loss": 0.0954, "step": 400 }, { "epoch": 0.06089410366849844, "grad_norm": 1.8320587873458862, "learning_rate": 9.939105896331503e-06, "loss": 0.1009, "step": 410 }, { "epoch": 0.06237932570919352, "grad_norm": 1.6388369798660278, "learning_rate": 9.937620674290807e-06, "loss": 0.1146, "step": 420 }, { "epoch": 0.0638645477498886, "grad_norm": 1.4120920896530151, "learning_rate": 9.936135452250112e-06, "loss": 0.1177, "step": 430 }, { "epoch": 0.0653497697905837, "grad_norm": 3.5797083377838135, "learning_rate": 9.934650230209418e-06, "loss": 0.0851, "step": 440 }, { "epoch": 0.06683499183127878, "grad_norm": 1.3494925498962402, "learning_rate": 9.933165008168722e-06, "loss": 0.1109, "step": 450 }, { "epoch": 0.06832021387197386, "grad_norm": 2.6755356788635254, "learning_rate": 9.931679786128027e-06, "loss": 0.0913, "step": 460 }, { "epoch": 0.06980543591266894, "grad_norm": 2.111666440963745, "learning_rate": 9.930194564087333e-06, "loss": 0.1176, "step": 470 }, { "epoch": 0.07129065795336403, "grad_norm": 1.5201246738433838, "learning_rate": 9.928709342046636e-06, "loss": 0.1142, "step": 480 }, { "epoch": 0.07277587999405911, "grad_norm": 1.7397887706756592, "learning_rate": 9.927224120005942e-06, "loss": 0.1295, "step": 490 }, { "epoch": 0.0742611020347542, "grad_norm": 0.8776573538780212, "learning_rate": 9.925738897965246e-06, "loss": 0.1038, "step": 500 }, { "epoch": 0.07574632407544928, "grad_norm": 0.9134131669998169, "learning_rate": 9.92425367592455e-06, "loss": 0.1046, "step": 510 }, { "epoch": 0.07723154611614437, "grad_norm": 0.8088383674621582, "learning_rate": 9.922768453883857e-06, "loss": 0.1095, "step": 520 }, { "epoch": 0.07871676815683945, "grad_norm": 1.253274917602539, "learning_rate": 9.921283231843161e-06, "loss": 0.1006, "step": 530 }, { "epoch": 0.08020199019753453, "grad_norm": 0.9179076552391052, "learning_rate": 9.919798009802466e-06, "loss": 0.0918, "step": 540 }, { "epoch": 0.08168721223822961, "grad_norm": 2.49874210357666, "learning_rate": 9.918312787761772e-06, "loss": 0.0909, "step": 550 }, { "epoch": 0.08317243427892469, "grad_norm": 1.1081864833831787, "learning_rate": 9.916827565721076e-06, "loss": 0.0909, "step": 560 }, { "epoch": 0.08465765631961979, "grad_norm": 0.7937701940536499, "learning_rate": 9.91534234368038e-06, "loss": 0.1153, "step": 570 }, { "epoch": 0.08614287836031487, "grad_norm": 1.2835909128189087, "learning_rate": 9.913857121639687e-06, "loss": 0.1001, "step": 580 }, { "epoch": 0.08762810040100995, "grad_norm": 2.2040748596191406, "learning_rate": 9.912371899598991e-06, "loss": 0.1065, "step": 590 }, { "epoch": 0.08911332244170503, "grad_norm": 1.055566430091858, "learning_rate": 9.910886677558296e-06, "loss": 0.1175, "step": 600 }, { "epoch": 0.09059854448240012, "grad_norm": 0.8433722853660583, "learning_rate": 9.909401455517602e-06, "loss": 0.1111, "step": 610 }, { "epoch": 0.0920837665230952, "grad_norm": 2.4051921367645264, "learning_rate": 9.907916233476904e-06, "loss": 0.1074, "step": 620 }, { "epoch": 0.09356898856379028, "grad_norm": 1.3250362873077393, "learning_rate": 9.90643101143621e-06, "loss": 0.1202, "step": 630 }, { "epoch": 0.09505421060448536, "grad_norm": 1.2202482223510742, "learning_rate": 9.904945789395515e-06, "loss": 0.114, "step": 640 }, { "epoch": 0.09653943264518046, "grad_norm": 1.8007020950317383, "learning_rate": 9.90346056735482e-06, "loss": 0.0916, "step": 650 }, { "epoch": 0.09802465468587554, "grad_norm": 1.4209966659545898, "learning_rate": 9.901975345314126e-06, "loss": 0.0859, "step": 660 }, { "epoch": 0.09950987672657062, "grad_norm": 2.0150651931762695, "learning_rate": 9.90049012327343e-06, "loss": 0.1268, "step": 670 }, { "epoch": 0.1009950987672657, "grad_norm": 1.1411141157150269, "learning_rate": 9.899004901232734e-06, "loss": 0.1089, "step": 680 }, { "epoch": 0.1024803208079608, "grad_norm": 2.234036922454834, "learning_rate": 9.89751967919204e-06, "loss": 0.1017, "step": 690 }, { "epoch": 0.10396554284865588, "grad_norm": 1.515994906425476, "learning_rate": 9.896034457151345e-06, "loss": 0.1002, "step": 700 }, { "epoch": 0.10545076488935096, "grad_norm": 1.4167280197143555, "learning_rate": 9.89454923511065e-06, "loss": 0.0884, "step": 710 }, { "epoch": 0.10693598693004604, "grad_norm": 1.0119879245758057, "learning_rate": 9.893064013069956e-06, "loss": 0.0975, "step": 720 }, { "epoch": 0.10842120897074113, "grad_norm": 0.941593587398529, "learning_rate": 9.89157879102926e-06, "loss": 0.0978, "step": 730 }, { "epoch": 0.10990643101143621, "grad_norm": 3.35498309135437, "learning_rate": 9.890093568988564e-06, "loss": 0.111, "step": 740 }, { "epoch": 0.11139165305213129, "grad_norm": 0.6840288043022156, "learning_rate": 9.88860834694787e-06, "loss": 0.085, "step": 750 }, { "epoch": 0.11287687509282637, "grad_norm": 1.4142742156982422, "learning_rate": 9.887123124907175e-06, "loss": 0.1101, "step": 760 }, { "epoch": 0.11436209713352147, "grad_norm": 0.8787768483161926, "learning_rate": 9.88563790286648e-06, "loss": 0.0939, "step": 770 }, { "epoch": 0.11584731917421655, "grad_norm": 1.280203104019165, "learning_rate": 9.884152680825784e-06, "loss": 0.0813, "step": 780 }, { "epoch": 0.11733254121491163, "grad_norm": 1.72808837890625, "learning_rate": 9.882667458785088e-06, "loss": 0.1089, "step": 790 }, { "epoch": 0.11881776325560671, "grad_norm": 0.944834291934967, "learning_rate": 9.881182236744394e-06, "loss": 0.1104, "step": 800 }, { "epoch": 0.1203029852963018, "grad_norm": 1.4327675104141235, "learning_rate": 9.879697014703699e-06, "loss": 0.1108, "step": 810 }, { "epoch": 0.12178820733699688, "grad_norm": 0.7821674346923828, "learning_rate": 9.878211792663003e-06, "loss": 0.1136, "step": 820 }, { "epoch": 0.12327342937769196, "grad_norm": 0.8276019096374512, "learning_rate": 9.87672657062231e-06, "loss": 0.0694, "step": 830 }, { "epoch": 0.12475865141838705, "grad_norm": 2.3967127799987793, "learning_rate": 9.875241348581614e-06, "loss": 0.0881, "step": 840 }, { "epoch": 0.12624387345908214, "grad_norm": 1.1571794748306274, "learning_rate": 9.873756126540918e-06, "loss": 0.0886, "step": 850 }, { "epoch": 0.1277290954997772, "grad_norm": 1.8005186319351196, "learning_rate": 9.872270904500224e-06, "loss": 0.1016, "step": 860 }, { "epoch": 0.1292143175404723, "grad_norm": 0.8932623863220215, "learning_rate": 9.870785682459529e-06, "loss": 0.0908, "step": 870 }, { "epoch": 0.1306995395811674, "grad_norm": 1.1731626987457275, "learning_rate": 9.869300460418833e-06, "loss": 0.0887, "step": 880 }, { "epoch": 0.13218476162186246, "grad_norm": 0.9981728196144104, "learning_rate": 9.867815238378138e-06, "loss": 0.0945, "step": 890 }, { "epoch": 0.13366998366255756, "grad_norm": 0.8230689167976379, "learning_rate": 9.866330016337444e-06, "loss": 0.1331, "step": 900 }, { "epoch": 0.13515520570325262, "grad_norm": 0.873444676399231, "learning_rate": 9.864844794296748e-06, "loss": 0.0934, "step": 910 }, { "epoch": 0.13664042774394772, "grad_norm": 1.5538525581359863, "learning_rate": 9.863359572256053e-06, "loss": 0.0865, "step": 920 }, { "epoch": 0.1381256497846428, "grad_norm": 1.2409332990646362, "learning_rate": 9.861874350215357e-06, "loss": 0.0838, "step": 930 }, { "epoch": 0.13961087182533788, "grad_norm": 1.0957996845245361, "learning_rate": 9.860389128174663e-06, "loss": 0.1009, "step": 940 }, { "epoch": 0.14109609386603297, "grad_norm": 0.722710371017456, "learning_rate": 9.858903906133968e-06, "loss": 0.0902, "step": 950 }, { "epoch": 0.14258131590672807, "grad_norm": 1.6766164302825928, "learning_rate": 9.857418684093272e-06, "loss": 0.1139, "step": 960 }, { "epoch": 0.14406653794742313, "grad_norm": 1.146504521369934, "learning_rate": 9.855933462052578e-06, "loss": 0.1004, "step": 970 }, { "epoch": 0.14555175998811823, "grad_norm": 2.164172410964966, "learning_rate": 9.854448240011883e-06, "loss": 0.0968, "step": 980 }, { "epoch": 0.1470369820288133, "grad_norm": 1.636353611946106, "learning_rate": 9.852963017971187e-06, "loss": 0.1029, "step": 990 }, { "epoch": 0.1485222040695084, "grad_norm": 0.8112787008285522, "learning_rate": 9.851477795930492e-06, "loss": 0.1033, "step": 1000 }, { "epoch": 0.15000742611020348, "grad_norm": 1.385244607925415, "learning_rate": 9.849992573889798e-06, "loss": 0.0872, "step": 1010 }, { "epoch": 0.15149264815089855, "grad_norm": 2.009169816970825, "learning_rate": 9.848507351849102e-06, "loss": 0.0953, "step": 1020 }, { "epoch": 0.15297787019159365, "grad_norm": 0.788632333278656, "learning_rate": 9.847022129808407e-06, "loss": 0.092, "step": 1030 }, { "epoch": 0.15446309223228874, "grad_norm": 1.4505847692489624, "learning_rate": 9.845536907767713e-06, "loss": 0.1187, "step": 1040 }, { "epoch": 0.1559483142729838, "grad_norm": 1.9600352048873901, "learning_rate": 9.844051685727017e-06, "loss": 0.0951, "step": 1050 }, { "epoch": 0.1574335363136789, "grad_norm": 1.3148131370544434, "learning_rate": 9.842566463686322e-06, "loss": 0.1247, "step": 1060 }, { "epoch": 0.15891875835437397, "grad_norm": 0.83713698387146, "learning_rate": 9.841081241645628e-06, "loss": 0.0838, "step": 1070 }, { "epoch": 0.16040398039506906, "grad_norm": 0.9304842352867126, "learning_rate": 9.839596019604932e-06, "loss": 0.1138, "step": 1080 }, { "epoch": 0.16188920243576416, "grad_norm": 1.4497487545013428, "learning_rate": 9.838110797564237e-06, "loss": 0.0867, "step": 1090 }, { "epoch": 0.16337442447645922, "grad_norm": 1.3044580221176147, "learning_rate": 9.836625575523541e-06, "loss": 0.0913, "step": 1100 }, { "epoch": 0.16485964651715432, "grad_norm": 0.8657674193382263, "learning_rate": 9.835140353482845e-06, "loss": 0.0907, "step": 1110 }, { "epoch": 0.16634486855784938, "grad_norm": 1.2274751663208008, "learning_rate": 9.833655131442152e-06, "loss": 0.1128, "step": 1120 }, { "epoch": 0.16783009059854448, "grad_norm": 1.1105313301086426, "learning_rate": 9.832169909401456e-06, "loss": 0.1022, "step": 1130 }, { "epoch": 0.16931531263923957, "grad_norm": 1.0874226093292236, "learning_rate": 9.83068468736076e-06, "loss": 0.1068, "step": 1140 }, { "epoch": 0.17080053467993464, "grad_norm": 0.47556501626968384, "learning_rate": 9.829199465320067e-06, "loss": 0.0902, "step": 1150 }, { "epoch": 0.17228575672062973, "grad_norm": 1.3352073431015015, "learning_rate": 9.827714243279371e-06, "loss": 0.0985, "step": 1160 }, { "epoch": 0.17377097876132483, "grad_norm": 1.3669053316116333, "learning_rate": 9.826229021238675e-06, "loss": 0.0974, "step": 1170 }, { "epoch": 0.1752562008020199, "grad_norm": 1.1422746181488037, "learning_rate": 9.824743799197982e-06, "loss": 0.105, "step": 1180 }, { "epoch": 0.176741422842715, "grad_norm": 1.4345518350601196, "learning_rate": 9.823258577157286e-06, "loss": 0.0856, "step": 1190 }, { "epoch": 0.17822664488341006, "grad_norm": 0.863542377948761, "learning_rate": 9.82177335511659e-06, "loss": 0.1197, "step": 1200 }, { "epoch": 0.17971186692410515, "grad_norm": 1.3736563920974731, "learning_rate": 9.820288133075897e-06, "loss": 0.0897, "step": 1210 }, { "epoch": 0.18119708896480025, "grad_norm": 0.5827713012695312, "learning_rate": 9.8188029110352e-06, "loss": 0.073, "step": 1220 }, { "epoch": 0.1826823110054953, "grad_norm": 1.133959412574768, "learning_rate": 9.817317688994505e-06, "loss": 0.0877, "step": 1230 }, { "epoch": 0.1841675330461904, "grad_norm": 1.5258187055587769, "learning_rate": 9.815832466953812e-06, "loss": 0.0935, "step": 1240 }, { "epoch": 0.1856527550868855, "grad_norm": 1.1951026916503906, "learning_rate": 9.814347244913114e-06, "loss": 0.0975, "step": 1250 }, { "epoch": 0.18713797712758057, "grad_norm": 0.7857174277305603, "learning_rate": 9.81286202287242e-06, "loss": 0.0899, "step": 1260 }, { "epoch": 0.18862319916827566, "grad_norm": 1.260588526725769, "learning_rate": 9.811376800831725e-06, "loss": 0.0998, "step": 1270 }, { "epoch": 0.19010842120897073, "grad_norm": 0.8027825355529785, "learning_rate": 9.80989157879103e-06, "loss": 0.09, "step": 1280 }, { "epoch": 0.19159364324966582, "grad_norm": 1.722459316253662, "learning_rate": 9.808406356750335e-06, "loss": 0.097, "step": 1290 }, { "epoch": 0.19307886529036092, "grad_norm": 0.8458243012428284, "learning_rate": 9.80692113470964e-06, "loss": 0.0969, "step": 1300 }, { "epoch": 0.19456408733105598, "grad_norm": 0.9626701474189758, "learning_rate": 9.805435912668944e-06, "loss": 0.0942, "step": 1310 }, { "epoch": 0.19604930937175108, "grad_norm": 0.7567682862281799, "learning_rate": 9.80395069062825e-06, "loss": 0.1092, "step": 1320 }, { "epoch": 0.19753453141244617, "grad_norm": 1.9160317182540894, "learning_rate": 9.802465468587555e-06, "loss": 0.0882, "step": 1330 }, { "epoch": 0.19901975345314124, "grad_norm": 1.1188064813613892, "learning_rate": 9.80098024654686e-06, "loss": 0.0983, "step": 1340 }, { "epoch": 0.20050497549383633, "grad_norm": 1.1348779201507568, "learning_rate": 9.799495024506165e-06, "loss": 0.1138, "step": 1350 }, { "epoch": 0.2019901975345314, "grad_norm": 0.7097839117050171, "learning_rate": 9.79800980246547e-06, "loss": 0.0989, "step": 1360 }, { "epoch": 0.2034754195752265, "grad_norm": 1.01082444190979, "learning_rate": 9.796524580424774e-06, "loss": 0.0803, "step": 1370 }, { "epoch": 0.2049606416159216, "grad_norm": 0.8688536286354065, "learning_rate": 9.79503935838408e-06, "loss": 0.1079, "step": 1380 }, { "epoch": 0.20644586365661666, "grad_norm": 0.9843276143074036, "learning_rate": 9.793554136343383e-06, "loss": 0.0917, "step": 1390 }, { "epoch": 0.20793108569731175, "grad_norm": 1.4539355039596558, "learning_rate": 9.79206891430269e-06, "loss": 0.1013, "step": 1400 }, { "epoch": 0.20941630773800682, "grad_norm": 0.6660147905349731, "learning_rate": 9.790583692261994e-06, "loss": 0.0806, "step": 1410 }, { "epoch": 0.2109015297787019, "grad_norm": 1.042614459991455, "learning_rate": 9.789098470221298e-06, "loss": 0.083, "step": 1420 }, { "epoch": 0.212386751819397, "grad_norm": 1.052061676979065, "learning_rate": 9.787613248180604e-06, "loss": 0.1052, "step": 1430 }, { "epoch": 0.21387197386009207, "grad_norm": 0.9673222303390503, "learning_rate": 9.786128026139909e-06, "loss": 0.0957, "step": 1440 }, { "epoch": 0.21535719590078717, "grad_norm": 1.9790165424346924, "learning_rate": 9.784642804099213e-06, "loss": 0.0825, "step": 1450 }, { "epoch": 0.21684241794148226, "grad_norm": 1.3546462059020996, "learning_rate": 9.78315758205852e-06, "loss": 0.0858, "step": 1460 }, { "epoch": 0.21832763998217733, "grad_norm": 1.302915096282959, "learning_rate": 9.781672360017824e-06, "loss": 0.0983, "step": 1470 }, { "epoch": 0.21981286202287242, "grad_norm": 0.818485677242279, "learning_rate": 9.780187137977128e-06, "loss": 0.0979, "step": 1480 }, { "epoch": 0.2212980840635675, "grad_norm": 1.091336965560913, "learning_rate": 9.778701915936434e-06, "loss": 0.0885, "step": 1490 }, { "epoch": 0.22278330610426259, "grad_norm": 2.4682865142822266, "learning_rate": 9.777216693895739e-06, "loss": 0.1194, "step": 1500 }, { "epoch": 0.22426852814495768, "grad_norm": 1.5554701089859009, "learning_rate": 9.775731471855043e-06, "loss": 0.1125, "step": 1510 }, { "epoch": 0.22575375018565275, "grad_norm": 1.6344841718673706, "learning_rate": 9.774246249814348e-06, "loss": 0.1015, "step": 1520 }, { "epoch": 0.22723897222634784, "grad_norm": 1.3307982683181763, "learning_rate": 9.772761027773654e-06, "loss": 0.0928, "step": 1530 }, { "epoch": 0.22872419426704294, "grad_norm": 1.5012156963348389, "learning_rate": 9.771275805732958e-06, "loss": 0.0976, "step": 1540 }, { "epoch": 0.230209416307738, "grad_norm": 1.1061965227127075, "learning_rate": 9.769790583692263e-06, "loss": 0.0881, "step": 1550 }, { "epoch": 0.2316946383484331, "grad_norm": 0.9800447225570679, "learning_rate": 9.768305361651567e-06, "loss": 0.094, "step": 1560 }, { "epoch": 0.23317986038912816, "grad_norm": 1.0607389211654663, "learning_rate": 9.766820139610873e-06, "loss": 0.0953, "step": 1570 }, { "epoch": 0.23466508242982326, "grad_norm": 1.320814847946167, "learning_rate": 9.765334917570178e-06, "loss": 0.0763, "step": 1580 }, { "epoch": 0.23615030447051835, "grad_norm": 0.9268600940704346, "learning_rate": 9.763849695529482e-06, "loss": 0.0991, "step": 1590 }, { "epoch": 0.23763552651121342, "grad_norm": 1.741870641708374, "learning_rate": 9.762364473488788e-06, "loss": 0.1003, "step": 1600 }, { "epoch": 0.2391207485519085, "grad_norm": 0.7823505401611328, "learning_rate": 9.760879251448093e-06, "loss": 0.1073, "step": 1610 }, { "epoch": 0.2406059705926036, "grad_norm": 1.4746675491333008, "learning_rate": 9.759394029407397e-06, "loss": 0.0953, "step": 1620 }, { "epoch": 0.24209119263329867, "grad_norm": 1.5444601774215698, "learning_rate": 9.757908807366701e-06, "loss": 0.076, "step": 1630 }, { "epoch": 0.24357641467399377, "grad_norm": 0.9794516563415527, "learning_rate": 9.756423585326008e-06, "loss": 0.1012, "step": 1640 }, { "epoch": 0.24506163671468884, "grad_norm": 0.9550230503082275, "learning_rate": 9.754938363285312e-06, "loss": 0.0837, "step": 1650 }, { "epoch": 0.24654685875538393, "grad_norm": 0.9263174533843994, "learning_rate": 9.753453141244616e-06, "loss": 0.0787, "step": 1660 }, { "epoch": 0.24803208079607902, "grad_norm": 1.3556021451950073, "learning_rate": 9.751967919203923e-06, "loss": 0.1044, "step": 1670 }, { "epoch": 0.2495173028367741, "grad_norm": 0.7733617424964905, "learning_rate": 9.750482697163227e-06, "loss": 0.0835, "step": 1680 }, { "epoch": 0.25100252487746916, "grad_norm": 0.9405840635299683, "learning_rate": 9.748997475122531e-06, "loss": 0.0957, "step": 1690 }, { "epoch": 0.2524877469181643, "grad_norm": 0.7323219180107117, "learning_rate": 9.747512253081838e-06, "loss": 0.0863, "step": 1700 }, { "epoch": 0.25397296895885935, "grad_norm": 1.754362940788269, "learning_rate": 9.74602703104114e-06, "loss": 0.0904, "step": 1710 }, { "epoch": 0.2554581909995544, "grad_norm": 0.9422330856323242, "learning_rate": 9.744541809000446e-06, "loss": 0.118, "step": 1720 }, { "epoch": 0.25694341304024954, "grad_norm": 1.2818480730056763, "learning_rate": 9.74305658695975e-06, "loss": 0.0933, "step": 1730 }, { "epoch": 0.2584286350809446, "grad_norm": 0.8700027465820312, "learning_rate": 9.741571364919055e-06, "loss": 0.0863, "step": 1740 }, { "epoch": 0.25991385712163967, "grad_norm": 1.0074703693389893, "learning_rate": 9.740086142878361e-06, "loss": 0.1151, "step": 1750 }, { "epoch": 0.2613990791623348, "grad_norm": 1.1355704069137573, "learning_rate": 9.738600920837666e-06, "loss": 0.0921, "step": 1760 }, { "epoch": 0.26288430120302986, "grad_norm": 0.9470556378364563, "learning_rate": 9.73711569879697e-06, "loss": 0.0763, "step": 1770 }, { "epoch": 0.2643695232437249, "grad_norm": 1.5006542205810547, "learning_rate": 9.735630476756276e-06, "loss": 0.076, "step": 1780 }, { "epoch": 0.26585474528442005, "grad_norm": 0.9084158539772034, "learning_rate": 9.73414525471558e-06, "loss": 0.091, "step": 1790 }, { "epoch": 0.2673399673251151, "grad_norm": 1.3258038759231567, "learning_rate": 9.732660032674885e-06, "loss": 0.1342, "step": 1800 }, { "epoch": 0.2688251893658102, "grad_norm": 0.9406817555427551, "learning_rate": 9.731174810634191e-06, "loss": 0.0979, "step": 1810 }, { "epoch": 0.27031041140650525, "grad_norm": 0.9855642318725586, "learning_rate": 9.729689588593496e-06, "loss": 0.078, "step": 1820 }, { "epoch": 0.27179563344720037, "grad_norm": 0.9892807006835938, "learning_rate": 9.7282043665528e-06, "loss": 0.0977, "step": 1830 }, { "epoch": 0.27328085548789544, "grad_norm": 0.8949669599533081, "learning_rate": 9.726719144512106e-06, "loss": 0.0822, "step": 1840 }, { "epoch": 0.2747660775285905, "grad_norm": 1.159778118133545, "learning_rate": 9.725233922471409e-06, "loss": 0.1103, "step": 1850 }, { "epoch": 0.2762512995692856, "grad_norm": 1.6085240840911865, "learning_rate": 9.723748700430715e-06, "loss": 0.0864, "step": 1860 }, { "epoch": 0.2777365216099807, "grad_norm": 1.4303113222122192, "learning_rate": 9.72226347839002e-06, "loss": 0.101, "step": 1870 }, { "epoch": 0.27922174365067576, "grad_norm": 1.550205111503601, "learning_rate": 9.720778256349324e-06, "loss": 0.0782, "step": 1880 }, { "epoch": 0.2807069656913709, "grad_norm": 1.5073113441467285, "learning_rate": 9.71929303430863e-06, "loss": 0.0986, "step": 1890 }, { "epoch": 0.28219218773206595, "grad_norm": 1.2778253555297852, "learning_rate": 9.717807812267935e-06, "loss": 0.0731, "step": 1900 }, { "epoch": 0.283677409772761, "grad_norm": 0.669276237487793, "learning_rate": 9.716322590227239e-06, "loss": 0.0902, "step": 1910 }, { "epoch": 0.28516263181345614, "grad_norm": 1.8269588947296143, "learning_rate": 9.714837368186545e-06, "loss": 0.0975, "step": 1920 }, { "epoch": 0.2866478538541512, "grad_norm": 1.0207661390304565, "learning_rate": 9.71335214614585e-06, "loss": 0.0947, "step": 1930 }, { "epoch": 0.28813307589484627, "grad_norm": 1.4014843702316284, "learning_rate": 9.711866924105154e-06, "loss": 0.0655, "step": 1940 }, { "epoch": 0.2896182979355414, "grad_norm": 1.199450969696045, "learning_rate": 9.71038170206446e-06, "loss": 0.0831, "step": 1950 }, { "epoch": 0.29110351997623646, "grad_norm": 0.9979912638664246, "learning_rate": 9.708896480023765e-06, "loss": 0.0745, "step": 1960 }, { "epoch": 0.2925887420169315, "grad_norm": 0.9386908411979675, "learning_rate": 9.707411257983069e-06, "loss": 0.0934, "step": 1970 }, { "epoch": 0.2940739640576266, "grad_norm": 0.9256591200828552, "learning_rate": 9.705926035942375e-06, "loss": 0.1039, "step": 1980 }, { "epoch": 0.2955591860983217, "grad_norm": 1.0602842569351196, "learning_rate": 9.70444081390168e-06, "loss": 0.1308, "step": 1990 }, { "epoch": 0.2970444081390168, "grad_norm": 0.7016430497169495, "learning_rate": 9.702955591860984e-06, "loss": 0.0987, "step": 2000 }, { "epoch": 0.29852963017971185, "grad_norm": 1.3720355033874512, "learning_rate": 9.70147036982029e-06, "loss": 0.1047, "step": 2010 }, { "epoch": 0.30001485222040697, "grad_norm": 0.9285506010055542, "learning_rate": 9.699985147779593e-06, "loss": 0.0884, "step": 2020 }, { "epoch": 0.30150007426110204, "grad_norm": 0.745842456817627, "learning_rate": 9.698499925738899e-06, "loss": 0.0887, "step": 2030 }, { "epoch": 0.3029852963017971, "grad_norm": 1.2370271682739258, "learning_rate": 9.697014703698203e-06, "loss": 0.1043, "step": 2040 }, { "epoch": 0.3044705183424922, "grad_norm": 0.888309121131897, "learning_rate": 9.695529481657508e-06, "loss": 0.1045, "step": 2050 }, { "epoch": 0.3059557403831873, "grad_norm": 0.7352439165115356, "learning_rate": 9.694044259616814e-06, "loss": 0.0787, "step": 2060 }, { "epoch": 0.30744096242388236, "grad_norm": 0.7865113019943237, "learning_rate": 9.692559037576118e-06, "loss": 0.0764, "step": 2070 }, { "epoch": 0.3089261844645775, "grad_norm": 1.0966145992279053, "learning_rate": 9.691073815535423e-06, "loss": 0.0982, "step": 2080 }, { "epoch": 0.31041140650527255, "grad_norm": 0.7263454794883728, "learning_rate": 9.689588593494729e-06, "loss": 0.1118, "step": 2090 }, { "epoch": 0.3118966285459676, "grad_norm": 1.5336846113204956, "learning_rate": 9.688103371454033e-06, "loss": 0.1068, "step": 2100 }, { "epoch": 0.3133818505866627, "grad_norm": 1.1668576002120972, "learning_rate": 9.686618149413338e-06, "loss": 0.1032, "step": 2110 }, { "epoch": 0.3148670726273578, "grad_norm": 0.7677258849143982, "learning_rate": 9.685132927372644e-06, "loss": 0.0741, "step": 2120 }, { "epoch": 0.31635229466805287, "grad_norm": 1.1100083589553833, "learning_rate": 9.683647705331948e-06, "loss": 0.0863, "step": 2130 }, { "epoch": 0.31783751670874794, "grad_norm": 1.2412970066070557, "learning_rate": 9.682162483291253e-06, "loss": 0.0939, "step": 2140 }, { "epoch": 0.31932273874944306, "grad_norm": 0.913336455821991, "learning_rate": 9.680677261250557e-06, "loss": 0.0633, "step": 2150 }, { "epoch": 0.3208079607901381, "grad_norm": 0.5790470242500305, "learning_rate": 9.679192039209862e-06, "loss": 0.1064, "step": 2160 }, { "epoch": 0.3222931828308332, "grad_norm": 1.0939009189605713, "learning_rate": 9.677706817169168e-06, "loss": 0.1065, "step": 2170 }, { "epoch": 0.3237784048715283, "grad_norm": 1.2712286710739136, "learning_rate": 9.676221595128472e-06, "loss": 0.1024, "step": 2180 }, { "epoch": 0.3252636269122234, "grad_norm": 1.3349504470825195, "learning_rate": 9.674736373087777e-06, "loss": 0.089, "step": 2190 }, { "epoch": 0.32674884895291845, "grad_norm": 2.0255956649780273, "learning_rate": 9.673251151047083e-06, "loss": 0.1185, "step": 2200 }, { "epoch": 0.32823407099361357, "grad_norm": 0.5723013281822205, "learning_rate": 9.671765929006387e-06, "loss": 0.0564, "step": 2210 }, { "epoch": 0.32971929303430864, "grad_norm": 1.5544242858886719, "learning_rate": 9.670280706965692e-06, "loss": 0.1056, "step": 2220 }, { "epoch": 0.3312045150750037, "grad_norm": 1.538115382194519, "learning_rate": 9.668795484924996e-06, "loss": 0.0859, "step": 2230 }, { "epoch": 0.33268973711569877, "grad_norm": 0.458325058221817, "learning_rate": 9.667310262884302e-06, "loss": 0.1003, "step": 2240 }, { "epoch": 0.3341749591563939, "grad_norm": 1.3945813179016113, "learning_rate": 9.665825040843607e-06, "loss": 0.0995, "step": 2250 }, { "epoch": 0.33566018119708896, "grad_norm": 1.528809905052185, "learning_rate": 9.664339818802911e-06, "loss": 0.0809, "step": 2260 }, { "epoch": 0.337145403237784, "grad_norm": 0.4207174479961395, "learning_rate": 9.662854596762217e-06, "loss": 0.089, "step": 2270 }, { "epoch": 0.33863062527847915, "grad_norm": 0.9507777094841003, "learning_rate": 9.661369374721522e-06, "loss": 0.0842, "step": 2280 }, { "epoch": 0.3401158473191742, "grad_norm": 0.9284889698028564, "learning_rate": 9.659884152680826e-06, "loss": 0.0803, "step": 2290 }, { "epoch": 0.3416010693598693, "grad_norm": 0.8482735753059387, "learning_rate": 9.658398930640132e-06, "loss": 0.0951, "step": 2300 }, { "epoch": 0.3430862914005644, "grad_norm": 2.1062092781066895, "learning_rate": 9.656913708599437e-06, "loss": 0.095, "step": 2310 }, { "epoch": 0.34457151344125947, "grad_norm": 1.5955981016159058, "learning_rate": 9.655428486558741e-06, "loss": 0.101, "step": 2320 }, { "epoch": 0.34605673548195454, "grad_norm": 1.2699885368347168, "learning_rate": 9.653943264518046e-06, "loss": 0.0987, "step": 2330 }, { "epoch": 0.34754195752264966, "grad_norm": 0.6988068222999573, "learning_rate": 9.65245804247735e-06, "loss": 0.0866, "step": 2340 }, { "epoch": 0.3490271795633447, "grad_norm": 1.0660147666931152, "learning_rate": 9.650972820436656e-06, "loss": 0.0965, "step": 2350 }, { "epoch": 0.3505124016040398, "grad_norm": 0.8632172346115112, "learning_rate": 9.64948759839596e-06, "loss": 0.0884, "step": 2360 }, { "epoch": 0.3519976236447349, "grad_norm": 0.8742761015892029, "learning_rate": 9.648002376355265e-06, "loss": 0.0929, "step": 2370 }, { "epoch": 0.35348284568543, "grad_norm": 1.3093167543411255, "learning_rate": 9.646517154314571e-06, "loss": 0.1031, "step": 2380 }, { "epoch": 0.35496806772612505, "grad_norm": 0.9593400359153748, "learning_rate": 9.645031932273876e-06, "loss": 0.0925, "step": 2390 }, { "epoch": 0.3564532897668201, "grad_norm": 1.3659998178482056, "learning_rate": 9.64354671023318e-06, "loss": 0.0825, "step": 2400 }, { "epoch": 0.35793851180751524, "grad_norm": 0.9901537299156189, "learning_rate": 9.642061488192486e-06, "loss": 0.088, "step": 2410 }, { "epoch": 0.3594237338482103, "grad_norm": 1.5357334613800049, "learning_rate": 9.64057626615179e-06, "loss": 0.0786, "step": 2420 }, { "epoch": 0.36090895588890537, "grad_norm": 0.3341294825077057, "learning_rate": 9.639091044111095e-06, "loss": 0.0797, "step": 2430 }, { "epoch": 0.3623941779296005, "grad_norm": 1.1990760564804077, "learning_rate": 9.637605822070401e-06, "loss": 0.0966, "step": 2440 }, { "epoch": 0.36387939997029556, "grad_norm": 0.9783576726913452, "learning_rate": 9.636120600029704e-06, "loss": 0.0957, "step": 2450 }, { "epoch": 0.3653646220109906, "grad_norm": 0.8899098634719849, "learning_rate": 9.63463537798901e-06, "loss": 0.0987, "step": 2460 }, { "epoch": 0.36684984405168575, "grad_norm": 0.6533779501914978, "learning_rate": 9.633150155948316e-06, "loss": 0.0862, "step": 2470 }, { "epoch": 0.3683350660923808, "grad_norm": 0.6127310991287231, "learning_rate": 9.631664933907619e-06, "loss": 0.0803, "step": 2480 }, { "epoch": 0.3698202881330759, "grad_norm": 1.53936767578125, "learning_rate": 9.630179711866925e-06, "loss": 0.0991, "step": 2490 }, { "epoch": 0.371305510173771, "grad_norm": 0.5968347191810608, "learning_rate": 9.62869448982623e-06, "loss": 0.0865, "step": 2500 }, { "epoch": 0.37279073221446607, "grad_norm": 1.7111512422561646, "learning_rate": 9.627209267785534e-06, "loss": 0.0959, "step": 2510 }, { "epoch": 0.37427595425516114, "grad_norm": 1.5803933143615723, "learning_rate": 9.62572404574484e-06, "loss": 0.1062, "step": 2520 }, { "epoch": 0.3757611762958562, "grad_norm": 1.0247458219528198, "learning_rate": 9.624238823704144e-06, "loss": 0.0937, "step": 2530 }, { "epoch": 0.3772463983365513, "grad_norm": 0.9361761212348938, "learning_rate": 9.622753601663449e-06, "loss": 0.075, "step": 2540 }, { "epoch": 0.3787316203772464, "grad_norm": 1.0401147603988647, "learning_rate": 9.621268379622755e-06, "loss": 0.0856, "step": 2550 }, { "epoch": 0.38021684241794146, "grad_norm": 0.7037819623947144, "learning_rate": 9.61978315758206e-06, "loss": 0.0908, "step": 2560 }, { "epoch": 0.3817020644586366, "grad_norm": 1.608916163444519, "learning_rate": 9.618297935541364e-06, "loss": 0.0975, "step": 2570 }, { "epoch": 0.38318728649933165, "grad_norm": 2.890395402908325, "learning_rate": 9.61681271350067e-06, "loss": 0.1023, "step": 2580 }, { "epoch": 0.3846725085400267, "grad_norm": 0.8799918293952942, "learning_rate": 9.615327491459974e-06, "loss": 0.0717, "step": 2590 }, { "epoch": 0.38615773058072184, "grad_norm": 1.2562460899353027, "learning_rate": 9.613842269419279e-06, "loss": 0.0885, "step": 2600 }, { "epoch": 0.3876429526214169, "grad_norm": 0.7879093289375305, "learning_rate": 9.612357047378585e-06, "loss": 0.0812, "step": 2610 }, { "epoch": 0.38912817466211197, "grad_norm": 2.1101765632629395, "learning_rate": 9.610871825337888e-06, "loss": 0.0847, "step": 2620 }, { "epoch": 0.3906133967028071, "grad_norm": 1.0832703113555908, "learning_rate": 9.609386603297194e-06, "loss": 0.088, "step": 2630 }, { "epoch": 0.39209861874350216, "grad_norm": 0.7375301718711853, "learning_rate": 9.6079013812565e-06, "loss": 0.0829, "step": 2640 }, { "epoch": 0.3935838407841972, "grad_norm": 1.1385936737060547, "learning_rate": 9.606416159215803e-06, "loss": 0.0845, "step": 2650 }, { "epoch": 0.39506906282489235, "grad_norm": 1.1508512496948242, "learning_rate": 9.604930937175109e-06, "loss": 0.0804, "step": 2660 }, { "epoch": 0.3965542848655874, "grad_norm": 1.269167184829712, "learning_rate": 9.603445715134413e-06, "loss": 0.0721, "step": 2670 }, { "epoch": 0.3980395069062825, "grad_norm": 0.8906748294830322, "learning_rate": 9.601960493093718e-06, "loss": 0.0755, "step": 2680 }, { "epoch": 0.39952472894697755, "grad_norm": 0.6662545204162598, "learning_rate": 9.600475271053024e-06, "loss": 0.0669, "step": 2690 }, { "epoch": 0.40100995098767267, "grad_norm": 1.1794975996017456, "learning_rate": 9.598990049012328e-06, "loss": 0.1041, "step": 2700 }, { "epoch": 0.40249517302836774, "grad_norm": 0.8003746867179871, "learning_rate": 9.597504826971633e-06, "loss": 0.0861, "step": 2710 }, { "epoch": 0.4039803950690628, "grad_norm": 0.7886612415313721, "learning_rate": 9.596019604930939e-06, "loss": 0.0897, "step": 2720 }, { "epoch": 0.4054656171097579, "grad_norm": 1.1297708749771118, "learning_rate": 9.594534382890243e-06, "loss": 0.0854, "step": 2730 }, { "epoch": 0.406950839150453, "grad_norm": 0.5781280398368835, "learning_rate": 9.593049160849548e-06, "loss": 0.084, "step": 2740 }, { "epoch": 0.40843606119114806, "grad_norm": 0.6772047281265259, "learning_rate": 9.591563938808852e-06, "loss": 0.0894, "step": 2750 }, { "epoch": 0.4099212832318432, "grad_norm": 1.1391324996948242, "learning_rate": 9.590078716768158e-06, "loss": 0.0632, "step": 2760 }, { "epoch": 0.41140650527253825, "grad_norm": 0.5008231997489929, "learning_rate": 9.588593494727463e-06, "loss": 0.0701, "step": 2770 }, { "epoch": 0.4128917273132333, "grad_norm": 1.8930658102035522, "learning_rate": 9.587108272686767e-06, "loss": 0.0873, "step": 2780 }, { "epoch": 0.41437694935392844, "grad_norm": 2.1659274101257324, "learning_rate": 9.585623050646072e-06, "loss": 0.0929, "step": 2790 }, { "epoch": 0.4158621713946235, "grad_norm": 0.8841612935066223, "learning_rate": 9.584137828605378e-06, "loss": 0.0866, "step": 2800 }, { "epoch": 0.41734739343531857, "grad_norm": 1.3454309701919556, "learning_rate": 9.582652606564682e-06, "loss": 0.083, "step": 2810 }, { "epoch": 0.41883261547601364, "grad_norm": 1.09120774269104, "learning_rate": 9.581167384523987e-06, "loss": 0.0937, "step": 2820 }, { "epoch": 0.42031783751670876, "grad_norm": 0.5597397089004517, "learning_rate": 9.579682162483293e-06, "loss": 0.0787, "step": 2830 }, { "epoch": 0.4218030595574038, "grad_norm": 0.9361597299575806, "learning_rate": 9.578196940442597e-06, "loss": 0.0831, "step": 2840 }, { "epoch": 0.4232882815980989, "grad_norm": 1.678627848625183, "learning_rate": 9.576711718401902e-06, "loss": 0.0799, "step": 2850 }, { "epoch": 0.424773503638794, "grad_norm": 1.2205970287322998, "learning_rate": 9.575226496361206e-06, "loss": 0.0781, "step": 2860 }, { "epoch": 0.4262587256794891, "grad_norm": 1.2254648208618164, "learning_rate": 9.573741274320512e-06, "loss": 0.0769, "step": 2870 }, { "epoch": 0.42774394772018415, "grad_norm": 0.9398304224014282, "learning_rate": 9.572256052279817e-06, "loss": 0.0796, "step": 2880 }, { "epoch": 0.42922916976087927, "grad_norm": 0.6901552081108093, "learning_rate": 9.570770830239121e-06, "loss": 0.087, "step": 2890 }, { "epoch": 0.43071439180157434, "grad_norm": 1.507688045501709, "learning_rate": 9.569285608198427e-06, "loss": 0.0831, "step": 2900 }, { "epoch": 0.4321996138422694, "grad_norm": 0.6589367389678955, "learning_rate": 9.567800386157732e-06, "loss": 0.0951, "step": 2910 }, { "epoch": 0.4336848358829645, "grad_norm": 1.355723261833191, "learning_rate": 9.566315164117036e-06, "loss": 0.0674, "step": 2920 }, { "epoch": 0.4351700579236596, "grad_norm": 1.7150205373764038, "learning_rate": 9.564829942076342e-06, "loss": 0.0994, "step": 2930 }, { "epoch": 0.43665527996435466, "grad_norm": 1.8153376579284668, "learning_rate": 9.563344720035647e-06, "loss": 0.1008, "step": 2940 }, { "epoch": 0.4381405020050498, "grad_norm": 1.1676079034805298, "learning_rate": 9.561859497994951e-06, "loss": 0.0993, "step": 2950 }, { "epoch": 0.43962572404574485, "grad_norm": 0.7941600680351257, "learning_rate": 9.560374275954255e-06, "loss": 0.0837, "step": 2960 }, { "epoch": 0.4411109460864399, "grad_norm": 0.6376268863677979, "learning_rate": 9.55888905391356e-06, "loss": 0.0834, "step": 2970 }, { "epoch": 0.442596168127135, "grad_norm": 0.8009418845176697, "learning_rate": 9.557403831872866e-06, "loss": 0.076, "step": 2980 }, { "epoch": 0.4440813901678301, "grad_norm": 0.4576304256916046, "learning_rate": 9.55591860983217e-06, "loss": 0.0717, "step": 2990 }, { "epoch": 0.44556661220852517, "grad_norm": 1.8865007162094116, "learning_rate": 9.554433387791475e-06, "loss": 0.0771, "step": 3000 }, { "epoch": 0.44705183424922024, "grad_norm": 1.5658719539642334, "learning_rate": 9.552948165750781e-06, "loss": 0.0944, "step": 3010 }, { "epoch": 0.44853705628991536, "grad_norm": 0.936982274055481, "learning_rate": 9.551462943710085e-06, "loss": 0.0847, "step": 3020 }, { "epoch": 0.4500222783306104, "grad_norm": 0.6622723340988159, "learning_rate": 9.54997772166939e-06, "loss": 0.0967, "step": 3030 }, { "epoch": 0.4515075003713055, "grad_norm": 1.0170260667800903, "learning_rate": 9.548492499628696e-06, "loss": 0.0794, "step": 3040 }, { "epoch": 0.4529927224120006, "grad_norm": 0.8602062463760376, "learning_rate": 9.547007277588e-06, "loss": 0.0819, "step": 3050 }, { "epoch": 0.4544779444526957, "grad_norm": 0.7832088470458984, "learning_rate": 9.545522055547305e-06, "loss": 0.0935, "step": 3060 }, { "epoch": 0.45596316649339075, "grad_norm": 1.4716272354125977, "learning_rate": 9.544036833506611e-06, "loss": 0.0802, "step": 3070 }, { "epoch": 0.45744838853408587, "grad_norm": 0.9476587772369385, "learning_rate": 9.542551611465914e-06, "loss": 0.104, "step": 3080 }, { "epoch": 0.45893361057478094, "grad_norm": 0.751997172832489, "learning_rate": 9.54106638942522e-06, "loss": 0.092, "step": 3090 }, { "epoch": 0.460418832615476, "grad_norm": 0.611443817615509, "learning_rate": 9.539581167384526e-06, "loss": 0.0791, "step": 3100 }, { "epoch": 0.46190405465617107, "grad_norm": 1.4013361930847168, "learning_rate": 9.538095945343829e-06, "loss": 0.0855, "step": 3110 }, { "epoch": 0.4633892766968662, "grad_norm": 1.0885292291641235, "learning_rate": 9.536610723303135e-06, "loss": 0.0968, "step": 3120 }, { "epoch": 0.46487449873756126, "grad_norm": 0.7723345160484314, "learning_rate": 9.53512550126244e-06, "loss": 0.0922, "step": 3130 }, { "epoch": 0.4663597207782563, "grad_norm": 1.0908517837524414, "learning_rate": 9.533640279221744e-06, "loss": 0.0898, "step": 3140 }, { "epoch": 0.46784494281895145, "grad_norm": 0.9453380107879639, "learning_rate": 9.53215505718105e-06, "loss": 0.0589, "step": 3150 }, { "epoch": 0.4693301648596465, "grad_norm": 1.3652657270431519, "learning_rate": 9.530669835140354e-06, "loss": 0.0787, "step": 3160 }, { "epoch": 0.4708153869003416, "grad_norm": 1.1908072233200073, "learning_rate": 9.529184613099659e-06, "loss": 0.0658, "step": 3170 }, { "epoch": 0.4723006089410367, "grad_norm": 0.8739597201347351, "learning_rate": 9.527699391058965e-06, "loss": 0.0945, "step": 3180 }, { "epoch": 0.47378583098173177, "grad_norm": 0.9348416328430176, "learning_rate": 9.52621416901827e-06, "loss": 0.1154, "step": 3190 }, { "epoch": 0.47527105302242684, "grad_norm": 1.416810154914856, "learning_rate": 9.524728946977574e-06, "loss": 0.0995, "step": 3200 }, { "epoch": 0.47675627506312196, "grad_norm": 0.7512962818145752, "learning_rate": 9.52324372493688e-06, "loss": 0.0902, "step": 3210 }, { "epoch": 0.478241497103817, "grad_norm": 1.0729320049285889, "learning_rate": 9.521758502896184e-06, "loss": 0.1031, "step": 3220 }, { "epoch": 0.4797267191445121, "grad_norm": 0.9648675322532654, "learning_rate": 9.520273280855489e-06, "loss": 0.0823, "step": 3230 }, { "epoch": 0.4812119411852072, "grad_norm": 1.019823670387268, "learning_rate": 9.518788058814795e-06, "loss": 0.0827, "step": 3240 }, { "epoch": 0.4826971632259023, "grad_norm": 0.8790899515151978, "learning_rate": 9.517302836774098e-06, "loss": 0.0521, "step": 3250 }, { "epoch": 0.48418238526659735, "grad_norm": 1.2139712572097778, "learning_rate": 9.515817614733404e-06, "loss": 0.0806, "step": 3260 }, { "epoch": 0.4856676073072924, "grad_norm": 2.080587148666382, "learning_rate": 9.514332392692708e-06, "loss": 0.1032, "step": 3270 }, { "epoch": 0.48715282934798754, "grad_norm": 1.7146954536437988, "learning_rate": 9.512847170652013e-06, "loss": 0.0949, "step": 3280 }, { "epoch": 0.4886380513886826, "grad_norm": 0.7744117379188538, "learning_rate": 9.511361948611319e-06, "loss": 0.0848, "step": 3290 }, { "epoch": 0.49012327342937767, "grad_norm": 1.0753897428512573, "learning_rate": 9.509876726570623e-06, "loss": 0.0549, "step": 3300 }, { "epoch": 0.4916084954700728, "grad_norm": 0.5599222779273987, "learning_rate": 9.508391504529928e-06, "loss": 0.0696, "step": 3310 }, { "epoch": 0.49309371751076786, "grad_norm": 1.2357051372528076, "learning_rate": 9.506906282489234e-06, "loss": 0.1067, "step": 3320 }, { "epoch": 0.4945789395514629, "grad_norm": 1.6787092685699463, "learning_rate": 9.505421060448538e-06, "loss": 0.08, "step": 3330 }, { "epoch": 0.49606416159215805, "grad_norm": 1.1719763278961182, "learning_rate": 9.503935838407843e-06, "loss": 0.0862, "step": 3340 }, { "epoch": 0.4975493836328531, "grad_norm": 0.7013140916824341, "learning_rate": 9.502450616367149e-06, "loss": 0.0837, "step": 3350 }, { "epoch": 0.4990346056735482, "grad_norm": 1.0084935426712036, "learning_rate": 9.500965394326453e-06, "loss": 0.0825, "step": 3360 }, { "epoch": 0.5005198277142433, "grad_norm": 0.7926396727561951, "learning_rate": 9.499480172285757e-06, "loss": 0.081, "step": 3370 }, { "epoch": 0.5020050497549383, "grad_norm": 0.9179475903511047, "learning_rate": 9.497994950245062e-06, "loss": 0.1042, "step": 3380 }, { "epoch": 0.5034902717956334, "grad_norm": 1.2699346542358398, "learning_rate": 9.496509728204368e-06, "loss": 0.0753, "step": 3390 }, { "epoch": 0.5049754938363286, "grad_norm": 1.430041790008545, "learning_rate": 9.495024506163672e-06, "loss": 0.0838, "step": 3400 }, { "epoch": 0.5064607158770236, "grad_norm": 0.9805778861045837, "learning_rate": 9.493539284122977e-06, "loss": 0.1068, "step": 3410 }, { "epoch": 0.5079459379177187, "grad_norm": 1.0353554487228394, "learning_rate": 9.492054062082281e-06, "loss": 0.0841, "step": 3420 }, { "epoch": 0.5094311599584138, "grad_norm": 0.7422654032707214, "learning_rate": 9.490568840041587e-06, "loss": 0.0889, "step": 3430 }, { "epoch": 0.5109163819991088, "grad_norm": 1.0225118398666382, "learning_rate": 9.489083618000892e-06, "loss": 0.1067, "step": 3440 }, { "epoch": 0.512401604039804, "grad_norm": 1.0774625539779663, "learning_rate": 9.487598395960196e-06, "loss": 0.0737, "step": 3450 }, { "epoch": 0.5138868260804991, "grad_norm": 1.3356751203536987, "learning_rate": 9.486113173919502e-06, "loss": 0.0927, "step": 3460 }, { "epoch": 0.5153720481211941, "grad_norm": 0.96246337890625, "learning_rate": 9.484627951878807e-06, "loss": 0.0977, "step": 3470 }, { "epoch": 0.5168572701618892, "grad_norm": 0.9139418005943298, "learning_rate": 9.483142729838111e-06, "loss": 0.0744, "step": 3480 }, { "epoch": 0.5183424922025843, "grad_norm": 1.0638551712036133, "learning_rate": 9.481657507797416e-06, "loss": 0.0709, "step": 3490 }, { "epoch": 0.5198277142432793, "grad_norm": 1.449756145477295, "learning_rate": 9.480172285756722e-06, "loss": 0.0563, "step": 3500 }, { "epoch": 0.5213129362839745, "grad_norm": 1.2065516710281372, "learning_rate": 9.478687063716026e-06, "loss": 0.0764, "step": 3510 }, { "epoch": 0.5227981583246696, "grad_norm": 0.9641933441162109, "learning_rate": 9.47720184167533e-06, "loss": 0.0422, "step": 3520 }, { "epoch": 0.5242833803653646, "grad_norm": 0.8296038508415222, "learning_rate": 9.475716619634637e-06, "loss": 0.0891, "step": 3530 }, { "epoch": 0.5257686024060597, "grad_norm": 1.7687201499938965, "learning_rate": 9.474231397593941e-06, "loss": 0.0801, "step": 3540 }, { "epoch": 0.5272538244467548, "grad_norm": 1.2099858522415161, "learning_rate": 9.472746175553246e-06, "loss": 0.0918, "step": 3550 }, { "epoch": 0.5287390464874498, "grad_norm": 1.1296342611312866, "learning_rate": 9.47126095351255e-06, "loss": 0.0749, "step": 3560 }, { "epoch": 0.530224268528145, "grad_norm": 0.9852902889251709, "learning_rate": 9.469775731471856e-06, "loss": 0.0759, "step": 3570 }, { "epoch": 0.5317094905688401, "grad_norm": 0.7065750956535339, "learning_rate": 9.46829050943116e-06, "loss": 0.0904, "step": 3580 }, { "epoch": 0.5331947126095351, "grad_norm": 0.8571629524230957, "learning_rate": 9.466805287390465e-06, "loss": 0.0968, "step": 3590 }, { "epoch": 0.5346799346502302, "grad_norm": 1.2015560865402222, "learning_rate": 9.46532006534977e-06, "loss": 0.083, "step": 3600 }, { "epoch": 0.5361651566909253, "grad_norm": 0.9257380962371826, "learning_rate": 9.463834843309076e-06, "loss": 0.0751, "step": 3610 }, { "epoch": 0.5376503787316204, "grad_norm": 0.8309530019760132, "learning_rate": 9.46234962126838e-06, "loss": 0.0798, "step": 3620 }, { "epoch": 0.5391356007723155, "grad_norm": 0.9568300247192383, "learning_rate": 9.460864399227685e-06, "loss": 0.0806, "step": 3630 }, { "epoch": 0.5406208228130105, "grad_norm": 1.2841296195983887, "learning_rate": 9.45937917718699e-06, "loss": 0.0785, "step": 3640 }, { "epoch": 0.5421060448537056, "grad_norm": 1.0622037649154663, "learning_rate": 9.457893955146295e-06, "loss": 0.0866, "step": 3650 }, { "epoch": 0.5435912668944007, "grad_norm": 1.3247896432876587, "learning_rate": 9.4564087331056e-06, "loss": 0.1107, "step": 3660 }, { "epoch": 0.5450764889350957, "grad_norm": 1.0009887218475342, "learning_rate": 9.454923511064906e-06, "loss": 0.0863, "step": 3670 }, { "epoch": 0.5465617109757909, "grad_norm": 0.9625425338745117, "learning_rate": 9.45343828902421e-06, "loss": 0.0836, "step": 3680 }, { "epoch": 0.548046933016486, "grad_norm": 0.9985396862030029, "learning_rate": 9.451953066983515e-06, "loss": 0.0939, "step": 3690 }, { "epoch": 0.549532155057181, "grad_norm": 0.8877044320106506, "learning_rate": 9.45046784494282e-06, "loss": 0.0937, "step": 3700 }, { "epoch": 0.5510173770978761, "grad_norm": 0.5919630527496338, "learning_rate": 9.448982622902123e-06, "loss": 0.0698, "step": 3710 }, { "epoch": 0.5525025991385712, "grad_norm": 1.2577953338623047, "learning_rate": 9.44749740086143e-06, "loss": 0.0818, "step": 3720 }, { "epoch": 0.5539878211792663, "grad_norm": 0.9300584197044373, "learning_rate": 9.446012178820734e-06, "loss": 0.0927, "step": 3730 }, { "epoch": 0.5554730432199614, "grad_norm": 0.8681656122207642, "learning_rate": 9.444526956780038e-06, "loss": 0.1092, "step": 3740 }, { "epoch": 0.5569582652606565, "grad_norm": 1.0126725435256958, "learning_rate": 9.443041734739345e-06, "loss": 0.1075, "step": 3750 }, { "epoch": 0.5584434873013515, "grad_norm": 0.7265552282333374, "learning_rate": 9.441556512698649e-06, "loss": 0.0793, "step": 3760 }, { "epoch": 0.5599287093420466, "grad_norm": 1.078549861907959, "learning_rate": 9.440071290657953e-06, "loss": 0.1038, "step": 3770 }, { "epoch": 0.5614139313827418, "grad_norm": 0.6347576379776001, "learning_rate": 9.43858606861726e-06, "loss": 0.0883, "step": 3780 }, { "epoch": 0.5628991534234368, "grad_norm": 0.949237048625946, "learning_rate": 9.437100846576564e-06, "loss": 0.1049, "step": 3790 }, { "epoch": 0.5643843754641319, "grad_norm": 0.5857681035995483, "learning_rate": 9.435615624535868e-06, "loss": 0.0767, "step": 3800 }, { "epoch": 0.565869597504827, "grad_norm": 0.973402202129364, "learning_rate": 9.434130402495175e-06, "loss": 0.0967, "step": 3810 }, { "epoch": 0.567354819545522, "grad_norm": 0.8165373206138611, "learning_rate": 9.432645180454479e-06, "loss": 0.0912, "step": 3820 }, { "epoch": 0.5688400415862171, "grad_norm": 1.0625662803649902, "learning_rate": 9.431159958413783e-06, "loss": 0.0714, "step": 3830 }, { "epoch": 0.5703252636269123, "grad_norm": 0.6603330373764038, "learning_rate": 9.42967473637309e-06, "loss": 0.0694, "step": 3840 }, { "epoch": 0.5718104856676073, "grad_norm": 1.0427000522613525, "learning_rate": 9.428189514332392e-06, "loss": 0.0961, "step": 3850 }, { "epoch": 0.5732957077083024, "grad_norm": 1.2957814931869507, "learning_rate": 9.426704292291698e-06, "loss": 0.0879, "step": 3860 }, { "epoch": 0.5747809297489975, "grad_norm": 0.7232264280319214, "learning_rate": 9.425219070251005e-06, "loss": 0.067, "step": 3870 }, { "epoch": 0.5762661517896925, "grad_norm": 1.0114610195159912, "learning_rate": 9.423733848210307e-06, "loss": 0.1031, "step": 3880 }, { "epoch": 0.5777513738303877, "grad_norm": 1.2267494201660156, "learning_rate": 9.422248626169613e-06, "loss": 0.0863, "step": 3890 }, { "epoch": 0.5792365958710828, "grad_norm": 0.9242755174636841, "learning_rate": 9.420763404128918e-06, "loss": 0.0846, "step": 3900 }, { "epoch": 0.5807218179117778, "grad_norm": 0.7261422872543335, "learning_rate": 9.419278182088222e-06, "loss": 0.0708, "step": 3910 }, { "epoch": 0.5822070399524729, "grad_norm": 0.7982576489448547, "learning_rate": 9.417792960047528e-06, "loss": 0.0981, "step": 3920 }, { "epoch": 0.5836922619931679, "grad_norm": 1.1539570093154907, "learning_rate": 9.416307738006833e-06, "loss": 0.084, "step": 3930 }, { "epoch": 0.585177484033863, "grad_norm": 0.8333094716072083, "learning_rate": 9.414822515966137e-06, "loss": 0.0766, "step": 3940 }, { "epoch": 0.5866627060745582, "grad_norm": 0.9388213753700256, "learning_rate": 9.413337293925443e-06, "loss": 0.0733, "step": 3950 }, { "epoch": 0.5881479281152532, "grad_norm": 0.8660235404968262, "learning_rate": 9.411852071884748e-06, "loss": 0.091, "step": 3960 }, { "epoch": 0.5896331501559483, "grad_norm": 1.156136155128479, "learning_rate": 9.410366849844052e-06, "loss": 0.0848, "step": 3970 }, { "epoch": 0.5911183721966434, "grad_norm": 1.7612046003341675, "learning_rate": 9.408881627803358e-06, "loss": 0.0893, "step": 3980 }, { "epoch": 0.5926035942373384, "grad_norm": 0.9188507199287415, "learning_rate": 9.407396405762663e-06, "loss": 0.0916, "step": 3990 }, { "epoch": 0.5940888162780336, "grad_norm": 0.682065486907959, "learning_rate": 9.405911183721967e-06, "loss": 0.0862, "step": 4000 }, { "epoch": 0.5955740383187287, "grad_norm": 0.9579351544380188, "learning_rate": 9.404425961681272e-06, "loss": 0.0782, "step": 4010 }, { "epoch": 0.5970592603594237, "grad_norm": 0.5488564372062683, "learning_rate": 9.402940739640576e-06, "loss": 0.0868, "step": 4020 }, { "epoch": 0.5985444824001188, "grad_norm": 0.4922982156276703, "learning_rate": 9.401455517599882e-06, "loss": 0.079, "step": 4030 }, { "epoch": 0.6000297044408139, "grad_norm": 0.7463983297348022, "learning_rate": 9.399970295559187e-06, "loss": 0.0917, "step": 4040 }, { "epoch": 0.601514926481509, "grad_norm": 0.34033700823783875, "learning_rate": 9.398485073518491e-06, "loss": 0.069, "step": 4050 }, { "epoch": 0.6030001485222041, "grad_norm": 0.8693416118621826, "learning_rate": 9.396999851477797e-06, "loss": 0.0917, "step": 4060 }, { "epoch": 0.6044853705628992, "grad_norm": 0.6260632276535034, "learning_rate": 9.395514629437102e-06, "loss": 0.0601, "step": 4070 }, { "epoch": 0.6059705926035942, "grad_norm": 1.0476924180984497, "learning_rate": 9.394029407396406e-06, "loss": 0.1001, "step": 4080 }, { "epoch": 0.6074558146442893, "grad_norm": 1.490868091583252, "learning_rate": 9.392544185355712e-06, "loss": 0.0955, "step": 4090 }, { "epoch": 0.6089410366849844, "grad_norm": 0.8268522620201111, "learning_rate": 9.391058963315017e-06, "loss": 0.0735, "step": 4100 }, { "epoch": 0.6104262587256795, "grad_norm": 0.732266366481781, "learning_rate": 9.389573741274321e-06, "loss": 0.0884, "step": 4110 }, { "epoch": 0.6119114807663746, "grad_norm": 1.1488885879516602, "learning_rate": 9.388088519233626e-06, "loss": 0.0823, "step": 4120 }, { "epoch": 0.6133967028070697, "grad_norm": 1.073241114616394, "learning_rate": 9.386603297192932e-06, "loss": 0.0813, "step": 4130 }, { "epoch": 0.6148819248477647, "grad_norm": 0.7972573041915894, "learning_rate": 9.385118075152236e-06, "loss": 0.0698, "step": 4140 }, { "epoch": 0.6163671468884598, "grad_norm": 1.5724502801895142, "learning_rate": 9.38363285311154e-06, "loss": 0.0873, "step": 4150 }, { "epoch": 0.617852368929155, "grad_norm": 0.7835360169410706, "learning_rate": 9.382147631070847e-06, "loss": 0.0609, "step": 4160 }, { "epoch": 0.61933759096985, "grad_norm": 1.0752160549163818, "learning_rate": 9.380662409030151e-06, "loss": 0.0866, "step": 4170 }, { "epoch": 0.6208228130105451, "grad_norm": 1.1483020782470703, "learning_rate": 9.379177186989456e-06, "loss": 0.0835, "step": 4180 }, { "epoch": 0.6223080350512402, "grad_norm": 0.9119643568992615, "learning_rate": 9.37769196494876e-06, "loss": 0.0961, "step": 4190 }, { "epoch": 0.6237932570919352, "grad_norm": 0.8819010853767395, "learning_rate": 9.376206742908064e-06, "loss": 0.0834, "step": 4200 }, { "epoch": 0.6252784791326303, "grad_norm": 0.7143545746803284, "learning_rate": 9.37472152086737e-06, "loss": 0.0901, "step": 4210 }, { "epoch": 0.6267637011733254, "grad_norm": 0.913231372833252, "learning_rate": 9.373236298826675e-06, "loss": 0.074, "step": 4220 }, { "epoch": 0.6282489232140205, "grad_norm": 2.2083752155303955, "learning_rate": 9.37175107678598e-06, "loss": 0.1012, "step": 4230 }, { "epoch": 0.6297341452547156, "grad_norm": 1.3217167854309082, "learning_rate": 9.370265854745286e-06, "loss": 0.1131, "step": 4240 }, { "epoch": 0.6312193672954106, "grad_norm": 0.7895593643188477, "learning_rate": 9.36878063270459e-06, "loss": 0.0775, "step": 4250 }, { "epoch": 0.6327045893361057, "grad_norm": 0.6688397526741028, "learning_rate": 9.367295410663894e-06, "loss": 0.0823, "step": 4260 }, { "epoch": 0.6341898113768009, "grad_norm": 0.959603488445282, "learning_rate": 9.3658101886232e-06, "loss": 0.0641, "step": 4270 }, { "epoch": 0.6356750334174959, "grad_norm": 0.7541965842247009, "learning_rate": 9.364324966582505e-06, "loss": 0.0808, "step": 4280 }, { "epoch": 0.637160255458191, "grad_norm": 1.5787636041641235, "learning_rate": 9.36283974454181e-06, "loss": 0.0824, "step": 4290 }, { "epoch": 0.6386454774988861, "grad_norm": 1.1910892724990845, "learning_rate": 9.361354522501116e-06, "loss": 0.0828, "step": 4300 }, { "epoch": 0.6401306995395811, "grad_norm": 1.3543577194213867, "learning_rate": 9.359869300460418e-06, "loss": 0.0843, "step": 4310 }, { "epoch": 0.6416159215802762, "grad_norm": 0.8576405048370361, "learning_rate": 9.358384078419724e-06, "loss": 0.088, "step": 4320 }, { "epoch": 0.6431011436209714, "grad_norm": 0.8443158268928528, "learning_rate": 9.35689885637903e-06, "loss": 0.0838, "step": 4330 }, { "epoch": 0.6445863656616664, "grad_norm": 0.6524272561073303, "learning_rate": 9.355413634338333e-06, "loss": 0.11, "step": 4340 }, { "epoch": 0.6460715877023615, "grad_norm": 1.3981050252914429, "learning_rate": 9.35392841229764e-06, "loss": 0.0648, "step": 4350 }, { "epoch": 0.6475568097430566, "grad_norm": 0.7724353671073914, "learning_rate": 9.352443190256944e-06, "loss": 0.1112, "step": 4360 }, { "epoch": 0.6490420317837516, "grad_norm": 0.9034995436668396, "learning_rate": 9.350957968216248e-06, "loss": 0.0696, "step": 4370 }, { "epoch": 0.6505272538244468, "grad_norm": 0.7171207666397095, "learning_rate": 9.349472746175554e-06, "loss": 0.0691, "step": 4380 }, { "epoch": 0.6520124758651419, "grad_norm": 1.1618750095367432, "learning_rate": 9.347987524134859e-06, "loss": 0.1043, "step": 4390 }, { "epoch": 0.6534976979058369, "grad_norm": 1.0285515785217285, "learning_rate": 9.346502302094163e-06, "loss": 0.0683, "step": 4400 }, { "epoch": 0.654982919946532, "grad_norm": 0.7944039702415466, "learning_rate": 9.34501708005347e-06, "loss": 0.0727, "step": 4410 }, { "epoch": 0.6564681419872271, "grad_norm": 1.1061533689498901, "learning_rate": 9.343531858012774e-06, "loss": 0.074, "step": 4420 }, { "epoch": 0.6579533640279221, "grad_norm": 0.9832868576049805, "learning_rate": 9.342046635972078e-06, "loss": 0.0649, "step": 4430 }, { "epoch": 0.6594385860686173, "grad_norm": 0.5685659050941467, "learning_rate": 9.340561413931384e-06, "loss": 0.0808, "step": 4440 }, { "epoch": 0.6609238081093124, "grad_norm": 1.069838523864746, "learning_rate": 9.339076191890689e-06, "loss": 0.0832, "step": 4450 }, { "epoch": 0.6624090301500074, "grad_norm": 1.2676031589508057, "learning_rate": 9.337590969849993e-06, "loss": 0.0941, "step": 4460 }, { "epoch": 0.6638942521907025, "grad_norm": 1.0843100547790527, "learning_rate": 9.3361057478093e-06, "loss": 0.0984, "step": 4470 }, { "epoch": 0.6653794742313975, "grad_norm": 0.958575963973999, "learning_rate": 9.334620525768602e-06, "loss": 0.0962, "step": 4480 }, { "epoch": 0.6668646962720927, "grad_norm": 0.9856349229812622, "learning_rate": 9.333135303727908e-06, "loss": 0.1015, "step": 4490 }, { "epoch": 0.6683499183127878, "grad_norm": 0.6050782203674316, "learning_rate": 9.331650081687214e-06, "loss": 0.0652, "step": 4500 }, { "epoch": 0.6698351403534828, "grad_norm": 0.7362445592880249, "learning_rate": 9.330164859646517e-06, "loss": 0.0692, "step": 4510 }, { "epoch": 0.6713203623941779, "grad_norm": 0.9347787499427795, "learning_rate": 9.328679637605823e-06, "loss": 0.0876, "step": 4520 }, { "epoch": 0.672805584434873, "grad_norm": 1.99434494972229, "learning_rate": 9.327194415565128e-06, "loss": 0.0868, "step": 4530 }, { "epoch": 0.674290806475568, "grad_norm": 0.7971028685569763, "learning_rate": 9.325709193524432e-06, "loss": 0.0735, "step": 4540 }, { "epoch": 0.6757760285162632, "grad_norm": 0.8102898001670837, "learning_rate": 9.324223971483738e-06, "loss": 0.0713, "step": 4550 }, { "epoch": 0.6772612505569583, "grad_norm": 0.6246395707130432, "learning_rate": 9.322738749443043e-06, "loss": 0.0761, "step": 4560 }, { "epoch": 0.6787464725976533, "grad_norm": 0.6454740166664124, "learning_rate": 9.321253527402347e-06, "loss": 0.0869, "step": 4570 }, { "epoch": 0.6802316946383484, "grad_norm": 0.6368558406829834, "learning_rate": 9.319768305361653e-06, "loss": 0.082, "step": 4580 }, { "epoch": 0.6817169166790435, "grad_norm": 0.5236619710922241, "learning_rate": 9.318283083320958e-06, "loss": 0.0726, "step": 4590 }, { "epoch": 0.6832021387197386, "grad_norm": 2.013545036315918, "learning_rate": 9.316797861280262e-06, "loss": 0.098, "step": 4600 }, { "epoch": 0.6846873607604337, "grad_norm": 1.2313984632492065, "learning_rate": 9.315312639239567e-06, "loss": 0.0712, "step": 4610 }, { "epoch": 0.6861725828011288, "grad_norm": 0.9200993776321411, "learning_rate": 9.313827417198873e-06, "loss": 0.0858, "step": 4620 }, { "epoch": 0.6876578048418238, "grad_norm": 0.725279688835144, "learning_rate": 9.312342195158177e-06, "loss": 0.0775, "step": 4630 }, { "epoch": 0.6891430268825189, "grad_norm": 1.057055115699768, "learning_rate": 9.310856973117482e-06, "loss": 0.0921, "step": 4640 }, { "epoch": 0.6906282489232141, "grad_norm": 0.567707896232605, "learning_rate": 9.309371751076786e-06, "loss": 0.0942, "step": 4650 }, { "epoch": 0.6921134709639091, "grad_norm": 1.4313199520111084, "learning_rate": 9.307886529036092e-06, "loss": 0.0752, "step": 4660 }, { "epoch": 0.6935986930046042, "grad_norm": 0.7691679000854492, "learning_rate": 9.306401306995397e-06, "loss": 0.0784, "step": 4670 }, { "epoch": 0.6950839150452993, "grad_norm": 1.1163687705993652, "learning_rate": 9.304916084954701e-06, "loss": 0.0698, "step": 4680 }, { "epoch": 0.6965691370859943, "grad_norm": 0.7844257950782776, "learning_rate": 9.303430862914007e-06, "loss": 0.0697, "step": 4690 }, { "epoch": 0.6980543591266894, "grad_norm": 1.8008625507354736, "learning_rate": 9.301945640873312e-06, "loss": 0.0866, "step": 4700 }, { "epoch": 0.6995395811673846, "grad_norm": 1.1329940557479858, "learning_rate": 9.300460418832616e-06, "loss": 0.0722, "step": 4710 }, { "epoch": 0.7010248032080796, "grad_norm": 1.0240144729614258, "learning_rate": 9.29897519679192e-06, "loss": 0.0707, "step": 4720 }, { "epoch": 0.7025100252487747, "grad_norm": 1.1364368200302124, "learning_rate": 9.297489974751227e-06, "loss": 0.0837, "step": 4730 }, { "epoch": 0.7039952472894698, "grad_norm": 0.9570313096046448, "learning_rate": 9.296004752710531e-06, "loss": 0.0711, "step": 4740 }, { "epoch": 0.7054804693301648, "grad_norm": 1.208341360092163, "learning_rate": 9.294519530669835e-06, "loss": 0.0762, "step": 4750 }, { "epoch": 0.70696569137086, "grad_norm": 0.8793076276779175, "learning_rate": 9.293034308629142e-06, "loss": 0.0722, "step": 4760 }, { "epoch": 0.708450913411555, "grad_norm": 1.1761753559112549, "learning_rate": 9.291549086588446e-06, "loss": 0.0874, "step": 4770 }, { "epoch": 0.7099361354522501, "grad_norm": 1.384575605392456, "learning_rate": 9.29006386454775e-06, "loss": 0.0855, "step": 4780 }, { "epoch": 0.7114213574929452, "grad_norm": 1.1813658475875854, "learning_rate": 9.288578642507056e-06, "loss": 0.0775, "step": 4790 }, { "epoch": 0.7129065795336402, "grad_norm": 0.9031196236610413, "learning_rate": 9.287093420466361e-06, "loss": 0.0854, "step": 4800 }, { "epoch": 0.7143918015743354, "grad_norm": 1.7310291528701782, "learning_rate": 9.285608198425665e-06, "loss": 0.0893, "step": 4810 }, { "epoch": 0.7158770236150305, "grad_norm": 0.6749886870384216, "learning_rate": 9.28412297638497e-06, "loss": 0.0928, "step": 4820 }, { "epoch": 0.7173622456557255, "grad_norm": 1.170947790145874, "learning_rate": 9.282637754344274e-06, "loss": 0.0974, "step": 4830 }, { "epoch": 0.7188474676964206, "grad_norm": 0.5311076045036316, "learning_rate": 9.28115253230358e-06, "loss": 0.0891, "step": 4840 }, { "epoch": 0.7203326897371157, "grad_norm": 0.5467868447303772, "learning_rate": 9.279667310262885e-06, "loss": 0.0788, "step": 4850 }, { "epoch": 0.7218179117778107, "grad_norm": 1.394687294960022, "learning_rate": 9.27818208822219e-06, "loss": 0.0939, "step": 4860 }, { "epoch": 0.7233031338185059, "grad_norm": 1.7100565433502197, "learning_rate": 9.276696866181495e-06, "loss": 0.0985, "step": 4870 }, { "epoch": 0.724788355859201, "grad_norm": 1.3609228134155273, "learning_rate": 9.2752116441408e-06, "loss": 0.0685, "step": 4880 }, { "epoch": 0.726273577899896, "grad_norm": 0.7217347621917725, "learning_rate": 9.273726422100104e-06, "loss": 0.0702, "step": 4890 }, { "epoch": 0.7277587999405911, "grad_norm": 1.1523468494415283, "learning_rate": 9.27224120005941e-06, "loss": 0.1019, "step": 4900 }, { "epoch": 0.7292440219812862, "grad_norm": 1.0476207733154297, "learning_rate": 9.270755978018715e-06, "loss": 0.0729, "step": 4910 }, { "epoch": 0.7307292440219813, "grad_norm": 1.555390477180481, "learning_rate": 9.26927075597802e-06, "loss": 0.0845, "step": 4920 }, { "epoch": 0.7322144660626764, "grad_norm": 1.0015746355056763, "learning_rate": 9.267785533937325e-06, "loss": 0.079, "step": 4930 }, { "epoch": 0.7336996881033715, "grad_norm": 0.8952016830444336, "learning_rate": 9.266300311896628e-06, "loss": 0.0992, "step": 4940 }, { "epoch": 0.7351849101440665, "grad_norm": 0.7995119690895081, "learning_rate": 9.264815089855934e-06, "loss": 0.0843, "step": 4950 }, { "epoch": 0.7366701321847616, "grad_norm": 0.6183965802192688, "learning_rate": 9.263329867815239e-06, "loss": 0.0838, "step": 4960 }, { "epoch": 0.7381553542254568, "grad_norm": 1.539196252822876, "learning_rate": 9.261844645774543e-06, "loss": 0.0888, "step": 4970 }, { "epoch": 0.7396405762661518, "grad_norm": 1.1313135623931885, "learning_rate": 9.26035942373385e-06, "loss": 0.0899, "step": 4980 }, { "epoch": 0.7411257983068469, "grad_norm": 1.304632306098938, "learning_rate": 9.258874201693154e-06, "loss": 0.0799, "step": 4990 }, { "epoch": 0.742611020347542, "grad_norm": 0.8942739963531494, "learning_rate": 9.257388979652458e-06, "loss": 0.0756, "step": 5000 }, { "epoch": 0.744096242388237, "grad_norm": 1.0307176113128662, "learning_rate": 9.255903757611764e-06, "loss": 0.0953, "step": 5010 }, { "epoch": 0.7455814644289321, "grad_norm": 0.9079128503799438, "learning_rate": 9.254418535571069e-06, "loss": 0.0883, "step": 5020 }, { "epoch": 0.7470666864696273, "grad_norm": 0.6662859916687012, "learning_rate": 9.252933313530373e-06, "loss": 0.0635, "step": 5030 }, { "epoch": 0.7485519085103223, "grad_norm": 0.5575108528137207, "learning_rate": 9.25144809148968e-06, "loss": 0.0763, "step": 5040 }, { "epoch": 0.7500371305510174, "grad_norm": 1.2261484861373901, "learning_rate": 9.249962869448984e-06, "loss": 0.079, "step": 5050 }, { "epoch": 0.7515223525917124, "grad_norm": 0.442999005317688, "learning_rate": 9.248477647408288e-06, "loss": 0.0529, "step": 5060 }, { "epoch": 0.7530075746324075, "grad_norm": 1.6518497467041016, "learning_rate": 9.246992425367594e-06, "loss": 0.0973, "step": 5070 }, { "epoch": 0.7544927966731027, "grad_norm": 0.7751675844192505, "learning_rate": 9.245507203326899e-06, "loss": 0.0614, "step": 5080 }, { "epoch": 0.7559780187137977, "grad_norm": 0.7261990904808044, "learning_rate": 9.244021981286203e-06, "loss": 0.0768, "step": 5090 }, { "epoch": 0.7574632407544928, "grad_norm": 0.9539375901222229, "learning_rate": 9.24253675924551e-06, "loss": 0.0946, "step": 5100 }, { "epoch": 0.7589484627951879, "grad_norm": 0.43877437710762024, "learning_rate": 9.241051537204812e-06, "loss": 0.0586, "step": 5110 }, { "epoch": 0.7604336848358829, "grad_norm": 1.3418385982513428, "learning_rate": 9.239566315164118e-06, "loss": 0.1075, "step": 5120 }, { "epoch": 0.761918906876578, "grad_norm": 1.0130970478057861, "learning_rate": 9.238081093123422e-06, "loss": 0.0606, "step": 5130 }, { "epoch": 0.7634041289172732, "grad_norm": 1.10467529296875, "learning_rate": 9.236595871082727e-06, "loss": 0.0827, "step": 5140 }, { "epoch": 0.7648893509579682, "grad_norm": 1.0823795795440674, "learning_rate": 9.235110649042033e-06, "loss": 0.0758, "step": 5150 }, { "epoch": 0.7663745729986633, "grad_norm": 0.8557460904121399, "learning_rate": 9.233625427001337e-06, "loss": 0.0875, "step": 5160 }, { "epoch": 0.7678597950393584, "grad_norm": 0.6835376620292664, "learning_rate": 9.232140204960642e-06, "loss": 0.0777, "step": 5170 }, { "epoch": 0.7693450170800534, "grad_norm": 0.653923749923706, "learning_rate": 9.230654982919948e-06, "loss": 0.0615, "step": 5180 }, { "epoch": 0.7708302391207486, "grad_norm": 0.7122613787651062, "learning_rate": 9.229169760879252e-06, "loss": 0.0759, "step": 5190 }, { "epoch": 0.7723154611614437, "grad_norm": 1.2723838090896606, "learning_rate": 9.227684538838557e-06, "loss": 0.1033, "step": 5200 }, { "epoch": 0.7738006832021387, "grad_norm": 0.43908363580703735, "learning_rate": 9.226199316797863e-06, "loss": 0.06, "step": 5210 }, { "epoch": 0.7752859052428338, "grad_norm": 1.225393295288086, "learning_rate": 9.224714094757167e-06, "loss": 0.069, "step": 5220 }, { "epoch": 0.7767711272835289, "grad_norm": 0.8042909502983093, "learning_rate": 9.223228872716472e-06, "loss": 0.0789, "step": 5230 }, { "epoch": 0.7782563493242239, "grad_norm": 0.4831196665763855, "learning_rate": 9.221743650675776e-06, "loss": 0.0792, "step": 5240 }, { "epoch": 0.7797415713649191, "grad_norm": 1.2344239950180054, "learning_rate": 9.22025842863508e-06, "loss": 0.0726, "step": 5250 }, { "epoch": 0.7812267934056142, "grad_norm": 0.7011733651161194, "learning_rate": 9.218773206594387e-06, "loss": 0.0883, "step": 5260 }, { "epoch": 0.7827120154463092, "grad_norm": 0.8087848424911499, "learning_rate": 9.217287984553691e-06, "loss": 0.0719, "step": 5270 }, { "epoch": 0.7841972374870043, "grad_norm": 0.5542543530464172, "learning_rate": 9.215802762512996e-06, "loss": 0.0736, "step": 5280 }, { "epoch": 0.7856824595276994, "grad_norm": 1.5584278106689453, "learning_rate": 9.214317540472302e-06, "loss": 0.0762, "step": 5290 }, { "epoch": 0.7871676815683945, "grad_norm": 0.9587001204490662, "learning_rate": 9.212832318431606e-06, "loss": 0.079, "step": 5300 }, { "epoch": 0.7886529036090896, "grad_norm": 1.045442819595337, "learning_rate": 9.21134709639091e-06, "loss": 0.0693, "step": 5310 }, { "epoch": 0.7901381256497847, "grad_norm": 0.9065925478935242, "learning_rate": 9.209861874350217e-06, "loss": 0.0902, "step": 5320 }, { "epoch": 0.7916233476904797, "grad_norm": 1.3168227672576904, "learning_rate": 9.208376652309521e-06, "loss": 0.0935, "step": 5330 }, { "epoch": 0.7931085697311748, "grad_norm": 1.3936703205108643, "learning_rate": 9.206891430268826e-06, "loss": 0.0922, "step": 5340 }, { "epoch": 0.7945937917718698, "grad_norm": 1.0532509088516235, "learning_rate": 9.20540620822813e-06, "loss": 0.0876, "step": 5350 }, { "epoch": 0.796079013812565, "grad_norm": 0.8267485499382019, "learning_rate": 9.203920986187436e-06, "loss": 0.0803, "step": 5360 }, { "epoch": 0.7975642358532601, "grad_norm": 1.8309590816497803, "learning_rate": 9.20243576414674e-06, "loss": 0.0817, "step": 5370 }, { "epoch": 0.7990494578939551, "grad_norm": 0.9805804491043091, "learning_rate": 9.200950542106045e-06, "loss": 0.0701, "step": 5380 }, { "epoch": 0.8005346799346502, "grad_norm": 0.8274025321006775, "learning_rate": 9.199465320065351e-06, "loss": 0.068, "step": 5390 }, { "epoch": 0.8020199019753453, "grad_norm": 0.9127753376960754, "learning_rate": 9.197980098024656e-06, "loss": 0.0736, "step": 5400 }, { "epoch": 0.8035051240160404, "grad_norm": 0.9885143637657166, "learning_rate": 9.19649487598396e-06, "loss": 0.0669, "step": 5410 }, { "epoch": 0.8049903460567355, "grad_norm": 1.3231810331344604, "learning_rate": 9.195009653943265e-06, "loss": 0.075, "step": 5420 }, { "epoch": 0.8064755680974306, "grad_norm": 0.8889951705932617, "learning_rate": 9.19352443190257e-06, "loss": 0.0954, "step": 5430 }, { "epoch": 0.8079607901381256, "grad_norm": 0.4237781763076782, "learning_rate": 9.192039209861875e-06, "loss": 0.0456, "step": 5440 }, { "epoch": 0.8094460121788207, "grad_norm": 1.211202621459961, "learning_rate": 9.19055398782118e-06, "loss": 0.0769, "step": 5450 }, { "epoch": 0.8109312342195159, "grad_norm": 0.7493748664855957, "learning_rate": 9.189068765780484e-06, "loss": 0.0805, "step": 5460 }, { "epoch": 0.8124164562602109, "grad_norm": 0.38762542605400085, "learning_rate": 9.18758354373979e-06, "loss": 0.0562, "step": 5470 }, { "epoch": 0.813901678300906, "grad_norm": 1.0665141344070435, "learning_rate": 9.186098321699095e-06, "loss": 0.0936, "step": 5480 }, { "epoch": 0.8153869003416011, "grad_norm": 1.0364309549331665, "learning_rate": 9.184613099658399e-06, "loss": 0.0994, "step": 5490 }, { "epoch": 0.8168721223822961, "grad_norm": 1.3025943040847778, "learning_rate": 9.183127877617705e-06, "loss": 0.082, "step": 5500 }, { "epoch": 0.8183573444229912, "grad_norm": 0.35092493891716003, "learning_rate": 9.18164265557701e-06, "loss": 0.0562, "step": 5510 }, { "epoch": 0.8198425664636864, "grad_norm": 0.7516621351242065, "learning_rate": 9.180157433536314e-06, "loss": 0.0919, "step": 5520 }, { "epoch": 0.8213277885043814, "grad_norm": 1.3148735761642456, "learning_rate": 9.17867221149562e-06, "loss": 0.0795, "step": 5530 }, { "epoch": 0.8228130105450765, "grad_norm": 0.4802815616130829, "learning_rate": 9.177186989454925e-06, "loss": 0.0753, "step": 5540 }, { "epoch": 0.8242982325857716, "grad_norm": 0.9008163213729858, "learning_rate": 9.175701767414229e-06, "loss": 0.0959, "step": 5550 }, { "epoch": 0.8257834546264666, "grad_norm": 0.9163670539855957, "learning_rate": 9.174216545373535e-06, "loss": 0.0688, "step": 5560 }, { "epoch": 0.8272686766671618, "grad_norm": 0.994637131690979, "learning_rate": 9.172731323332838e-06, "loss": 0.0776, "step": 5570 }, { "epoch": 0.8287538987078569, "grad_norm": 0.8866167068481445, "learning_rate": 9.171246101292144e-06, "loss": 0.0845, "step": 5580 }, { "epoch": 0.8302391207485519, "grad_norm": 0.8306211829185486, "learning_rate": 9.169760879251448e-06, "loss": 0.0658, "step": 5590 }, { "epoch": 0.831724342789247, "grad_norm": 0.5935698747634888, "learning_rate": 9.168275657210753e-06, "loss": 0.0941, "step": 5600 }, { "epoch": 0.8332095648299421, "grad_norm": 1.102920651435852, "learning_rate": 9.166790435170059e-06, "loss": 0.0695, "step": 5610 }, { "epoch": 0.8346947868706371, "grad_norm": 1.0735467672348022, "learning_rate": 9.165305213129363e-06, "loss": 0.0748, "step": 5620 }, { "epoch": 0.8361800089113323, "grad_norm": 0.9008534550666809, "learning_rate": 9.163819991088668e-06, "loss": 0.094, "step": 5630 }, { "epoch": 0.8376652309520273, "grad_norm": 0.8445830941200256, "learning_rate": 9.162334769047974e-06, "loss": 0.0787, "step": 5640 }, { "epoch": 0.8391504529927224, "grad_norm": 0.711890697479248, "learning_rate": 9.160849547007278e-06, "loss": 0.0663, "step": 5650 }, { "epoch": 0.8406356750334175, "grad_norm": 1.391710877418518, "learning_rate": 9.159364324966583e-06, "loss": 0.0802, "step": 5660 }, { "epoch": 0.8421208970741125, "grad_norm": 0.9550698399543762, "learning_rate": 9.157879102925889e-06, "loss": 0.0661, "step": 5670 }, { "epoch": 0.8436061191148077, "grad_norm": 0.8969228267669678, "learning_rate": 9.156393880885193e-06, "loss": 0.0731, "step": 5680 }, { "epoch": 0.8450913411555028, "grad_norm": 1.3309462070465088, "learning_rate": 9.154908658844498e-06, "loss": 0.0781, "step": 5690 }, { "epoch": 0.8465765631961978, "grad_norm": 0.9650948643684387, "learning_rate": 9.153423436803804e-06, "loss": 0.1078, "step": 5700 }, { "epoch": 0.8480617852368929, "grad_norm": 1.1342687606811523, "learning_rate": 9.151938214763107e-06, "loss": 0.0752, "step": 5710 }, { "epoch": 0.849547007277588, "grad_norm": 0.8959386348724365, "learning_rate": 9.150452992722413e-06, "loss": 0.0816, "step": 5720 }, { "epoch": 0.851032229318283, "grad_norm": 0.9059763550758362, "learning_rate": 9.148967770681719e-06, "loss": 0.0939, "step": 5730 }, { "epoch": 0.8525174513589782, "grad_norm": 0.9173917174339294, "learning_rate": 9.147482548641022e-06, "loss": 0.081, "step": 5740 }, { "epoch": 0.8540026733996733, "grad_norm": 1.3770872354507446, "learning_rate": 9.145997326600328e-06, "loss": 0.0718, "step": 5750 }, { "epoch": 0.8554878954403683, "grad_norm": 0.6253504157066345, "learning_rate": 9.144512104559632e-06, "loss": 0.1021, "step": 5760 }, { "epoch": 0.8569731174810634, "grad_norm": 0.6649258136749268, "learning_rate": 9.143026882518937e-06, "loss": 0.0719, "step": 5770 }, { "epoch": 0.8584583395217585, "grad_norm": 1.2232184410095215, "learning_rate": 9.141541660478243e-06, "loss": 0.092, "step": 5780 }, { "epoch": 0.8599435615624536, "grad_norm": 1.0452312231063843, "learning_rate": 9.140056438437547e-06, "loss": 0.0703, "step": 5790 }, { "epoch": 0.8614287836031487, "grad_norm": 0.5171612501144409, "learning_rate": 9.138571216396852e-06, "loss": 0.0653, "step": 5800 }, { "epoch": 0.8629140056438438, "grad_norm": 1.0178922414779663, "learning_rate": 9.137085994356158e-06, "loss": 0.0834, "step": 5810 }, { "epoch": 0.8643992276845388, "grad_norm": 1.6475141048431396, "learning_rate": 9.135600772315462e-06, "loss": 0.0833, "step": 5820 }, { "epoch": 0.8658844497252339, "grad_norm": 1.547343373298645, "learning_rate": 9.134115550274767e-06, "loss": 0.0805, "step": 5830 }, { "epoch": 0.867369671765929, "grad_norm": 0.6915701031684875, "learning_rate": 9.132630328234073e-06, "loss": 0.0791, "step": 5840 }, { "epoch": 0.8688548938066241, "grad_norm": 1.050729513168335, "learning_rate": 9.131145106193377e-06, "loss": 0.066, "step": 5850 }, { "epoch": 0.8703401158473192, "grad_norm": 0.6713865995407104, "learning_rate": 9.129659884152682e-06, "loss": 0.0828, "step": 5860 }, { "epoch": 0.8718253378880143, "grad_norm": 1.028895378112793, "learning_rate": 9.128174662111986e-06, "loss": 0.096, "step": 5870 }, { "epoch": 0.8733105599287093, "grad_norm": 0.8316362500190735, "learning_rate": 9.12668944007129e-06, "loss": 0.0713, "step": 5880 }, { "epoch": 0.8747957819694044, "grad_norm": 1.0333486795425415, "learning_rate": 9.125204218030597e-06, "loss": 0.0715, "step": 5890 }, { "epoch": 0.8762810040100996, "grad_norm": 0.9072849750518799, "learning_rate": 9.123718995989901e-06, "loss": 0.0811, "step": 5900 }, { "epoch": 0.8777662260507946, "grad_norm": 0.565025269985199, "learning_rate": 9.122233773949206e-06, "loss": 0.0723, "step": 5910 }, { "epoch": 0.8792514480914897, "grad_norm": 0.9661470055580139, "learning_rate": 9.120748551908512e-06, "loss": 0.0705, "step": 5920 }, { "epoch": 0.8807366701321847, "grad_norm": 0.7308884263038635, "learning_rate": 9.119263329867816e-06, "loss": 0.0713, "step": 5930 }, { "epoch": 0.8822218921728798, "grad_norm": 1.2283244132995605, "learning_rate": 9.11777810782712e-06, "loss": 0.0845, "step": 5940 }, { "epoch": 0.883707114213575, "grad_norm": 0.7272588610649109, "learning_rate": 9.116292885786427e-06, "loss": 0.0751, "step": 5950 }, { "epoch": 0.88519233625427, "grad_norm": 0.6600573658943176, "learning_rate": 9.114807663745731e-06, "loss": 0.073, "step": 5960 }, { "epoch": 0.8866775582949651, "grad_norm": 0.5860625505447388, "learning_rate": 9.113322441705036e-06, "loss": 0.0865, "step": 5970 }, { "epoch": 0.8881627803356602, "grad_norm": 0.6887400150299072, "learning_rate": 9.11183721966434e-06, "loss": 0.0774, "step": 5980 }, { "epoch": 0.8896480023763552, "grad_norm": 1.1417862176895142, "learning_rate": 9.110351997623646e-06, "loss": 0.0644, "step": 5990 }, { "epoch": 0.8911332244170503, "grad_norm": 1.1381356716156006, "learning_rate": 9.10886677558295e-06, "loss": 0.0769, "step": 6000 }, { "epoch": 0.8926184464577455, "grad_norm": 1.168483018875122, "learning_rate": 9.107381553542255e-06, "loss": 0.0914, "step": 6010 }, { "epoch": 0.8941036684984405, "grad_norm": 0.7276975512504578, "learning_rate": 9.105896331501561e-06, "loss": 0.0807, "step": 6020 }, { "epoch": 0.8955888905391356, "grad_norm": 2.201474905014038, "learning_rate": 9.104411109460866e-06, "loss": 0.0853, "step": 6030 }, { "epoch": 0.8970741125798307, "grad_norm": 0.44225212931632996, "learning_rate": 9.10292588742017e-06, "loss": 0.068, "step": 6040 }, { "epoch": 0.8985593346205257, "grad_norm": 0.4434727132320404, "learning_rate": 9.101440665379474e-06, "loss": 0.0694, "step": 6050 }, { "epoch": 0.9000445566612209, "grad_norm": 0.5112833380699158, "learning_rate": 9.099955443338779e-06, "loss": 0.0783, "step": 6060 }, { "epoch": 0.901529778701916, "grad_norm": 2.047451972961426, "learning_rate": 9.098470221298085e-06, "loss": 0.0835, "step": 6070 }, { "epoch": 0.903015000742611, "grad_norm": 0.5268721580505371, "learning_rate": 9.09698499925739e-06, "loss": 0.0714, "step": 6080 }, { "epoch": 0.9045002227833061, "grad_norm": 1.2867953777313232, "learning_rate": 9.095499777216694e-06, "loss": 0.0864, "step": 6090 }, { "epoch": 0.9059854448240012, "grad_norm": 0.9601038098335266, "learning_rate": 9.094014555176e-06, "loss": 0.0961, "step": 6100 }, { "epoch": 0.9074706668646962, "grad_norm": 1.6264362335205078, "learning_rate": 9.092529333135304e-06, "loss": 0.0735, "step": 6110 }, { "epoch": 0.9089558889053914, "grad_norm": 0.8122678399085999, "learning_rate": 9.091044111094609e-06, "loss": 0.0902, "step": 6120 }, { "epoch": 0.9104411109460865, "grad_norm": 0.6474285125732422, "learning_rate": 9.089558889053915e-06, "loss": 0.0775, "step": 6130 }, { "epoch": 0.9119263329867815, "grad_norm": 0.8998765349388123, "learning_rate": 9.08807366701322e-06, "loss": 0.0968, "step": 6140 }, { "epoch": 0.9134115550274766, "grad_norm": 0.934053897857666, "learning_rate": 9.086588444972524e-06, "loss": 0.0772, "step": 6150 }, { "epoch": 0.9148967770681717, "grad_norm": 1.0640918016433716, "learning_rate": 9.08510322293183e-06, "loss": 0.0964, "step": 6160 }, { "epoch": 0.9163819991088668, "grad_norm": 0.7316854596138, "learning_rate": 9.083618000891133e-06, "loss": 0.0798, "step": 6170 }, { "epoch": 0.9178672211495619, "grad_norm": 1.0811574459075928, "learning_rate": 9.082132778850439e-06, "loss": 0.0741, "step": 6180 }, { "epoch": 0.919352443190257, "grad_norm": 0.9577917456626892, "learning_rate": 9.080647556809743e-06, "loss": 0.0882, "step": 6190 }, { "epoch": 0.920837665230952, "grad_norm": 1.3443588018417358, "learning_rate": 9.079162334769048e-06, "loss": 0.0722, "step": 6200 }, { "epoch": 0.9223228872716471, "grad_norm": 0.8847956657409668, "learning_rate": 9.077677112728354e-06, "loss": 0.0959, "step": 6210 }, { "epoch": 0.9238081093123421, "grad_norm": 1.149559736251831, "learning_rate": 9.076191890687658e-06, "loss": 0.0884, "step": 6220 }, { "epoch": 0.9252933313530373, "grad_norm": 1.0926103591918945, "learning_rate": 9.074706668646963e-06, "loss": 0.0812, "step": 6230 }, { "epoch": 0.9267785533937324, "grad_norm": 0.3773317337036133, "learning_rate": 9.073221446606269e-06, "loss": 0.0829, "step": 6240 }, { "epoch": 0.9282637754344274, "grad_norm": 0.872042179107666, "learning_rate": 9.071736224565573e-06, "loss": 0.0871, "step": 6250 }, { "epoch": 0.9297489974751225, "grad_norm": 0.8606436848640442, "learning_rate": 9.070251002524878e-06, "loss": 0.0694, "step": 6260 }, { "epoch": 0.9312342195158176, "grad_norm": 0.8578999638557434, "learning_rate": 9.068765780484184e-06, "loss": 0.0848, "step": 6270 }, { "epoch": 0.9327194415565127, "grad_norm": 0.8462734818458557, "learning_rate": 9.067280558443488e-06, "loss": 0.0765, "step": 6280 }, { "epoch": 0.9342046635972078, "grad_norm": 0.7472706437110901, "learning_rate": 9.065795336402793e-06, "loss": 0.1077, "step": 6290 }, { "epoch": 0.9356898856379029, "grad_norm": 0.6326286792755127, "learning_rate": 9.064310114362099e-06, "loss": 0.0686, "step": 6300 }, { "epoch": 0.9371751076785979, "grad_norm": 0.9380270838737488, "learning_rate": 9.062824892321403e-06, "loss": 0.0684, "step": 6310 }, { "epoch": 0.938660329719293, "grad_norm": 0.7247831225395203, "learning_rate": 9.061339670280708e-06, "loss": 0.0975, "step": 6320 }, { "epoch": 0.9401455517599882, "grad_norm": 0.8656736612319946, "learning_rate": 9.059854448240014e-06, "loss": 0.0647, "step": 6330 }, { "epoch": 0.9416307738006832, "grad_norm": 0.6879330277442932, "learning_rate": 9.058369226199317e-06, "loss": 0.0686, "step": 6340 }, { "epoch": 0.9431159958413783, "grad_norm": 1.0881211757659912, "learning_rate": 9.056884004158623e-06, "loss": 0.0892, "step": 6350 }, { "epoch": 0.9446012178820734, "grad_norm": 0.8151282668113708, "learning_rate": 9.055398782117927e-06, "loss": 0.0723, "step": 6360 }, { "epoch": 0.9460864399227684, "grad_norm": 0.6185230612754822, "learning_rate": 9.053913560077232e-06, "loss": 0.1003, "step": 6370 }, { "epoch": 0.9475716619634635, "grad_norm": 1.3565391302108765, "learning_rate": 9.052428338036538e-06, "loss": 0.0949, "step": 6380 }, { "epoch": 0.9490568840041587, "grad_norm": 0.659494936466217, "learning_rate": 9.050943115995842e-06, "loss": 0.0682, "step": 6390 }, { "epoch": 0.9505421060448537, "grad_norm": 0.8106864094734192, "learning_rate": 9.049457893955147e-06, "loss": 0.0706, "step": 6400 }, { "epoch": 0.9520273280855488, "grad_norm": 0.6366297006607056, "learning_rate": 9.047972671914453e-06, "loss": 0.0788, "step": 6410 }, { "epoch": 0.9535125501262439, "grad_norm": 1.2151988744735718, "learning_rate": 9.046487449873757e-06, "loss": 0.0686, "step": 6420 }, { "epoch": 0.9549977721669389, "grad_norm": 1.2140624523162842, "learning_rate": 9.045002227833061e-06, "loss": 0.0747, "step": 6430 }, { "epoch": 0.956482994207634, "grad_norm": 0.8682563304901123, "learning_rate": 9.043517005792368e-06, "loss": 0.0802, "step": 6440 }, { "epoch": 0.9579682162483292, "grad_norm": 1.3332148790359497, "learning_rate": 9.042031783751672e-06, "loss": 0.0852, "step": 6450 }, { "epoch": 0.9594534382890242, "grad_norm": 0.9207971096038818, "learning_rate": 9.040546561710976e-06, "loss": 0.0718, "step": 6460 }, { "epoch": 0.9609386603297193, "grad_norm": 0.8784974217414856, "learning_rate": 9.039061339670283e-06, "loss": 0.0819, "step": 6470 }, { "epoch": 0.9624238823704144, "grad_norm": 0.6948875784873962, "learning_rate": 9.037576117629585e-06, "loss": 0.0792, "step": 6480 }, { "epoch": 0.9639091044111094, "grad_norm": 0.8131117820739746, "learning_rate": 9.036090895588891e-06, "loss": 0.0798, "step": 6490 }, { "epoch": 0.9653943264518046, "grad_norm": 1.1777347326278687, "learning_rate": 9.034605673548196e-06, "loss": 0.0735, "step": 6500 }, { "epoch": 0.9668795484924996, "grad_norm": 0.9856818318367004, "learning_rate": 9.0331204515075e-06, "loss": 0.0768, "step": 6510 }, { "epoch": 0.9683647705331947, "grad_norm": 1.2949861288070679, "learning_rate": 9.031635229466806e-06, "loss": 0.0716, "step": 6520 }, { "epoch": 0.9698499925738898, "grad_norm": 0.6529346704483032, "learning_rate": 9.030150007426111e-06, "loss": 0.0784, "step": 6530 }, { "epoch": 0.9713352146145848, "grad_norm": 1.8309189081192017, "learning_rate": 9.028664785385415e-06, "loss": 0.0673, "step": 6540 }, { "epoch": 0.97282043665528, "grad_norm": 0.7805710434913635, "learning_rate": 9.027179563344721e-06, "loss": 0.0959, "step": 6550 }, { "epoch": 0.9743056586959751, "grad_norm": 0.4617927670478821, "learning_rate": 9.025694341304026e-06, "loss": 0.0785, "step": 6560 }, { "epoch": 0.9757908807366701, "grad_norm": 1.0437848567962646, "learning_rate": 9.02420911926333e-06, "loss": 0.0685, "step": 6570 }, { "epoch": 0.9772761027773652, "grad_norm": 0.7941612005233765, "learning_rate": 9.022723897222635e-06, "loss": 0.0838, "step": 6580 }, { "epoch": 0.9787613248180603, "grad_norm": 0.5064259171485901, "learning_rate": 9.021238675181941e-06, "loss": 0.0695, "step": 6590 }, { "epoch": 0.9802465468587553, "grad_norm": 0.9026811718940735, "learning_rate": 9.019753453141245e-06, "loss": 0.0648, "step": 6600 }, { "epoch": 0.9817317688994505, "grad_norm": 0.8138523101806641, "learning_rate": 9.01826823110055e-06, "loss": 0.0638, "step": 6610 }, { "epoch": 0.9832169909401456, "grad_norm": 0.5671774744987488, "learning_rate": 9.016783009059856e-06, "loss": 0.0544, "step": 6620 }, { "epoch": 0.9847022129808406, "grad_norm": 1.2543549537658691, "learning_rate": 9.01529778701916e-06, "loss": 0.0736, "step": 6630 }, { "epoch": 0.9861874350215357, "grad_norm": 0.5101591944694519, "learning_rate": 9.013812564978465e-06, "loss": 0.0779, "step": 6640 }, { "epoch": 0.9876726570622308, "grad_norm": 0.8760504126548767, "learning_rate": 9.01232734293777e-06, "loss": 0.0792, "step": 6650 }, { "epoch": 0.9891578791029259, "grad_norm": 1.0827103853225708, "learning_rate": 9.010842120897075e-06, "loss": 0.0853, "step": 6660 }, { "epoch": 0.990643101143621, "grad_norm": 0.6509794592857361, "learning_rate": 9.00935689885638e-06, "loss": 0.0632, "step": 6670 }, { "epoch": 0.9921283231843161, "grad_norm": 1.0729295015335083, "learning_rate": 9.007871676815684e-06, "loss": 0.082, "step": 6680 }, { "epoch": 0.9936135452250111, "grad_norm": 0.7828879356384277, "learning_rate": 9.006386454774989e-06, "loss": 0.0745, "step": 6690 }, { "epoch": 0.9950987672657062, "grad_norm": 0.7636898756027222, "learning_rate": 9.004901232734295e-06, "loss": 0.0851, "step": 6700 }, { "epoch": 0.9965839893064014, "grad_norm": 0.9901930689811707, "learning_rate": 9.0034160106936e-06, "loss": 0.0589, "step": 6710 }, { "epoch": 0.9980692113470964, "grad_norm": 1.4099174737930298, "learning_rate": 9.001930788652904e-06, "loss": 0.0877, "step": 6720 }, { "epoch": 0.9995544333877915, "grad_norm": 1.0965951681137085, "learning_rate": 9.00044556661221e-06, "loss": 0.0769, "step": 6730 }, { "epoch": 1.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.06574511528015137, "eval_runtime": 208.1528, "eval_samples_per_second": 182.65, "eval_steps_per_second": 5.712, "step": 6733 }, { "epoch": 1.0010396554284866, "grad_norm": 0.6327619552612305, "learning_rate": 8.998960344571514e-06, "loss": 0.0635, "step": 6740 }, { "epoch": 1.0025248774691817, "grad_norm": 0.9295978546142578, "learning_rate": 8.997475122530819e-06, "loss": 0.0849, "step": 6750 }, { "epoch": 1.0040100995098766, "grad_norm": 0.9286435842514038, "learning_rate": 8.995989900490125e-06, "loss": 0.0643, "step": 6760 }, { "epoch": 1.0054953215505718, "grad_norm": 1.0415282249450684, "learning_rate": 8.99450467844943e-06, "loss": 0.0922, "step": 6770 }, { "epoch": 1.0069805435912669, "grad_norm": 0.9151238799095154, "learning_rate": 8.993019456408734e-06, "loss": 0.0825, "step": 6780 }, { "epoch": 1.008465765631962, "grad_norm": 1.2567110061645508, "learning_rate": 8.99153423436804e-06, "loss": 0.0789, "step": 6790 }, { "epoch": 1.0099509876726571, "grad_norm": 0.7727917432785034, "learning_rate": 8.990049012327342e-06, "loss": 0.0759, "step": 6800 }, { "epoch": 1.0114362097133522, "grad_norm": 1.1036444902420044, "learning_rate": 8.988563790286649e-06, "loss": 0.0775, "step": 6810 }, { "epoch": 1.0129214317540471, "grad_norm": 0.6470946073532104, "learning_rate": 8.987078568245953e-06, "loss": 0.0671, "step": 6820 }, { "epoch": 1.0144066537947423, "grad_norm": 0.7344804406166077, "learning_rate": 8.985593346205257e-06, "loss": 0.0814, "step": 6830 }, { "epoch": 1.0158918758354374, "grad_norm": 0.8188076019287109, "learning_rate": 8.984108124164564e-06, "loss": 0.055, "step": 6840 }, { "epoch": 1.0173770978761325, "grad_norm": 1.11733078956604, "learning_rate": 8.982622902123868e-06, "loss": 0.0975, "step": 6850 }, { "epoch": 1.0188623199168276, "grad_norm": 0.87245774269104, "learning_rate": 8.981137680083172e-06, "loss": 0.0761, "step": 6860 }, { "epoch": 1.0203475419575228, "grad_norm": 1.0849484205245972, "learning_rate": 8.979652458042479e-06, "loss": 0.0813, "step": 6870 }, { "epoch": 1.0218327639982177, "grad_norm": 1.1357029676437378, "learning_rate": 8.978167236001783e-06, "loss": 0.0899, "step": 6880 }, { "epoch": 1.0233179860389128, "grad_norm": 1.1423171758651733, "learning_rate": 8.976682013961087e-06, "loss": 0.0981, "step": 6890 }, { "epoch": 1.024803208079608, "grad_norm": 0.6787160038948059, "learning_rate": 8.975196791920394e-06, "loss": 0.0804, "step": 6900 }, { "epoch": 1.026288430120303, "grad_norm": 0.884209394454956, "learning_rate": 8.973711569879698e-06, "loss": 0.0762, "step": 6910 }, { "epoch": 1.0277736521609981, "grad_norm": 0.9835346937179565, "learning_rate": 8.972226347839002e-06, "loss": 0.0772, "step": 6920 }, { "epoch": 1.0292588742016933, "grad_norm": 0.6878470778465271, "learning_rate": 8.970741125798309e-06, "loss": 0.061, "step": 6930 }, { "epoch": 1.0307440962423882, "grad_norm": 1.2484465837478638, "learning_rate": 8.969255903757611e-06, "loss": 0.0858, "step": 6940 }, { "epoch": 1.0322293182830833, "grad_norm": 0.6175143122673035, "learning_rate": 8.967770681716917e-06, "loss": 0.0654, "step": 6950 }, { "epoch": 1.0337145403237784, "grad_norm": 1.05460786819458, "learning_rate": 8.966285459676224e-06, "loss": 0.0791, "step": 6960 }, { "epoch": 1.0351997623644735, "grad_norm": 1.1853206157684326, "learning_rate": 8.964800237635526e-06, "loss": 0.0986, "step": 6970 }, { "epoch": 1.0366849844051687, "grad_norm": 1.2502192258834839, "learning_rate": 8.963315015594832e-06, "loss": 0.086, "step": 6980 }, { "epoch": 1.0381702064458636, "grad_norm": 0.9244298934936523, "learning_rate": 8.961829793554137e-06, "loss": 0.0813, "step": 6990 }, { "epoch": 1.0396554284865587, "grad_norm": 0.7741544842720032, "learning_rate": 8.960344571513441e-06, "loss": 0.0782, "step": 7000 }, { "epoch": 1.0411406505272538, "grad_norm": 0.9779021143913269, "learning_rate": 8.958859349472747e-06, "loss": 0.0823, "step": 7010 }, { "epoch": 1.042625872567949, "grad_norm": 0.8948909640312195, "learning_rate": 8.957374127432052e-06, "loss": 0.0856, "step": 7020 }, { "epoch": 1.044111094608644, "grad_norm": 0.5438656210899353, "learning_rate": 8.955888905391356e-06, "loss": 0.0776, "step": 7030 }, { "epoch": 1.0455963166493392, "grad_norm": 0.8294696807861328, "learning_rate": 8.954403683350662e-06, "loss": 0.0874, "step": 7040 }, { "epoch": 1.047081538690034, "grad_norm": 0.8454133868217468, "learning_rate": 8.952918461309967e-06, "loss": 0.0689, "step": 7050 }, { "epoch": 1.0485667607307292, "grad_norm": 1.180791974067688, "learning_rate": 8.951433239269271e-06, "loss": 0.111, "step": 7060 }, { "epoch": 1.0500519827714243, "grad_norm": 1.108608365058899, "learning_rate": 8.949948017228577e-06, "loss": 0.0797, "step": 7070 }, { "epoch": 1.0515372048121194, "grad_norm": 0.8830604553222656, "learning_rate": 8.948462795187882e-06, "loss": 0.09, "step": 7080 }, { "epoch": 1.0530224268528146, "grad_norm": 0.6073690056800842, "learning_rate": 8.946977573147186e-06, "loss": 0.0806, "step": 7090 }, { "epoch": 1.0545076488935097, "grad_norm": 1.0500218868255615, "learning_rate": 8.94549235110649e-06, "loss": 0.0637, "step": 7100 }, { "epoch": 1.0559928709342046, "grad_norm": 1.460571527481079, "learning_rate": 8.944007129065795e-06, "loss": 0.0845, "step": 7110 }, { "epoch": 1.0574780929748997, "grad_norm": 1.5901087522506714, "learning_rate": 8.942521907025101e-06, "loss": 0.0919, "step": 7120 }, { "epoch": 1.0589633150155948, "grad_norm": 0.7899160981178284, "learning_rate": 8.941036684984406e-06, "loss": 0.0682, "step": 7130 }, { "epoch": 1.06044853705629, "grad_norm": 0.6329823732376099, "learning_rate": 8.93955146294371e-06, "loss": 0.0978, "step": 7140 }, { "epoch": 1.061933759096985, "grad_norm": 0.9899727702140808, "learning_rate": 8.938066240903016e-06, "loss": 0.059, "step": 7150 }, { "epoch": 1.0634189811376802, "grad_norm": 0.7236705422401428, "learning_rate": 8.93658101886232e-06, "loss": 0.0793, "step": 7160 }, { "epoch": 1.064904203178375, "grad_norm": 0.7325375080108643, "learning_rate": 8.935095796821625e-06, "loss": 0.0656, "step": 7170 }, { "epoch": 1.0663894252190702, "grad_norm": 0.44422733783721924, "learning_rate": 8.933610574780931e-06, "loss": 0.0677, "step": 7180 }, { "epoch": 1.0678746472597653, "grad_norm": 1.4638272523880005, "learning_rate": 8.932125352740236e-06, "loss": 0.0797, "step": 7190 }, { "epoch": 1.0693598693004605, "grad_norm": 0.761106014251709, "learning_rate": 8.93064013069954e-06, "loss": 0.0653, "step": 7200 }, { "epoch": 1.0708450913411556, "grad_norm": 0.7225255370140076, "learning_rate": 8.929154908658845e-06, "loss": 0.0757, "step": 7210 }, { "epoch": 1.0723303133818507, "grad_norm": 0.7644005417823792, "learning_rate": 8.92766968661815e-06, "loss": 0.0851, "step": 7220 }, { "epoch": 1.0738155354225456, "grad_norm": 1.4343347549438477, "learning_rate": 8.926184464577455e-06, "loss": 0.0987, "step": 7230 }, { "epoch": 1.0753007574632407, "grad_norm": 1.44232177734375, "learning_rate": 8.92469924253676e-06, "loss": 0.0762, "step": 7240 }, { "epoch": 1.0767859795039358, "grad_norm": 1.0850433111190796, "learning_rate": 8.923214020496066e-06, "loss": 0.0647, "step": 7250 }, { "epoch": 1.078271201544631, "grad_norm": 1.018131136894226, "learning_rate": 8.92172879845537e-06, "loss": 0.0659, "step": 7260 }, { "epoch": 1.079756423585326, "grad_norm": 0.7677077054977417, "learning_rate": 8.920243576414675e-06, "loss": 0.055, "step": 7270 }, { "epoch": 1.081241645626021, "grad_norm": 1.0646945238113403, "learning_rate": 8.918758354373979e-06, "loss": 0.0818, "step": 7280 }, { "epoch": 1.082726867666716, "grad_norm": 1.020941138267517, "learning_rate": 8.917273132333285e-06, "loss": 0.0949, "step": 7290 }, { "epoch": 1.0842120897074112, "grad_norm": 0.8514700531959534, "learning_rate": 8.91578791029259e-06, "loss": 0.0887, "step": 7300 }, { "epoch": 1.0856973117481064, "grad_norm": 1.0452910661697388, "learning_rate": 8.914302688251894e-06, "loss": 0.0897, "step": 7310 }, { "epoch": 1.0871825337888015, "grad_norm": 0.6905787587165833, "learning_rate": 8.912817466211198e-06, "loss": 0.0632, "step": 7320 }, { "epoch": 1.0886677558294966, "grad_norm": 1.0077743530273438, "learning_rate": 8.911332244170505e-06, "loss": 0.0552, "step": 7330 }, { "epoch": 1.0901529778701915, "grad_norm": 0.8955633044242859, "learning_rate": 8.909847022129809e-06, "loss": 0.0685, "step": 7340 }, { "epoch": 1.0916381999108866, "grad_norm": 0.7802417874336243, "learning_rate": 8.908361800089113e-06, "loss": 0.0632, "step": 7350 }, { "epoch": 1.0931234219515817, "grad_norm": 0.8733623027801514, "learning_rate": 8.90687657804842e-06, "loss": 0.074, "step": 7360 }, { "epoch": 1.0946086439922769, "grad_norm": 0.8323423266410828, "learning_rate": 8.905391356007724e-06, "loss": 0.0713, "step": 7370 }, { "epoch": 1.096093866032972, "grad_norm": 0.5534403324127197, "learning_rate": 8.903906133967028e-06, "loss": 0.0965, "step": 7380 }, { "epoch": 1.097579088073667, "grad_norm": 1.587559700012207, "learning_rate": 8.902420911926335e-06, "loss": 0.0879, "step": 7390 }, { "epoch": 1.099064310114362, "grad_norm": 0.9629437327384949, "learning_rate": 8.900935689885639e-06, "loss": 0.0612, "step": 7400 }, { "epoch": 1.1005495321550571, "grad_norm": 0.6589235067367554, "learning_rate": 8.899450467844943e-06, "loss": 0.0443, "step": 7410 }, { "epoch": 1.1020347541957523, "grad_norm": 1.0450845956802368, "learning_rate": 8.89796524580425e-06, "loss": 0.0619, "step": 7420 }, { "epoch": 1.1035199762364474, "grad_norm": 0.8385497331619263, "learning_rate": 8.896480023763552e-06, "loss": 0.0836, "step": 7430 }, { "epoch": 1.1050051982771425, "grad_norm": 1.0231484174728394, "learning_rate": 8.894994801722858e-06, "loss": 0.0764, "step": 7440 }, { "epoch": 1.1064904203178376, "grad_norm": 0.6052321195602417, "learning_rate": 8.893509579682163e-06, "loss": 0.0749, "step": 7450 }, { "epoch": 1.1079756423585325, "grad_norm": 0.5850058197975159, "learning_rate": 8.892024357641467e-06, "loss": 0.0707, "step": 7460 }, { "epoch": 1.1094608643992276, "grad_norm": 0.8802666664123535, "learning_rate": 8.890539135600773e-06, "loss": 0.0778, "step": 7470 }, { "epoch": 1.1109460864399228, "grad_norm": 1.2311819791793823, "learning_rate": 8.889053913560078e-06, "loss": 0.0889, "step": 7480 }, { "epoch": 1.1124313084806179, "grad_norm": 0.708466649055481, "learning_rate": 8.887568691519382e-06, "loss": 0.0906, "step": 7490 }, { "epoch": 1.113916530521313, "grad_norm": 1.0136058330535889, "learning_rate": 8.886083469478688e-06, "loss": 0.0673, "step": 7500 }, { "epoch": 1.1154017525620081, "grad_norm": 1.758001446723938, "learning_rate": 8.884598247437993e-06, "loss": 0.0994, "step": 7510 }, { "epoch": 1.116886974602703, "grad_norm": 1.1525338888168335, "learning_rate": 8.883113025397297e-06, "loss": 0.0551, "step": 7520 }, { "epoch": 1.1183721966433982, "grad_norm": 0.47890013456344604, "learning_rate": 8.881627803356603e-06, "loss": 0.0703, "step": 7530 }, { "epoch": 1.1198574186840933, "grad_norm": 1.017448902130127, "learning_rate": 8.880142581315908e-06, "loss": 0.0866, "step": 7540 }, { "epoch": 1.1213426407247884, "grad_norm": 0.5454069375991821, "learning_rate": 8.878657359275212e-06, "loss": 0.0641, "step": 7550 }, { "epoch": 1.1228278627654835, "grad_norm": 1.2257490158081055, "learning_rate": 8.877172137234518e-06, "loss": 0.0748, "step": 7560 }, { "epoch": 1.1243130848061784, "grad_norm": 1.6047561168670654, "learning_rate": 8.875686915193821e-06, "loss": 0.0897, "step": 7570 }, { "epoch": 1.1257983068468735, "grad_norm": 0.7648993730545044, "learning_rate": 8.874201693153127e-06, "loss": 0.0707, "step": 7580 }, { "epoch": 1.1272835288875687, "grad_norm": 0.9259288907051086, "learning_rate": 8.872716471112432e-06, "loss": 0.0869, "step": 7590 }, { "epoch": 1.1287687509282638, "grad_norm": 0.8534162640571594, "learning_rate": 8.871231249071736e-06, "loss": 0.0969, "step": 7600 }, { "epoch": 1.130253972968959, "grad_norm": 0.9752341508865356, "learning_rate": 8.869746027031042e-06, "loss": 0.0661, "step": 7610 }, { "epoch": 1.131739195009654, "grad_norm": 0.9760097861289978, "learning_rate": 8.868260804990347e-06, "loss": 0.086, "step": 7620 }, { "epoch": 1.133224417050349, "grad_norm": 1.0568870306015015, "learning_rate": 8.866775582949651e-06, "loss": 0.0717, "step": 7630 }, { "epoch": 1.134709639091044, "grad_norm": 0.6620248556137085, "learning_rate": 8.865290360908957e-06, "loss": 0.0764, "step": 7640 }, { "epoch": 1.1361948611317392, "grad_norm": 0.6814618706703186, "learning_rate": 8.863805138868262e-06, "loss": 0.0878, "step": 7650 }, { "epoch": 1.1376800831724343, "grad_norm": 0.7755681276321411, "learning_rate": 8.862319916827566e-06, "loss": 0.0881, "step": 7660 }, { "epoch": 1.1391653052131294, "grad_norm": 1.2066744565963745, "learning_rate": 8.860834694786872e-06, "loss": 0.0802, "step": 7670 }, { "epoch": 1.1406505272538245, "grad_norm": 0.7700151801109314, "learning_rate": 8.859349472746177e-06, "loss": 0.0635, "step": 7680 }, { "epoch": 1.1421357492945194, "grad_norm": 0.769986629486084, "learning_rate": 8.857864250705481e-06, "loss": 0.0758, "step": 7690 }, { "epoch": 1.1436209713352146, "grad_norm": 1.4506511688232422, "learning_rate": 8.856379028664787e-06, "loss": 0.0762, "step": 7700 }, { "epoch": 1.1451061933759097, "grad_norm": 0.9908505082130432, "learning_rate": 8.854893806624092e-06, "loss": 0.0516, "step": 7710 }, { "epoch": 1.1465914154166048, "grad_norm": 0.6923515200614929, "learning_rate": 8.853408584583396e-06, "loss": 0.0843, "step": 7720 }, { "epoch": 1.1480766374573, "grad_norm": 0.9555417895317078, "learning_rate": 8.8519233625427e-06, "loss": 0.0599, "step": 7730 }, { "epoch": 1.1495618594979948, "grad_norm": 1.2183533906936646, "learning_rate": 8.850438140502005e-06, "loss": 0.107, "step": 7740 }, { "epoch": 1.15104708153869, "grad_norm": 1.6629198789596558, "learning_rate": 8.848952918461311e-06, "loss": 0.0905, "step": 7750 }, { "epoch": 1.152532303579385, "grad_norm": 0.6286592483520508, "learning_rate": 8.847467696420616e-06, "loss": 0.0552, "step": 7760 }, { "epoch": 1.1540175256200802, "grad_norm": 0.8661282062530518, "learning_rate": 8.84598247437992e-06, "loss": 0.0988, "step": 7770 }, { "epoch": 1.1555027476607753, "grad_norm": 0.8588027358055115, "learning_rate": 8.844497252339226e-06, "loss": 0.063, "step": 7780 }, { "epoch": 1.1569879697014704, "grad_norm": 0.9548928141593933, "learning_rate": 8.84301203029853e-06, "loss": 0.0968, "step": 7790 }, { "epoch": 1.1584731917421656, "grad_norm": 0.8153151273727417, "learning_rate": 8.841526808257835e-06, "loss": 0.0866, "step": 7800 }, { "epoch": 1.1599584137828605, "grad_norm": 1.0286554098129272, "learning_rate": 8.840041586217141e-06, "loss": 0.09, "step": 7810 }, { "epoch": 1.1614436358235556, "grad_norm": 0.9278287291526794, "learning_rate": 8.838556364176445e-06, "loss": 0.053, "step": 7820 }, { "epoch": 1.1629288578642507, "grad_norm": 0.8616372346878052, "learning_rate": 8.83707114213575e-06, "loss": 0.0755, "step": 7830 }, { "epoch": 1.1644140799049458, "grad_norm": 2.0150420665740967, "learning_rate": 8.835585920095054e-06, "loss": 0.1022, "step": 7840 }, { "epoch": 1.165899301945641, "grad_norm": 0.729625940322876, "learning_rate": 8.83410069805436e-06, "loss": 0.0548, "step": 7850 }, { "epoch": 1.1673845239863359, "grad_norm": 0.643584668636322, "learning_rate": 8.832615476013665e-06, "loss": 0.0702, "step": 7860 }, { "epoch": 1.168869746027031, "grad_norm": 0.6835783123970032, "learning_rate": 8.83113025397297e-06, "loss": 0.057, "step": 7870 }, { "epoch": 1.170354968067726, "grad_norm": 1.8533565998077393, "learning_rate": 8.829645031932274e-06, "loss": 0.0726, "step": 7880 }, { "epoch": 1.1718401901084212, "grad_norm": 0.8263382911682129, "learning_rate": 8.82815980989158e-06, "loss": 0.0695, "step": 7890 }, { "epoch": 1.1733254121491163, "grad_norm": 0.6495395302772522, "learning_rate": 8.826674587850884e-06, "loss": 0.0593, "step": 7900 }, { "epoch": 1.1748106341898115, "grad_norm": 0.5065099000930786, "learning_rate": 8.825189365810189e-06, "loss": 0.0544, "step": 7910 }, { "epoch": 1.1762958562305064, "grad_norm": 0.8669567108154297, "learning_rate": 8.823704143769495e-06, "loss": 0.0947, "step": 7920 }, { "epoch": 1.1777810782712015, "grad_norm": 0.9733984470367432, "learning_rate": 8.8222189217288e-06, "loss": 0.1057, "step": 7930 }, { "epoch": 1.1792663003118966, "grad_norm": 1.1606910228729248, "learning_rate": 8.820733699688104e-06, "loss": 0.0854, "step": 7940 }, { "epoch": 1.1807515223525917, "grad_norm": 1.0298819541931152, "learning_rate": 8.819248477647408e-06, "loss": 0.068, "step": 7950 }, { "epoch": 1.1822367443932869, "grad_norm": 0.5206531882286072, "learning_rate": 8.817763255606714e-06, "loss": 0.0684, "step": 7960 }, { "epoch": 1.183721966433982, "grad_norm": 0.8717692494392395, "learning_rate": 8.816278033566019e-06, "loss": 0.0844, "step": 7970 }, { "epoch": 1.1852071884746769, "grad_norm": 1.166616439819336, "learning_rate": 8.814792811525323e-06, "loss": 0.0636, "step": 7980 }, { "epoch": 1.186692410515372, "grad_norm": 1.462874412536621, "learning_rate": 8.81330758948463e-06, "loss": 0.0764, "step": 7990 }, { "epoch": 1.1881776325560671, "grad_norm": 0.7792450189590454, "learning_rate": 8.811822367443934e-06, "loss": 0.0603, "step": 8000 }, { "epoch": 1.1896628545967622, "grad_norm": 0.9004107117652893, "learning_rate": 8.810337145403238e-06, "loss": 0.0753, "step": 8010 }, { "epoch": 1.1911480766374574, "grad_norm": 0.97831130027771, "learning_rate": 8.808851923362544e-06, "loss": 0.066, "step": 8020 }, { "epoch": 1.1926332986781523, "grad_norm": 0.7797183990478516, "learning_rate": 8.807366701321847e-06, "loss": 0.083, "step": 8030 }, { "epoch": 1.1941185207188474, "grad_norm": 0.5634157657623291, "learning_rate": 8.805881479281153e-06, "loss": 0.0778, "step": 8040 }, { "epoch": 1.1956037427595425, "grad_norm": 0.5460097193717957, "learning_rate": 8.804396257240458e-06, "loss": 0.0744, "step": 8050 }, { "epoch": 1.1970889648002376, "grad_norm": 0.8219186067581177, "learning_rate": 8.802911035199762e-06, "loss": 0.0696, "step": 8060 }, { "epoch": 1.1985741868409328, "grad_norm": 1.00192391872406, "learning_rate": 8.801425813159068e-06, "loss": 0.0998, "step": 8070 }, { "epoch": 1.2000594088816279, "grad_norm": 0.5614769458770752, "learning_rate": 8.799940591118373e-06, "loss": 0.0612, "step": 8080 }, { "epoch": 1.201544630922323, "grad_norm": 0.6137503981590271, "learning_rate": 8.798455369077677e-06, "loss": 0.0624, "step": 8090 }, { "epoch": 1.203029852963018, "grad_norm": 0.8568705916404724, "learning_rate": 8.796970147036983e-06, "loss": 0.0826, "step": 8100 }, { "epoch": 1.204515075003713, "grad_norm": 1.0990469455718994, "learning_rate": 8.795484924996288e-06, "loss": 0.0755, "step": 8110 }, { "epoch": 1.2060002970444081, "grad_norm": 0.5455279350280762, "learning_rate": 8.793999702955592e-06, "loss": 0.0614, "step": 8120 }, { "epoch": 1.2074855190851033, "grad_norm": 1.0191229581832886, "learning_rate": 8.792514480914898e-06, "loss": 0.0721, "step": 8130 }, { "epoch": 1.2089707411257984, "grad_norm": 1.4468733072280884, "learning_rate": 8.791029258874203e-06, "loss": 0.0728, "step": 8140 }, { "epoch": 1.2104559631664933, "grad_norm": 0.4742625653743744, "learning_rate": 8.789544036833507e-06, "loss": 0.0678, "step": 8150 }, { "epoch": 1.2119411852071884, "grad_norm": 0.7147884368896484, "learning_rate": 8.788058814792813e-06, "loss": 0.0823, "step": 8160 }, { "epoch": 1.2134264072478835, "grad_norm": 0.9481222629547119, "learning_rate": 8.786573592752116e-06, "loss": 0.0847, "step": 8170 }, { "epoch": 1.2149116292885787, "grad_norm": 0.30851230025291443, "learning_rate": 8.785088370711422e-06, "loss": 0.0688, "step": 8180 }, { "epoch": 1.2163968513292738, "grad_norm": 1.409021258354187, "learning_rate": 8.783603148670728e-06, "loss": 0.0663, "step": 8190 }, { "epoch": 1.217882073369969, "grad_norm": 0.8977643847465515, "learning_rate": 8.782117926630031e-06, "loss": 0.0757, "step": 8200 }, { "epoch": 1.2193672954106638, "grad_norm": 0.9754537343978882, "learning_rate": 8.780632704589337e-06, "loss": 0.0637, "step": 8210 }, { "epoch": 1.220852517451359, "grad_norm": 0.9389632344245911, "learning_rate": 8.779147482548641e-06, "loss": 0.0804, "step": 8220 }, { "epoch": 1.222337739492054, "grad_norm": 0.9132267236709595, "learning_rate": 8.777662260507946e-06, "loss": 0.0666, "step": 8230 }, { "epoch": 1.2238229615327492, "grad_norm": 1.6683911085128784, "learning_rate": 8.776177038467252e-06, "loss": 0.0733, "step": 8240 }, { "epoch": 1.2253081835734443, "grad_norm": 0.4304860532283783, "learning_rate": 8.774691816426556e-06, "loss": 0.0705, "step": 8250 }, { "epoch": 1.2267934056141394, "grad_norm": 0.8889797329902649, "learning_rate": 8.773206594385861e-06, "loss": 0.1116, "step": 8260 }, { "epoch": 1.2282786276548343, "grad_norm": 0.8538875579833984, "learning_rate": 8.771721372345167e-06, "loss": 0.1, "step": 8270 }, { "epoch": 1.2297638496955294, "grad_norm": 1.2892072200775146, "learning_rate": 8.770236150304471e-06, "loss": 0.0844, "step": 8280 }, { "epoch": 1.2312490717362246, "grad_norm": 1.4475103616714478, "learning_rate": 8.768750928263776e-06, "loss": 0.0649, "step": 8290 }, { "epoch": 1.2327342937769197, "grad_norm": 0.9254520535469055, "learning_rate": 8.767265706223082e-06, "loss": 0.0577, "step": 8300 }, { "epoch": 1.2342195158176148, "grad_norm": 0.4279841482639313, "learning_rate": 8.765780484182386e-06, "loss": 0.0777, "step": 8310 }, { "epoch": 1.2357047378583097, "grad_norm": 0.579756498336792, "learning_rate": 8.764295262141691e-06, "loss": 0.0858, "step": 8320 }, { "epoch": 1.2371899598990048, "grad_norm": 0.5551837086677551, "learning_rate": 8.762810040100997e-06, "loss": 0.0794, "step": 8330 }, { "epoch": 1.2386751819397, "grad_norm": 1.2064443826675415, "learning_rate": 8.7613248180603e-06, "loss": 0.0644, "step": 8340 }, { "epoch": 1.240160403980395, "grad_norm": 1.0003966093063354, "learning_rate": 8.759839596019606e-06, "loss": 0.0757, "step": 8350 }, { "epoch": 1.2416456260210902, "grad_norm": 0.8278673887252808, "learning_rate": 8.75835437397891e-06, "loss": 0.0797, "step": 8360 }, { "epoch": 1.2431308480617853, "grad_norm": 0.7456088066101074, "learning_rate": 8.756869151938215e-06, "loss": 0.0763, "step": 8370 }, { "epoch": 1.2446160701024804, "grad_norm": 0.812319815158844, "learning_rate": 8.755383929897521e-06, "loss": 0.0918, "step": 8380 }, { "epoch": 1.2461012921431753, "grad_norm": 0.9069823026657104, "learning_rate": 8.753898707856825e-06, "loss": 0.0558, "step": 8390 }, { "epoch": 1.2475865141838705, "grad_norm": 1.3703268766403198, "learning_rate": 8.75241348581613e-06, "loss": 0.0841, "step": 8400 }, { "epoch": 1.2490717362245656, "grad_norm": 0.5158610343933105, "learning_rate": 8.750928263775436e-06, "loss": 0.0826, "step": 8410 }, { "epoch": 1.2505569582652607, "grad_norm": 0.916165292263031, "learning_rate": 8.74944304173474e-06, "loss": 0.0682, "step": 8420 }, { "epoch": 1.2520421803059558, "grad_norm": 0.6337945461273193, "learning_rate": 8.747957819694045e-06, "loss": 0.0843, "step": 8430 }, { "epoch": 1.2535274023466507, "grad_norm": 0.45069465041160583, "learning_rate": 8.746472597653351e-06, "loss": 0.0725, "step": 8440 }, { "epoch": 1.2550126243873458, "grad_norm": 0.5957522988319397, "learning_rate": 8.744987375612655e-06, "loss": 0.0728, "step": 8450 }, { "epoch": 1.256497846428041, "grad_norm": 0.9234186410903931, "learning_rate": 8.74350215357196e-06, "loss": 0.0751, "step": 8460 }, { "epoch": 1.257983068468736, "grad_norm": 1.337033987045288, "learning_rate": 8.742016931531264e-06, "loss": 0.0743, "step": 8470 }, { "epoch": 1.2594682905094312, "grad_norm": 0.7764634490013123, "learning_rate": 8.74053170949057e-06, "loss": 0.0659, "step": 8480 }, { "epoch": 1.260953512550126, "grad_norm": 0.7629880905151367, "learning_rate": 8.739046487449875e-06, "loss": 0.0865, "step": 8490 }, { "epoch": 1.2624387345908215, "grad_norm": 0.7267442941665649, "learning_rate": 8.73756126540918e-06, "loss": 0.0788, "step": 8500 }, { "epoch": 1.2639239566315164, "grad_norm": 0.7518733143806458, "learning_rate": 8.736076043368484e-06, "loss": 0.075, "step": 8510 }, { "epoch": 1.2654091786722115, "grad_norm": 0.6665178537368774, "learning_rate": 8.73459082132779e-06, "loss": 0.066, "step": 8520 }, { "epoch": 1.2668944007129066, "grad_norm": 1.201786756515503, "learning_rate": 8.733105599287094e-06, "loss": 0.0843, "step": 8530 }, { "epoch": 1.2683796227536017, "grad_norm": 0.5769052505493164, "learning_rate": 8.731620377246399e-06, "loss": 0.0721, "step": 8540 }, { "epoch": 1.2698648447942968, "grad_norm": 0.56085205078125, "learning_rate": 8.730135155205703e-06, "loss": 0.0684, "step": 8550 }, { "epoch": 1.2713500668349917, "grad_norm": 0.7175249457359314, "learning_rate": 8.728649933165009e-06, "loss": 0.0693, "step": 8560 }, { "epoch": 1.2728352888756869, "grad_norm": 0.6406940221786499, "learning_rate": 8.727164711124314e-06, "loss": 0.0802, "step": 8570 }, { "epoch": 1.274320510916382, "grad_norm": 0.6077917218208313, "learning_rate": 8.725679489083618e-06, "loss": 0.0711, "step": 8580 }, { "epoch": 1.275805732957077, "grad_norm": 0.6041744947433472, "learning_rate": 8.724194267042924e-06, "loss": 0.0625, "step": 8590 }, { "epoch": 1.2772909549977722, "grad_norm": 0.5732704401016235, "learning_rate": 8.722709045002229e-06, "loss": 0.0795, "step": 8600 }, { "epoch": 1.2787761770384671, "grad_norm": 0.7540349364280701, "learning_rate": 8.721223822961533e-06, "loss": 0.0585, "step": 8610 }, { "epoch": 1.2802613990791623, "grad_norm": 0.8742958903312683, "learning_rate": 8.719738600920839e-06, "loss": 0.0833, "step": 8620 }, { "epoch": 1.2817466211198574, "grad_norm": 1.4646620750427246, "learning_rate": 8.718253378880144e-06, "loss": 0.11, "step": 8630 }, { "epoch": 1.2832318431605525, "grad_norm": 1.2368395328521729, "learning_rate": 8.716768156839448e-06, "loss": 0.0799, "step": 8640 }, { "epoch": 1.2847170652012476, "grad_norm": 1.3969573974609375, "learning_rate": 8.715282934798754e-06, "loss": 0.0703, "step": 8650 }, { "epoch": 1.2862022872419427, "grad_norm": 0.9338309168815613, "learning_rate": 8.713797712758057e-06, "loss": 0.0826, "step": 8660 }, { "epoch": 1.2876875092826379, "grad_norm": 0.808988094329834, "learning_rate": 8.712312490717363e-06, "loss": 0.0809, "step": 8670 }, { "epoch": 1.2891727313233328, "grad_norm": 1.0200146436691284, "learning_rate": 8.710827268676667e-06, "loss": 0.0738, "step": 8680 }, { "epoch": 1.2906579533640279, "grad_norm": 0.5110995173454285, "learning_rate": 8.709342046635972e-06, "loss": 0.0787, "step": 8690 }, { "epoch": 1.292143175404723, "grad_norm": 0.5956591963768005, "learning_rate": 8.707856824595278e-06, "loss": 0.0571, "step": 8700 }, { "epoch": 1.2936283974454181, "grad_norm": 0.5521881580352783, "learning_rate": 8.706371602554582e-06, "loss": 0.0896, "step": 8710 }, { "epoch": 1.2951136194861133, "grad_norm": 0.5107810497283936, "learning_rate": 8.704886380513887e-06, "loss": 0.0755, "step": 8720 }, { "epoch": 1.2965988415268082, "grad_norm": 1.5277434587478638, "learning_rate": 8.703401158473193e-06, "loss": 0.0691, "step": 8730 }, { "epoch": 1.2980840635675033, "grad_norm": 0.8021115660667419, "learning_rate": 8.701915936432497e-06, "loss": 0.0826, "step": 8740 }, { "epoch": 1.2995692856081984, "grad_norm": 0.7268383502960205, "learning_rate": 8.700430714391802e-06, "loss": 0.0752, "step": 8750 }, { "epoch": 1.3010545076488935, "grad_norm": 0.7827374339103699, "learning_rate": 8.698945492351108e-06, "loss": 0.0718, "step": 8760 }, { "epoch": 1.3025397296895886, "grad_norm": 0.793857753276825, "learning_rate": 8.697460270310412e-06, "loss": 0.0532, "step": 8770 }, { "epoch": 1.3040249517302835, "grad_norm": 0.9983842968940735, "learning_rate": 8.695975048269717e-06, "loss": 0.0745, "step": 8780 }, { "epoch": 1.3055101737709789, "grad_norm": 0.48715001344680786, "learning_rate": 8.694489826229023e-06, "loss": 0.069, "step": 8790 }, { "epoch": 1.3069953958116738, "grad_norm": 1.0066664218902588, "learning_rate": 8.693004604188326e-06, "loss": 0.1001, "step": 8800 }, { "epoch": 1.308480617852369, "grad_norm": 0.587794303894043, "learning_rate": 8.691519382147632e-06, "loss": 0.0761, "step": 8810 }, { "epoch": 1.309965839893064, "grad_norm": 0.6250380873680115, "learning_rate": 8.690034160106938e-06, "loss": 0.0826, "step": 8820 }, { "epoch": 1.3114510619337592, "grad_norm": 1.0876086950302124, "learning_rate": 8.68854893806624e-06, "loss": 0.0909, "step": 8830 }, { "epoch": 1.3129362839744543, "grad_norm": 0.633162260055542, "learning_rate": 8.687063716025547e-06, "loss": 0.0915, "step": 8840 }, { "epoch": 1.3144215060151492, "grad_norm": 0.5343165397644043, "learning_rate": 8.685578493984851e-06, "loss": 0.0747, "step": 8850 }, { "epoch": 1.3159067280558443, "grad_norm": 1.269322156906128, "learning_rate": 8.684093271944156e-06, "loss": 0.0633, "step": 8860 }, { "epoch": 1.3173919500965394, "grad_norm": 1.0117168426513672, "learning_rate": 8.682608049903462e-06, "loss": 0.0793, "step": 8870 }, { "epoch": 1.3188771721372345, "grad_norm": 0.692861795425415, "learning_rate": 8.681122827862766e-06, "loss": 0.0819, "step": 8880 }, { "epoch": 1.3203623941779297, "grad_norm": 0.8399760723114014, "learning_rate": 8.67963760582207e-06, "loss": 0.0673, "step": 8890 }, { "epoch": 1.3218476162186246, "grad_norm": 1.1416538953781128, "learning_rate": 8.678152383781377e-06, "loss": 0.058, "step": 8900 }, { "epoch": 1.3233328382593197, "grad_norm": 0.5253177881240845, "learning_rate": 8.676667161740681e-06, "loss": 0.061, "step": 8910 }, { "epoch": 1.3248180603000148, "grad_norm": 0.8652992844581604, "learning_rate": 8.675181939699986e-06, "loss": 0.0655, "step": 8920 }, { "epoch": 1.32630328234071, "grad_norm": 0.769469141960144, "learning_rate": 8.673696717659292e-06, "loss": 0.0464, "step": 8930 }, { "epoch": 1.327788504381405, "grad_norm": 0.8097725510597229, "learning_rate": 8.672211495618596e-06, "loss": 0.0656, "step": 8940 }, { "epoch": 1.3292737264221002, "grad_norm": 1.2059855461120605, "learning_rate": 8.6707262735779e-06, "loss": 0.0676, "step": 8950 }, { "epoch": 1.3307589484627953, "grad_norm": 1.0563139915466309, "learning_rate": 8.669241051537207e-06, "loss": 0.0624, "step": 8960 }, { "epoch": 1.3322441705034902, "grad_norm": 1.094596266746521, "learning_rate": 8.66775582949651e-06, "loss": 0.0755, "step": 8970 }, { "epoch": 1.3337293925441853, "grad_norm": 0.9370036125183105, "learning_rate": 8.666270607455816e-06, "loss": 0.0868, "step": 8980 }, { "epoch": 1.3352146145848804, "grad_norm": 1.1293418407440186, "learning_rate": 8.66478538541512e-06, "loss": 0.0707, "step": 8990 }, { "epoch": 1.3366998366255756, "grad_norm": 1.6295610666275024, "learning_rate": 8.663300163374425e-06, "loss": 0.0655, "step": 9000 }, { "epoch": 1.3381850586662707, "grad_norm": 1.074697494506836, "learning_rate": 8.66181494133373e-06, "loss": 0.0617, "step": 9010 }, { "epoch": 1.3396702807069656, "grad_norm": 0.906007707118988, "learning_rate": 8.660329719293035e-06, "loss": 0.077, "step": 9020 }, { "epoch": 1.3411555027476607, "grad_norm": 0.7099418044090271, "learning_rate": 8.65884449725234e-06, "loss": 0.085, "step": 9030 }, { "epoch": 1.3426407247883558, "grad_norm": 0.9479149580001831, "learning_rate": 8.657359275211646e-06, "loss": 0.0784, "step": 9040 }, { "epoch": 1.344125946829051, "grad_norm": 0.6403526067733765, "learning_rate": 8.65587405317095e-06, "loss": 0.0639, "step": 9050 }, { "epoch": 1.345611168869746, "grad_norm": 1.300971269607544, "learning_rate": 8.654388831130255e-06, "loss": 0.0795, "step": 9060 }, { "epoch": 1.347096390910441, "grad_norm": 0.9193373918533325, "learning_rate": 8.652903609089559e-06, "loss": 0.0877, "step": 9070 }, { "epoch": 1.3485816129511363, "grad_norm": 1.093873381614685, "learning_rate": 8.651418387048865e-06, "loss": 0.0671, "step": 9080 }, { "epoch": 1.3500668349918312, "grad_norm": 0.40460872650146484, "learning_rate": 8.64993316500817e-06, "loss": 0.0754, "step": 9090 }, { "epoch": 1.3515520570325263, "grad_norm": 0.5930770039558411, "learning_rate": 8.648447942967474e-06, "loss": 0.0636, "step": 9100 }, { "epoch": 1.3530372790732215, "grad_norm": 0.925861120223999, "learning_rate": 8.64696272092678e-06, "loss": 0.0823, "step": 9110 }, { "epoch": 1.3545225011139166, "grad_norm": 0.7909414768218994, "learning_rate": 8.645477498886085e-06, "loss": 0.0783, "step": 9120 }, { "epoch": 1.3560077231546117, "grad_norm": 0.7667768597602844, "learning_rate": 8.643992276845389e-06, "loss": 0.0712, "step": 9130 }, { "epoch": 1.3574929451953066, "grad_norm": 0.738560140132904, "learning_rate": 8.642507054804693e-06, "loss": 0.0909, "step": 9140 }, { "epoch": 1.3589781672360017, "grad_norm": 1.9173917770385742, "learning_rate": 8.641021832764e-06, "loss": 0.0746, "step": 9150 }, { "epoch": 1.3604633892766969, "grad_norm": 1.3944460153579712, "learning_rate": 8.639536610723304e-06, "loss": 0.0663, "step": 9160 }, { "epoch": 1.361948611317392, "grad_norm": 0.8866845965385437, "learning_rate": 8.638051388682608e-06, "loss": 0.0816, "step": 9170 }, { "epoch": 1.363433833358087, "grad_norm": 1.4640837907791138, "learning_rate": 8.636566166641913e-06, "loss": 0.1226, "step": 9180 }, { "epoch": 1.364919055398782, "grad_norm": 0.7436324954032898, "learning_rate": 8.635080944601219e-06, "loss": 0.0692, "step": 9190 }, { "epoch": 1.3664042774394771, "grad_norm": 0.8947879076004028, "learning_rate": 8.633595722560523e-06, "loss": 0.0933, "step": 9200 }, { "epoch": 1.3678894994801722, "grad_norm": 0.7265494465827942, "learning_rate": 8.632110500519828e-06, "loss": 0.0803, "step": 9210 }, { "epoch": 1.3693747215208674, "grad_norm": 1.0423144102096558, "learning_rate": 8.630625278479134e-06, "loss": 0.0862, "step": 9220 }, { "epoch": 1.3708599435615625, "grad_norm": 0.8711174726486206, "learning_rate": 8.629140056438438e-06, "loss": 0.069, "step": 9230 }, { "epoch": 1.3723451656022576, "grad_norm": 1.1988892555236816, "learning_rate": 8.627654834397743e-06, "loss": 0.076, "step": 9240 }, { "epoch": 1.3738303876429527, "grad_norm": 0.5930973887443542, "learning_rate": 8.626169612357049e-06, "loss": 0.0658, "step": 9250 }, { "epoch": 1.3753156096836476, "grad_norm": 1.127451777458191, "learning_rate": 8.624684390316353e-06, "loss": 0.0772, "step": 9260 }, { "epoch": 1.3768008317243428, "grad_norm": 0.7104337215423584, "learning_rate": 8.623199168275658e-06, "loss": 0.0856, "step": 9270 }, { "epoch": 1.3782860537650379, "grad_norm": 1.0597851276397705, "learning_rate": 8.621713946234962e-06, "loss": 0.0742, "step": 9280 }, { "epoch": 1.379771275805733, "grad_norm": 1.0876798629760742, "learning_rate": 8.620228724194267e-06, "loss": 0.0851, "step": 9290 }, { "epoch": 1.3812564978464281, "grad_norm": 0.7666727900505066, "learning_rate": 8.618743502153573e-06, "loss": 0.0463, "step": 9300 }, { "epoch": 1.382741719887123, "grad_norm": 1.0942326784133911, "learning_rate": 8.617258280112877e-06, "loss": 0.0617, "step": 9310 }, { "epoch": 1.3842269419278181, "grad_norm": 1.3110612630844116, "learning_rate": 8.615773058072182e-06, "loss": 0.092, "step": 9320 }, { "epoch": 1.3857121639685133, "grad_norm": 0.7441218495368958, "learning_rate": 8.614287836031488e-06, "loss": 0.0598, "step": 9330 }, { "epoch": 1.3871973860092084, "grad_norm": 0.9740796089172363, "learning_rate": 8.612802613990792e-06, "loss": 0.0654, "step": 9340 }, { "epoch": 1.3886826080499035, "grad_norm": 0.8730571269989014, "learning_rate": 8.611317391950097e-06, "loss": 0.0648, "step": 9350 }, { "epoch": 1.3901678300905984, "grad_norm": 0.9170101881027222, "learning_rate": 8.609832169909403e-06, "loss": 0.0776, "step": 9360 }, { "epoch": 1.3916530521312938, "grad_norm": 0.4509689211845398, "learning_rate": 8.608346947868707e-06, "loss": 0.0655, "step": 9370 }, { "epoch": 1.3931382741719887, "grad_norm": 2.2347559928894043, "learning_rate": 8.606861725828012e-06, "loss": 0.0879, "step": 9380 }, { "epoch": 1.3946234962126838, "grad_norm": 1.4302359819412231, "learning_rate": 8.605376503787318e-06, "loss": 0.0884, "step": 9390 }, { "epoch": 1.396108718253379, "grad_norm": 0.33778825402259827, "learning_rate": 8.60389128174662e-06, "loss": 0.0447, "step": 9400 }, { "epoch": 1.397593940294074, "grad_norm": 0.8757361769676208, "learning_rate": 8.602406059705927e-06, "loss": 0.0752, "step": 9410 }, { "epoch": 1.3990791623347691, "grad_norm": 0.8796318769454956, "learning_rate": 8.600920837665233e-06, "loss": 0.0763, "step": 9420 }, { "epoch": 1.400564384375464, "grad_norm": 1.1154508590698242, "learning_rate": 8.599435615624536e-06, "loss": 0.067, "step": 9430 }, { "epoch": 1.4020496064161592, "grad_norm": 0.722909152507782, "learning_rate": 8.597950393583842e-06, "loss": 0.0682, "step": 9440 }, { "epoch": 1.4035348284568543, "grad_norm": 1.0541070699691772, "learning_rate": 8.596465171543146e-06, "loss": 0.0538, "step": 9450 }, { "epoch": 1.4050200504975494, "grad_norm": 0.6118191480636597, "learning_rate": 8.59497994950245e-06, "loss": 0.0667, "step": 9460 }, { "epoch": 1.4065052725382445, "grad_norm": 1.3948619365692139, "learning_rate": 8.593494727461757e-06, "loss": 0.0651, "step": 9470 }, { "epoch": 1.4079904945789394, "grad_norm": 1.4375851154327393, "learning_rate": 8.592009505421061e-06, "loss": 0.0651, "step": 9480 }, { "epoch": 1.4094757166196346, "grad_norm": 0.42622682452201843, "learning_rate": 8.590524283380365e-06, "loss": 0.0744, "step": 9490 }, { "epoch": 1.4109609386603297, "grad_norm": 0.7068700790405273, "learning_rate": 8.589039061339672e-06, "loss": 0.0754, "step": 9500 }, { "epoch": 1.4124461607010248, "grad_norm": 0.5672358870506287, "learning_rate": 8.587553839298976e-06, "loss": 0.0678, "step": 9510 }, { "epoch": 1.41393138274172, "grad_norm": 1.3151499032974243, "learning_rate": 8.58606861725828e-06, "loss": 0.0843, "step": 9520 }, { "epoch": 1.415416604782415, "grad_norm": 0.579703688621521, "learning_rate": 8.584583395217587e-06, "loss": 0.0808, "step": 9530 }, { "epoch": 1.4169018268231102, "grad_norm": 0.921262800693512, "learning_rate": 8.583098173176891e-06, "loss": 0.0837, "step": 9540 }, { "epoch": 1.418387048863805, "grad_norm": 0.6830768585205078, "learning_rate": 8.581612951136195e-06, "loss": 0.065, "step": 9550 }, { "epoch": 1.4198722709045002, "grad_norm": 0.6493134498596191, "learning_rate": 8.580127729095502e-06, "loss": 0.06, "step": 9560 }, { "epoch": 1.4213574929451953, "grad_norm": 1.3778793811798096, "learning_rate": 8.578642507054804e-06, "loss": 0.0637, "step": 9570 }, { "epoch": 1.4228427149858904, "grad_norm": 0.6053674221038818, "learning_rate": 8.57715728501411e-06, "loss": 0.0717, "step": 9580 }, { "epoch": 1.4243279370265856, "grad_norm": 1.0402705669403076, "learning_rate": 8.575672062973415e-06, "loss": 0.0727, "step": 9590 }, { "epoch": 1.4258131590672805, "grad_norm": 1.1577720642089844, "learning_rate": 8.57418684093272e-06, "loss": 0.0726, "step": 9600 }, { "epoch": 1.4272983811079756, "grad_norm": 0.6212295889854431, "learning_rate": 8.572701618892025e-06, "loss": 0.0785, "step": 9610 }, { "epoch": 1.4287836031486707, "grad_norm": 0.5646659731864929, "learning_rate": 8.57121639685133e-06, "loss": 0.06, "step": 9620 }, { "epoch": 1.4302688251893658, "grad_norm": 0.2798670828342438, "learning_rate": 8.569731174810634e-06, "loss": 0.083, "step": 9630 }, { "epoch": 1.431754047230061, "grad_norm": 0.8584413528442383, "learning_rate": 8.56824595276994e-06, "loss": 0.0845, "step": 9640 }, { "epoch": 1.4332392692707558, "grad_norm": 0.6633872985839844, "learning_rate": 8.566760730729245e-06, "loss": 0.0932, "step": 9650 }, { "epoch": 1.4347244913114512, "grad_norm": 1.4569209814071655, "learning_rate": 8.56527550868855e-06, "loss": 0.0811, "step": 9660 }, { "epoch": 1.436209713352146, "grad_norm": 1.3260440826416016, "learning_rate": 8.563790286647855e-06, "loss": 0.055, "step": 9670 }, { "epoch": 1.4376949353928412, "grad_norm": 1.1164923906326294, "learning_rate": 8.56230506460716e-06, "loss": 0.0877, "step": 9680 }, { "epoch": 1.4391801574335363, "grad_norm": 0.9574961066246033, "learning_rate": 8.560819842566464e-06, "loss": 0.0736, "step": 9690 }, { "epoch": 1.4406653794742315, "grad_norm": 1.2333389520645142, "learning_rate": 8.559334620525769e-06, "loss": 0.0861, "step": 9700 }, { "epoch": 1.4421506015149266, "grad_norm": 0.932621955871582, "learning_rate": 8.557849398485075e-06, "loss": 0.0778, "step": 9710 }, { "epoch": 1.4436358235556215, "grad_norm": 0.7551608085632324, "learning_rate": 8.55636417644438e-06, "loss": 0.0593, "step": 9720 }, { "epoch": 1.4451210455963166, "grad_norm": 0.7052642107009888, "learning_rate": 8.554878954403684e-06, "loss": 0.0467, "step": 9730 }, { "epoch": 1.4466062676370117, "grad_norm": 2.466881036758423, "learning_rate": 8.553393732362988e-06, "loss": 0.0794, "step": 9740 }, { "epoch": 1.4480914896777068, "grad_norm": 0.7573568224906921, "learning_rate": 8.551908510322294e-06, "loss": 0.065, "step": 9750 }, { "epoch": 1.449576711718402, "grad_norm": 1.1070822477340698, "learning_rate": 8.550423288281599e-06, "loss": 0.0812, "step": 9760 }, { "epoch": 1.4510619337590969, "grad_norm": 0.355160117149353, "learning_rate": 8.548938066240903e-06, "loss": 0.0664, "step": 9770 }, { "epoch": 1.452547155799792, "grad_norm": 1.617323398590088, "learning_rate": 8.54745284420021e-06, "loss": 0.0887, "step": 9780 }, { "epoch": 1.4540323778404871, "grad_norm": 0.6931330561637878, "learning_rate": 8.545967622159514e-06, "loss": 0.0678, "step": 9790 }, { "epoch": 1.4555175998811822, "grad_norm": 0.6744693517684937, "learning_rate": 8.544482400118818e-06, "loss": 0.0754, "step": 9800 }, { "epoch": 1.4570028219218774, "grad_norm": 1.0592255592346191, "learning_rate": 8.542997178078123e-06, "loss": 0.0632, "step": 9810 }, { "epoch": 1.4584880439625725, "grad_norm": 0.8541384339332581, "learning_rate": 8.541511956037429e-06, "loss": 0.0736, "step": 9820 }, { "epoch": 1.4599732660032676, "grad_norm": 0.8458141684532166, "learning_rate": 8.540026733996733e-06, "loss": 0.0563, "step": 9830 }, { "epoch": 1.4614584880439625, "grad_norm": 0.6952449083328247, "learning_rate": 8.538541511956038e-06, "loss": 0.0789, "step": 9840 }, { "epoch": 1.4629437100846576, "grad_norm": 0.8950325846672058, "learning_rate": 8.537056289915344e-06, "loss": 0.0683, "step": 9850 }, { "epoch": 1.4644289321253527, "grad_norm": 0.9191376566886902, "learning_rate": 8.535571067874648e-06, "loss": 0.0629, "step": 9860 }, { "epoch": 1.4659141541660479, "grad_norm": 0.8940352201461792, "learning_rate": 8.534085845833953e-06, "loss": 0.0674, "step": 9870 }, { "epoch": 1.467399376206743, "grad_norm": 0.9834596514701843, "learning_rate": 8.532600623793259e-06, "loss": 0.073, "step": 9880 }, { "epoch": 1.468884598247438, "grad_norm": 0.592193603515625, "learning_rate": 8.531115401752563e-06, "loss": 0.0619, "step": 9890 }, { "epoch": 1.470369820288133, "grad_norm": 0.8372761011123657, "learning_rate": 8.529630179711868e-06, "loss": 0.0832, "step": 9900 }, { "epoch": 1.4718550423288281, "grad_norm": 0.7133629322052002, "learning_rate": 8.528144957671172e-06, "loss": 0.0592, "step": 9910 }, { "epoch": 1.4733402643695233, "grad_norm": 0.4714134633541107, "learning_rate": 8.526659735630476e-06, "loss": 0.0669, "step": 9920 }, { "epoch": 1.4748254864102184, "grad_norm": 1.3863356113433838, "learning_rate": 8.525174513589783e-06, "loss": 0.067, "step": 9930 }, { "epoch": 1.4763107084509133, "grad_norm": 1.3102223873138428, "learning_rate": 8.523689291549087e-06, "loss": 0.0734, "step": 9940 }, { "epoch": 1.4777959304916086, "grad_norm": 1.2417453527450562, "learning_rate": 8.522204069508391e-06, "loss": 0.0668, "step": 9950 }, { "epoch": 1.4792811525323035, "grad_norm": 0.8425410389900208, "learning_rate": 8.520718847467698e-06, "loss": 0.0542, "step": 9960 }, { "epoch": 1.4807663745729986, "grad_norm": 0.3685753643512726, "learning_rate": 8.519233625427002e-06, "loss": 0.0675, "step": 9970 }, { "epoch": 1.4822515966136938, "grad_norm": 0.9921769499778748, "learning_rate": 8.517748403386306e-06, "loss": 0.0739, "step": 9980 }, { "epoch": 1.483736818654389, "grad_norm": 0.7862120270729065, "learning_rate": 8.516263181345613e-06, "loss": 0.051, "step": 9990 }, { "epoch": 1.485222040695084, "grad_norm": 0.8443083763122559, "learning_rate": 8.514777959304917e-06, "loss": 0.0634, "step": 10000 }, { "epoch": 1.486707262735779, "grad_norm": 0.7549251914024353, "learning_rate": 8.513292737264221e-06, "loss": 0.0729, "step": 10010 }, { "epoch": 1.488192484776474, "grad_norm": 0.8449190855026245, "learning_rate": 8.511807515223528e-06, "loss": 0.0838, "step": 10020 }, { "epoch": 1.4896777068171692, "grad_norm": 0.5930236577987671, "learning_rate": 8.51032229318283e-06, "loss": 0.0778, "step": 10030 }, { "epoch": 1.4911629288578643, "grad_norm": 1.0783277750015259, "learning_rate": 8.508837071142136e-06, "loss": 0.0951, "step": 10040 }, { "epoch": 1.4926481508985594, "grad_norm": 0.6660608053207397, "learning_rate": 8.507351849101443e-06, "loss": 0.0804, "step": 10050 }, { "epoch": 1.4941333729392543, "grad_norm": 0.7329056262969971, "learning_rate": 8.505866627060745e-06, "loss": 0.0658, "step": 10060 }, { "epoch": 1.4956185949799494, "grad_norm": 0.9868343472480774, "learning_rate": 8.504381405020051e-06, "loss": 0.0598, "step": 10070 }, { "epoch": 1.4971038170206445, "grad_norm": 0.851028323173523, "learning_rate": 8.502896182979356e-06, "loss": 0.0823, "step": 10080 }, { "epoch": 1.4985890390613397, "grad_norm": 0.9429115056991577, "learning_rate": 8.50141096093866e-06, "loss": 0.0762, "step": 10090 }, { "epoch": 1.5000742611020348, "grad_norm": 1.0672980546951294, "learning_rate": 8.499925738897966e-06, "loss": 0.1005, "step": 10100 }, { "epoch": 1.5015594831427297, "grad_norm": 0.5856247544288635, "learning_rate": 8.498440516857271e-06, "loss": 0.0566, "step": 10110 }, { "epoch": 1.503044705183425, "grad_norm": 0.8225235939025879, "learning_rate": 8.496955294816575e-06, "loss": 0.0627, "step": 10120 }, { "epoch": 1.50452992722412, "grad_norm": 0.6043829917907715, "learning_rate": 8.495470072775881e-06, "loss": 0.0823, "step": 10130 }, { "epoch": 1.506015149264815, "grad_norm": 1.159949541091919, "learning_rate": 8.493984850735186e-06, "loss": 0.0661, "step": 10140 }, { "epoch": 1.5075003713055102, "grad_norm": 0.4439176917076111, "learning_rate": 8.49249962869449e-06, "loss": 0.084, "step": 10150 }, { "epoch": 1.5089855933462053, "grad_norm": 0.9722158908843994, "learning_rate": 8.491014406653796e-06, "loss": 0.0814, "step": 10160 }, { "epoch": 1.5104708153869004, "grad_norm": 0.7936907410621643, "learning_rate": 8.4895291846131e-06, "loss": 0.0846, "step": 10170 }, { "epoch": 1.5119560374275953, "grad_norm": 1.1649501323699951, "learning_rate": 8.488043962572405e-06, "loss": 0.0571, "step": 10180 }, { "epoch": 1.5134412594682907, "grad_norm": 0.690048098564148, "learning_rate": 8.486558740531711e-06, "loss": 0.0976, "step": 10190 }, { "epoch": 1.5149264815089856, "grad_norm": 1.2330018281936646, "learning_rate": 8.485073518491014e-06, "loss": 0.0663, "step": 10200 }, { "epoch": 1.5164117035496807, "grad_norm": 0.9722781777381897, "learning_rate": 8.48358829645032e-06, "loss": 0.0618, "step": 10210 }, { "epoch": 1.5178969255903758, "grad_norm": 0.9569472074508667, "learning_rate": 8.482103074409625e-06, "loss": 0.0753, "step": 10220 }, { "epoch": 1.5193821476310707, "grad_norm": 0.5346283316612244, "learning_rate": 8.480617852368929e-06, "loss": 0.0699, "step": 10230 }, { "epoch": 1.520867369671766, "grad_norm": 0.6866028308868408, "learning_rate": 8.479132630328235e-06, "loss": 0.0764, "step": 10240 }, { "epoch": 1.522352591712461, "grad_norm": 1.042005181312561, "learning_rate": 8.47764740828754e-06, "loss": 0.0731, "step": 10250 }, { "epoch": 1.523837813753156, "grad_norm": 0.5577150583267212, "learning_rate": 8.476162186246844e-06, "loss": 0.0851, "step": 10260 }, { "epoch": 1.5253230357938512, "grad_norm": 0.5276694893836975, "learning_rate": 8.47467696420615e-06, "loss": 0.0534, "step": 10270 }, { "epoch": 1.526808257834546, "grad_norm": 0.6905069947242737, "learning_rate": 8.473191742165455e-06, "loss": 0.091, "step": 10280 }, { "epoch": 1.5282934798752414, "grad_norm": 0.3890121877193451, "learning_rate": 8.471706520124759e-06, "loss": 0.0794, "step": 10290 }, { "epoch": 1.5297787019159363, "grad_norm": 1.8997670412063599, "learning_rate": 8.470221298084065e-06, "loss": 0.0778, "step": 10300 }, { "epoch": 1.5312639239566315, "grad_norm": 1.0573625564575195, "learning_rate": 8.46873607604337e-06, "loss": 0.0758, "step": 10310 }, { "epoch": 1.5327491459973266, "grad_norm": 0.6973865628242493, "learning_rate": 8.467250854002674e-06, "loss": 0.0797, "step": 10320 }, { "epoch": 1.5342343680380217, "grad_norm": 1.8891311883926392, "learning_rate": 8.465765631961979e-06, "loss": 0.0709, "step": 10330 }, { "epoch": 1.5357195900787168, "grad_norm": 1.0354690551757812, "learning_rate": 8.464280409921285e-06, "loss": 0.0747, "step": 10340 }, { "epoch": 1.5372048121194117, "grad_norm": 0.9112366437911987, "learning_rate": 8.462795187880589e-06, "loss": 0.0794, "step": 10350 }, { "epoch": 1.538690034160107, "grad_norm": 1.0346076488494873, "learning_rate": 8.461309965839894e-06, "loss": 0.074, "step": 10360 }, { "epoch": 1.540175256200802, "grad_norm": 1.108900547027588, "learning_rate": 8.459824743799198e-06, "loss": 0.0797, "step": 10370 }, { "epoch": 1.541660478241497, "grad_norm": 0.5557714700698853, "learning_rate": 8.458339521758504e-06, "loss": 0.0792, "step": 10380 }, { "epoch": 1.5431457002821922, "grad_norm": 0.8850210309028625, "learning_rate": 8.456854299717809e-06, "loss": 0.061, "step": 10390 }, { "epoch": 1.5446309223228871, "grad_norm": 1.057712435722351, "learning_rate": 8.455369077677113e-06, "loss": 0.0889, "step": 10400 }, { "epoch": 1.5461161443635825, "grad_norm": 1.0709397792816162, "learning_rate": 8.453883855636419e-06, "loss": 0.0661, "step": 10410 }, { "epoch": 1.5476013664042774, "grad_norm": 1.3140729665756226, "learning_rate": 8.452398633595724e-06, "loss": 0.0694, "step": 10420 }, { "epoch": 1.5490865884449725, "grad_norm": 0.6008923053741455, "learning_rate": 8.450913411555028e-06, "loss": 0.0644, "step": 10430 }, { "epoch": 1.5505718104856676, "grad_norm": 0.8166418075561523, "learning_rate": 8.449428189514332e-06, "loss": 0.0616, "step": 10440 }, { "epoch": 1.5520570325263627, "grad_norm": 1.0653516054153442, "learning_rate": 8.447942967473639e-06, "loss": 0.0842, "step": 10450 }, { "epoch": 1.5535422545670579, "grad_norm": 1.1640278100967407, "learning_rate": 8.446457745432943e-06, "loss": 0.0669, "step": 10460 }, { "epoch": 1.5550274766077528, "grad_norm": 1.5584324598312378, "learning_rate": 8.444972523392247e-06, "loss": 0.0671, "step": 10470 }, { "epoch": 1.556512698648448, "grad_norm": 0.6926531195640564, "learning_rate": 8.443487301351554e-06, "loss": 0.0703, "step": 10480 }, { "epoch": 1.557997920689143, "grad_norm": 1.2360625267028809, "learning_rate": 8.442002079310858e-06, "loss": 0.0771, "step": 10490 }, { "epoch": 1.5594831427298381, "grad_norm": 1.1805577278137207, "learning_rate": 8.440516857270162e-06, "loss": 0.0665, "step": 10500 }, { "epoch": 1.5609683647705332, "grad_norm": 0.6443304419517517, "learning_rate": 8.439031635229467e-06, "loss": 0.0645, "step": 10510 }, { "epoch": 1.5624535868112281, "grad_norm": 0.92369145154953, "learning_rate": 8.437546413188771e-06, "loss": 0.0547, "step": 10520 }, { "epoch": 1.5639388088519235, "grad_norm": 0.5048189759254456, "learning_rate": 8.436061191148077e-06, "loss": 0.0737, "step": 10530 }, { "epoch": 1.5654240308926184, "grad_norm": 0.6624378561973572, "learning_rate": 8.434575969107382e-06, "loss": 0.0757, "step": 10540 }, { "epoch": 1.5669092529333135, "grad_norm": 0.5888051986694336, "learning_rate": 8.433090747066686e-06, "loss": 0.0664, "step": 10550 }, { "epoch": 1.5683944749740086, "grad_norm": 1.3048902750015259, "learning_rate": 8.431605525025992e-06, "loss": 0.071, "step": 10560 }, { "epoch": 1.5698796970147035, "grad_norm": 0.6339612007141113, "learning_rate": 8.430120302985297e-06, "loss": 0.0737, "step": 10570 }, { "epoch": 1.5713649190553989, "grad_norm": 0.9797409176826477, "learning_rate": 8.428635080944601e-06, "loss": 0.0743, "step": 10580 }, { "epoch": 1.5728501410960938, "grad_norm": 0.8972368240356445, "learning_rate": 8.427149858903907e-06, "loss": 0.0625, "step": 10590 }, { "epoch": 1.574335363136789, "grad_norm": 0.6709739565849304, "learning_rate": 8.425664636863212e-06, "loss": 0.0602, "step": 10600 }, { "epoch": 1.575820585177484, "grad_norm": 0.9978169202804565, "learning_rate": 8.424179414822516e-06, "loss": 0.0593, "step": 10610 }, { "epoch": 1.5773058072181791, "grad_norm": 1.1661869287490845, "learning_rate": 8.422694192781822e-06, "loss": 0.0692, "step": 10620 }, { "epoch": 1.5787910292588743, "grad_norm": 0.9158114194869995, "learning_rate": 8.421208970741127e-06, "loss": 0.0609, "step": 10630 }, { "epoch": 1.5802762512995692, "grad_norm": 0.8171471953392029, "learning_rate": 8.419723748700431e-06, "loss": 0.0645, "step": 10640 }, { "epoch": 1.5817614733402645, "grad_norm": 0.5352485179901123, "learning_rate": 8.418238526659737e-06, "loss": 0.0776, "step": 10650 }, { "epoch": 1.5832466953809594, "grad_norm": 0.9403135776519775, "learning_rate": 8.41675330461904e-06, "loss": 0.0865, "step": 10660 }, { "epoch": 1.5847319174216545, "grad_norm": 0.7482515573501587, "learning_rate": 8.415268082578346e-06, "loss": 0.056, "step": 10670 }, { "epoch": 1.5862171394623497, "grad_norm": 0.3600064814090729, "learning_rate": 8.41378286053765e-06, "loss": 0.0766, "step": 10680 }, { "epoch": 1.5877023615030446, "grad_norm": 1.0959473848342896, "learning_rate": 8.412297638496955e-06, "loss": 0.0699, "step": 10690 }, { "epoch": 1.58918758354374, "grad_norm": 0.479476660490036, "learning_rate": 8.410812416456261e-06, "loss": 0.0676, "step": 10700 }, { "epoch": 1.5906728055844348, "grad_norm": 1.0282557010650635, "learning_rate": 8.409327194415566e-06, "loss": 0.0993, "step": 10710 }, { "epoch": 1.59215802762513, "grad_norm": 0.6192666888237, "learning_rate": 8.40784197237487e-06, "loss": 0.0599, "step": 10720 }, { "epoch": 1.593643249665825, "grad_norm": 0.8534938097000122, "learning_rate": 8.406356750334176e-06, "loss": 0.0635, "step": 10730 }, { "epoch": 1.5951284717065202, "grad_norm": 1.2471647262573242, "learning_rate": 8.40487152829348e-06, "loss": 0.0793, "step": 10740 }, { "epoch": 1.5966136937472153, "grad_norm": 0.9096415638923645, "learning_rate": 8.403386306252785e-06, "loss": 0.0877, "step": 10750 }, { "epoch": 1.5980989157879102, "grad_norm": 0.5800266861915588, "learning_rate": 8.401901084212091e-06, "loss": 0.0669, "step": 10760 }, { "epoch": 1.5995841378286055, "grad_norm": 1.3576295375823975, "learning_rate": 8.400415862171396e-06, "loss": 0.0812, "step": 10770 }, { "epoch": 1.6010693598693004, "grad_norm": 1.076351523399353, "learning_rate": 8.3989306401307e-06, "loss": 0.0692, "step": 10780 }, { "epoch": 1.6025545819099956, "grad_norm": 1.3364313840866089, "learning_rate": 8.397445418090006e-06, "loss": 0.0735, "step": 10790 }, { "epoch": 1.6040398039506907, "grad_norm": 0.7803967595100403, "learning_rate": 8.395960196049309e-06, "loss": 0.0639, "step": 10800 }, { "epoch": 1.6055250259913856, "grad_norm": 0.8426043391227722, "learning_rate": 8.394474974008615e-06, "loss": 0.0795, "step": 10810 }, { "epoch": 1.607010248032081, "grad_norm": 0.7319867014884949, "learning_rate": 8.392989751967921e-06, "loss": 0.0739, "step": 10820 }, { "epoch": 1.6084954700727758, "grad_norm": 0.9259944558143616, "learning_rate": 8.391504529927224e-06, "loss": 0.0585, "step": 10830 }, { "epoch": 1.609980692113471, "grad_norm": 0.9031527042388916, "learning_rate": 8.39001930788653e-06, "loss": 0.0805, "step": 10840 }, { "epoch": 1.611465914154166, "grad_norm": 0.9274294972419739, "learning_rate": 8.388534085845835e-06, "loss": 0.0867, "step": 10850 }, { "epoch": 1.612951136194861, "grad_norm": 1.1121625900268555, "learning_rate": 8.387048863805139e-06, "loss": 0.0728, "step": 10860 }, { "epoch": 1.6144363582355563, "grad_norm": 1.631706714630127, "learning_rate": 8.385563641764445e-06, "loss": 0.0841, "step": 10870 }, { "epoch": 1.6159215802762512, "grad_norm": 0.4203210175037384, "learning_rate": 8.38407841972375e-06, "loss": 0.0599, "step": 10880 }, { "epoch": 1.6174068023169463, "grad_norm": 0.3640812039375305, "learning_rate": 8.382593197683054e-06, "loss": 0.0574, "step": 10890 }, { "epoch": 1.6188920243576415, "grad_norm": 1.4708698987960815, "learning_rate": 8.38110797564236e-06, "loss": 0.0679, "step": 10900 }, { "epoch": 1.6203772463983366, "grad_norm": 0.6763873100280762, "learning_rate": 8.379622753601664e-06, "loss": 0.0818, "step": 10910 }, { "epoch": 1.6218624684390317, "grad_norm": 0.653501033782959, "learning_rate": 8.378137531560969e-06, "loss": 0.0709, "step": 10920 }, { "epoch": 1.6233476904797266, "grad_norm": 1.1871000528335571, "learning_rate": 8.376652309520273e-06, "loss": 0.0776, "step": 10930 }, { "epoch": 1.624832912520422, "grad_norm": 1.2294886112213135, "learning_rate": 8.37516708747958e-06, "loss": 0.0795, "step": 10940 }, { "epoch": 1.6263181345611168, "grad_norm": 1.0966838598251343, "learning_rate": 8.373681865438884e-06, "loss": 0.0715, "step": 10950 }, { "epoch": 1.627803356601812, "grad_norm": 0.4268195331096649, "learning_rate": 8.372196643398188e-06, "loss": 0.0572, "step": 10960 }, { "epoch": 1.629288578642507, "grad_norm": 0.8819934129714966, "learning_rate": 8.370711421357493e-06, "loss": 0.0737, "step": 10970 }, { "epoch": 1.630773800683202, "grad_norm": 0.6157822012901306, "learning_rate": 8.369226199316799e-06, "loss": 0.0777, "step": 10980 }, { "epoch": 1.6322590227238973, "grad_norm": 1.0847469568252563, "learning_rate": 8.367740977276103e-06, "loss": 0.0871, "step": 10990 }, { "epoch": 1.6337442447645922, "grad_norm": 1.0257930755615234, "learning_rate": 8.366255755235408e-06, "loss": 0.0679, "step": 11000 }, { "epoch": 1.6352294668052874, "grad_norm": 0.9728336334228516, "learning_rate": 8.364770533194714e-06, "loss": 0.0565, "step": 11010 }, { "epoch": 1.6367146888459825, "grad_norm": 1.1196662187576294, "learning_rate": 8.363285311154018e-06, "loss": 0.0632, "step": 11020 }, { "epoch": 1.6381999108866776, "grad_norm": 0.7816924452781677, "learning_rate": 8.361800089113323e-06, "loss": 0.0894, "step": 11030 }, { "epoch": 1.6396851329273727, "grad_norm": 1.277018427848816, "learning_rate": 8.360314867072627e-06, "loss": 0.0642, "step": 11040 }, { "epoch": 1.6411703549680676, "grad_norm": 0.7299898862838745, "learning_rate": 8.358829645031933e-06, "loss": 0.0779, "step": 11050 }, { "epoch": 1.642655577008763, "grad_norm": 1.1374343633651733, "learning_rate": 8.357344422991238e-06, "loss": 0.062, "step": 11060 }, { "epoch": 1.6441407990494579, "grad_norm": 1.098451018333435, "learning_rate": 8.355859200950542e-06, "loss": 0.074, "step": 11070 }, { "epoch": 1.645626021090153, "grad_norm": 0.7910609245300293, "learning_rate": 8.354373978909848e-06, "loss": 0.0675, "step": 11080 }, { "epoch": 1.6471112431308481, "grad_norm": 0.676892876625061, "learning_rate": 8.352888756869153e-06, "loss": 0.0681, "step": 11090 }, { "epoch": 1.648596465171543, "grad_norm": 1.1646522283554077, "learning_rate": 8.351403534828457e-06, "loss": 0.0691, "step": 11100 }, { "epoch": 1.6500816872122384, "grad_norm": 1.6074599027633667, "learning_rate": 8.349918312787763e-06, "loss": 0.0718, "step": 11110 }, { "epoch": 1.6515669092529333, "grad_norm": 0.8327904939651489, "learning_rate": 8.348433090747068e-06, "loss": 0.0817, "step": 11120 }, { "epoch": 1.6530521312936284, "grad_norm": 0.8040626645088196, "learning_rate": 8.346947868706372e-06, "loss": 0.0774, "step": 11130 }, { "epoch": 1.6545373533343235, "grad_norm": 0.6327669620513916, "learning_rate": 8.345462646665677e-06, "loss": 0.0855, "step": 11140 }, { "epoch": 1.6560225753750184, "grad_norm": 1.1430256366729736, "learning_rate": 8.343977424624981e-06, "loss": 0.0673, "step": 11150 }, { "epoch": 1.6575077974157137, "grad_norm": 0.7978767156600952, "learning_rate": 8.342492202584287e-06, "loss": 0.0803, "step": 11160 }, { "epoch": 1.6589930194564086, "grad_norm": 0.7723683714866638, "learning_rate": 8.341006980543592e-06, "loss": 0.0841, "step": 11170 }, { "epoch": 1.6604782414971038, "grad_norm": 0.4924938380718231, "learning_rate": 8.339521758502896e-06, "loss": 0.0562, "step": 11180 }, { "epoch": 1.661963463537799, "grad_norm": 0.5342621207237244, "learning_rate": 8.338036536462202e-06, "loss": 0.0738, "step": 11190 }, { "epoch": 1.663448685578494, "grad_norm": 0.4038882851600647, "learning_rate": 8.336551314421507e-06, "loss": 0.0536, "step": 11200 }, { "epoch": 1.6649339076191891, "grad_norm": 1.0539206266403198, "learning_rate": 8.335066092380811e-06, "loss": 0.0806, "step": 11210 }, { "epoch": 1.666419129659884, "grad_norm": 0.8110040426254272, "learning_rate": 8.333580870340117e-06, "loss": 0.0718, "step": 11220 }, { "epoch": 1.6679043517005794, "grad_norm": 1.0522854328155518, "learning_rate": 8.332095648299422e-06, "loss": 0.0784, "step": 11230 }, { "epoch": 1.6693895737412743, "grad_norm": 1.0400513410568237, "learning_rate": 8.330610426258726e-06, "loss": 0.058, "step": 11240 }, { "epoch": 1.6708747957819694, "grad_norm": 0.8211175799369812, "learning_rate": 8.329125204218032e-06, "loss": 0.0707, "step": 11250 }, { "epoch": 1.6723600178226645, "grad_norm": 1.2590583562850952, "learning_rate": 8.327639982177335e-06, "loss": 0.0787, "step": 11260 }, { "epoch": 1.6738452398633594, "grad_norm": 1.1944266557693481, "learning_rate": 8.326154760136641e-06, "loss": 0.0774, "step": 11270 }, { "epoch": 1.6753304619040548, "grad_norm": 0.900720477104187, "learning_rate": 8.324669538095947e-06, "loss": 0.0661, "step": 11280 }, { "epoch": 1.6768156839447497, "grad_norm": 0.4314388036727905, "learning_rate": 8.32318431605525e-06, "loss": 0.0628, "step": 11290 }, { "epoch": 1.6783009059854448, "grad_norm": 0.7443414330482483, "learning_rate": 8.321699094014556e-06, "loss": 0.0806, "step": 11300 }, { "epoch": 1.67978612802614, "grad_norm": 0.8457760214805603, "learning_rate": 8.32021387197386e-06, "loss": 0.0639, "step": 11310 }, { "epoch": 1.681271350066835, "grad_norm": 1.0876080989837646, "learning_rate": 8.318728649933165e-06, "loss": 0.0857, "step": 11320 }, { "epoch": 1.6827565721075302, "grad_norm": 0.6526848077774048, "learning_rate": 8.317243427892471e-06, "loss": 0.0603, "step": 11330 }, { "epoch": 1.684241794148225, "grad_norm": 1.3263145685195923, "learning_rate": 8.315758205851775e-06, "loss": 0.0819, "step": 11340 }, { "epoch": 1.6857270161889204, "grad_norm": 0.781700611114502, "learning_rate": 8.31427298381108e-06, "loss": 0.0758, "step": 11350 }, { "epoch": 1.6872122382296153, "grad_norm": 0.9908173680305481, "learning_rate": 8.312787761770386e-06, "loss": 0.0681, "step": 11360 }, { "epoch": 1.6886974602703104, "grad_norm": 1.1831738948822021, "learning_rate": 8.31130253972969e-06, "loss": 0.0985, "step": 11370 }, { "epoch": 1.6901826823110055, "grad_norm": 0.9816341400146484, "learning_rate": 8.309817317688995e-06, "loss": 0.0777, "step": 11380 }, { "epoch": 1.6916679043517004, "grad_norm": 0.4548829197883606, "learning_rate": 8.308332095648301e-06, "loss": 0.0643, "step": 11390 }, { "epoch": 1.6931531263923958, "grad_norm": 0.7465964555740356, "learning_rate": 8.306846873607605e-06, "loss": 0.0554, "step": 11400 }, { "epoch": 1.6946383484330907, "grad_norm": 0.7259760499000549, "learning_rate": 8.30536165156691e-06, "loss": 0.0694, "step": 11410 }, { "epoch": 1.6961235704737858, "grad_norm": 1.0586191415786743, "learning_rate": 8.303876429526216e-06, "loss": 0.0796, "step": 11420 }, { "epoch": 1.697608792514481, "grad_norm": 1.0120733976364136, "learning_rate": 8.302391207485519e-06, "loss": 0.0755, "step": 11430 }, { "epoch": 1.6990940145551758, "grad_norm": 0.8327755928039551, "learning_rate": 8.300905985444825e-06, "loss": 0.0774, "step": 11440 }, { "epoch": 1.7005792365958712, "grad_norm": 0.8092065453529358, "learning_rate": 8.29942076340413e-06, "loss": 0.0659, "step": 11450 }, { "epoch": 1.702064458636566, "grad_norm": 0.67973792552948, "learning_rate": 8.297935541363434e-06, "loss": 0.081, "step": 11460 }, { "epoch": 1.7035496806772612, "grad_norm": 0.8815967440605164, "learning_rate": 8.29645031932274e-06, "loss": 0.0709, "step": 11470 }, { "epoch": 1.7050349027179563, "grad_norm": 0.5321938395500183, "learning_rate": 8.294965097282044e-06, "loss": 0.0701, "step": 11480 }, { "epoch": 1.7065201247586514, "grad_norm": 0.9086109399795532, "learning_rate": 8.293479875241349e-06, "loss": 0.0784, "step": 11490 }, { "epoch": 1.7080053467993466, "grad_norm": 0.6548341512680054, "learning_rate": 8.291994653200655e-06, "loss": 0.0796, "step": 11500 }, { "epoch": 1.7094905688400415, "grad_norm": 0.8495252728462219, "learning_rate": 8.29050943115996e-06, "loss": 0.0722, "step": 11510 }, { "epoch": 1.7109757908807368, "grad_norm": 0.9425396919250488, "learning_rate": 8.289024209119264e-06, "loss": 0.0788, "step": 11520 }, { "epoch": 1.7124610129214317, "grad_norm": 0.9021424055099487, "learning_rate": 8.28753898707857e-06, "loss": 0.0528, "step": 11530 }, { "epoch": 1.7139462349621268, "grad_norm": 0.8903309106826782, "learning_rate": 8.286053765037874e-06, "loss": 0.089, "step": 11540 }, { "epoch": 1.715431457002822, "grad_norm": 0.8740152716636658, "learning_rate": 8.284568542997179e-06, "loss": 0.0725, "step": 11550 }, { "epoch": 1.7169166790435169, "grad_norm": 0.9269596934318542, "learning_rate": 8.283083320956483e-06, "loss": 0.0617, "step": 11560 }, { "epoch": 1.7184019010842122, "grad_norm": 0.9568021893501282, "learning_rate": 8.28159809891579e-06, "loss": 0.079, "step": 11570 }, { "epoch": 1.719887123124907, "grad_norm": 0.48596277832984924, "learning_rate": 8.280112876875094e-06, "loss": 0.0717, "step": 11580 }, { "epoch": 1.7213723451656022, "grad_norm": 0.8595500588417053, "learning_rate": 8.278627654834398e-06, "loss": 0.0821, "step": 11590 }, { "epoch": 1.7228575672062973, "grad_norm": 1.2430893182754517, "learning_rate": 8.277142432793703e-06, "loss": 0.0834, "step": 11600 }, { "epoch": 1.7243427892469925, "grad_norm": 1.033395528793335, "learning_rate": 8.275657210753009e-06, "loss": 0.0747, "step": 11610 }, { "epoch": 1.7258280112876876, "grad_norm": 1.0437674522399902, "learning_rate": 8.274171988712313e-06, "loss": 0.0724, "step": 11620 }, { "epoch": 1.7273132333283825, "grad_norm": 0.4802999496459961, "learning_rate": 8.272686766671618e-06, "loss": 0.0665, "step": 11630 }, { "epoch": 1.7287984553690778, "grad_norm": 0.8245465159416199, "learning_rate": 8.271201544630924e-06, "loss": 0.0748, "step": 11640 }, { "epoch": 1.7302836774097727, "grad_norm": 1.0788676738739014, "learning_rate": 8.269716322590228e-06, "loss": 0.0703, "step": 11650 }, { "epoch": 1.7317688994504679, "grad_norm": 0.7828205227851868, "learning_rate": 8.268231100549533e-06, "loss": 0.087, "step": 11660 }, { "epoch": 1.733254121491163, "grad_norm": 0.7612335085868835, "learning_rate": 8.266745878508837e-06, "loss": 0.0559, "step": 11670 }, { "epoch": 1.7347393435318579, "grad_norm": 1.6535578966140747, "learning_rate": 8.265260656468143e-06, "loss": 0.098, "step": 11680 }, { "epoch": 1.7362245655725532, "grad_norm": 1.2606977224349976, "learning_rate": 8.263775434427448e-06, "loss": 0.0772, "step": 11690 }, { "epoch": 1.7377097876132481, "grad_norm": 0.5617197155952454, "learning_rate": 8.262290212386752e-06, "loss": 0.0788, "step": 11700 }, { "epoch": 1.7391950096539432, "grad_norm": 0.8515493869781494, "learning_rate": 8.260804990346058e-06, "loss": 0.0857, "step": 11710 }, { "epoch": 1.7406802316946384, "grad_norm": 0.7699450254440308, "learning_rate": 8.259319768305363e-06, "loss": 0.0731, "step": 11720 }, { "epoch": 1.7421654537353333, "grad_norm": 0.876996636390686, "learning_rate": 8.257834546264667e-06, "loss": 0.072, "step": 11730 }, { "epoch": 1.7436506757760286, "grad_norm": 0.9236237406730652, "learning_rate": 8.256349324223973e-06, "loss": 0.0795, "step": 11740 }, { "epoch": 1.7451358978167235, "grad_norm": 0.9495792388916016, "learning_rate": 8.254864102183278e-06, "loss": 0.0971, "step": 11750 }, { "epoch": 1.7466211198574186, "grad_norm": 1.4671180248260498, "learning_rate": 8.253378880142582e-06, "loss": 0.0752, "step": 11760 }, { "epoch": 1.7481063418981138, "grad_norm": 0.5850898027420044, "learning_rate": 8.251893658101886e-06, "loss": 0.0797, "step": 11770 }, { "epoch": 1.7495915639388089, "grad_norm": 0.6802064180374146, "learning_rate": 8.250408436061191e-06, "loss": 0.0686, "step": 11780 }, { "epoch": 1.751076785979504, "grad_norm": 1.377536654472351, "learning_rate": 8.248923214020497e-06, "loss": 0.0803, "step": 11790 }, { "epoch": 1.752562008020199, "grad_norm": 0.8861923217773438, "learning_rate": 8.247437991979801e-06, "loss": 0.0711, "step": 11800 }, { "epoch": 1.7540472300608942, "grad_norm": 0.9996696710586548, "learning_rate": 8.245952769939106e-06, "loss": 0.0676, "step": 11810 }, { "epoch": 1.7555324521015891, "grad_norm": 0.7519062757492065, "learning_rate": 8.244467547898412e-06, "loss": 0.0931, "step": 11820 }, { "epoch": 1.7570176741422843, "grad_norm": 0.7617473006248474, "learning_rate": 8.242982325857716e-06, "loss": 0.1047, "step": 11830 }, { "epoch": 1.7585028961829794, "grad_norm": 0.5361869931221008, "learning_rate": 8.24149710381702e-06, "loss": 0.0704, "step": 11840 }, { "epoch": 1.7599881182236743, "grad_norm": 1.0671625137329102, "learning_rate": 8.240011881776327e-06, "loss": 0.0721, "step": 11850 }, { "epoch": 1.7614733402643696, "grad_norm": 1.3257787227630615, "learning_rate": 8.238526659735631e-06, "loss": 0.0787, "step": 11860 }, { "epoch": 1.7629585623050645, "grad_norm": 0.47311872243881226, "learning_rate": 8.237041437694936e-06, "loss": 0.0844, "step": 11870 }, { "epoch": 1.7644437843457597, "grad_norm": 0.9832594990730286, "learning_rate": 8.235556215654242e-06, "loss": 0.0589, "step": 11880 }, { "epoch": 1.7659290063864548, "grad_norm": 1.4535409212112427, "learning_rate": 8.234070993613545e-06, "loss": 0.0787, "step": 11890 }, { "epoch": 1.76741422842715, "grad_norm": 0.5096237063407898, "learning_rate": 8.23258577157285e-06, "loss": 0.0668, "step": 11900 }, { "epoch": 1.768899450467845, "grad_norm": 0.4844669699668884, "learning_rate": 8.231100549532155e-06, "loss": 0.064, "step": 11910 }, { "epoch": 1.77038467250854, "grad_norm": 0.989778995513916, "learning_rate": 8.22961532749146e-06, "loss": 0.0832, "step": 11920 }, { "epoch": 1.7718698945492353, "grad_norm": 0.5861250758171082, "learning_rate": 8.228130105450766e-06, "loss": 0.0792, "step": 11930 }, { "epoch": 1.7733551165899302, "grad_norm": 0.930970311164856, "learning_rate": 8.22664488341007e-06, "loss": 0.0809, "step": 11940 }, { "epoch": 1.7748403386306253, "grad_norm": 1.105394959449768, "learning_rate": 8.225159661369375e-06, "loss": 0.0817, "step": 11950 }, { "epoch": 1.7763255606713204, "grad_norm": 0.5445694923400879, "learning_rate": 8.22367443932868e-06, "loss": 0.0833, "step": 11960 }, { "epoch": 1.7778107827120153, "grad_norm": 0.954166829586029, "learning_rate": 8.222189217287985e-06, "loss": 0.0633, "step": 11970 }, { "epoch": 1.7792960047527107, "grad_norm": 1.1215667724609375, "learning_rate": 8.22070399524729e-06, "loss": 0.0576, "step": 11980 }, { "epoch": 1.7807812267934056, "grad_norm": 1.0657153129577637, "learning_rate": 8.219218773206596e-06, "loss": 0.052, "step": 11990 }, { "epoch": 1.7822664488341007, "grad_norm": 0.8490333557128906, "learning_rate": 8.2177335511659e-06, "loss": 0.056, "step": 12000 }, { "epoch": 1.7837516708747958, "grad_norm": 0.7806589007377625, "learning_rate": 8.216248329125205e-06, "loss": 0.0554, "step": 12010 }, { "epoch": 1.7852368929154907, "grad_norm": 0.9815047979354858, "learning_rate": 8.21476310708451e-06, "loss": 0.0868, "step": 12020 }, { "epoch": 1.786722114956186, "grad_norm": 1.7111990451812744, "learning_rate": 8.213277885043815e-06, "loss": 0.0767, "step": 12030 }, { "epoch": 1.788207336996881, "grad_norm": 0.5546345710754395, "learning_rate": 8.21179266300312e-06, "loss": 0.074, "step": 12040 }, { "epoch": 1.789692559037576, "grad_norm": 0.6579710245132446, "learning_rate": 8.210307440962426e-06, "loss": 0.0711, "step": 12050 }, { "epoch": 1.7911777810782712, "grad_norm": 0.24335908889770508, "learning_rate": 8.208822218921729e-06, "loss": 0.0714, "step": 12060 }, { "epoch": 1.7926630031189663, "grad_norm": 1.7280948162078857, "learning_rate": 8.207336996881035e-06, "loss": 0.062, "step": 12070 }, { "epoch": 1.7941482251596614, "grad_norm": 0.5008382797241211, "learning_rate": 8.205851774840339e-06, "loss": 0.0704, "step": 12080 }, { "epoch": 1.7956334472003563, "grad_norm": 1.4439283609390259, "learning_rate": 8.204366552799644e-06, "loss": 0.0912, "step": 12090 }, { "epoch": 1.7971186692410517, "grad_norm": 0.7069539427757263, "learning_rate": 8.20288133075895e-06, "loss": 0.0556, "step": 12100 }, { "epoch": 1.7986038912817466, "grad_norm": 0.9885798692703247, "learning_rate": 8.201396108718254e-06, "loss": 0.0631, "step": 12110 }, { "epoch": 1.8000891133224417, "grad_norm": 0.8661073446273804, "learning_rate": 8.199910886677559e-06, "loss": 0.0687, "step": 12120 }, { "epoch": 1.8015743353631368, "grad_norm": 1.235711693763733, "learning_rate": 8.198425664636865e-06, "loss": 0.0695, "step": 12130 }, { "epoch": 1.8030595574038317, "grad_norm": 0.7834354639053345, "learning_rate": 8.196940442596169e-06, "loss": 0.0845, "step": 12140 }, { "epoch": 1.804544779444527, "grad_norm": 0.5605319142341614, "learning_rate": 8.195455220555474e-06, "loss": 0.0648, "step": 12150 }, { "epoch": 1.806030001485222, "grad_norm": 1.1360461711883545, "learning_rate": 8.19396999851478e-06, "loss": 0.0792, "step": 12160 }, { "epoch": 1.807515223525917, "grad_norm": 0.9927794337272644, "learning_rate": 8.192484776474084e-06, "loss": 0.0811, "step": 12170 }, { "epoch": 1.8090004455666122, "grad_norm": 0.9647291898727417, "learning_rate": 8.190999554433389e-06, "loss": 0.0767, "step": 12180 }, { "epoch": 1.8104856676073073, "grad_norm": 0.6713846325874329, "learning_rate": 8.189514332392693e-06, "loss": 0.0601, "step": 12190 }, { "epoch": 1.8119708896480025, "grad_norm": 0.9489417672157288, "learning_rate": 8.188029110351997e-06, "loss": 0.0781, "step": 12200 }, { "epoch": 1.8134561116886974, "grad_norm": 0.9353389739990234, "learning_rate": 8.186543888311304e-06, "loss": 0.0701, "step": 12210 }, { "epoch": 1.8149413337293927, "grad_norm": 1.8731766939163208, "learning_rate": 8.185058666270608e-06, "loss": 0.0798, "step": 12220 }, { "epoch": 1.8164265557700876, "grad_norm": 0.8568295240402222, "learning_rate": 8.183573444229912e-06, "loss": 0.0682, "step": 12230 }, { "epoch": 1.8179117778107827, "grad_norm": 0.7391093373298645, "learning_rate": 8.182088222189219e-06, "loss": 0.0693, "step": 12240 }, { "epoch": 1.8193969998514778, "grad_norm": 0.7887309193611145, "learning_rate": 8.180603000148523e-06, "loss": 0.075, "step": 12250 }, { "epoch": 1.8208822218921727, "grad_norm": 0.4091874659061432, "learning_rate": 8.179117778107827e-06, "loss": 0.0619, "step": 12260 }, { "epoch": 1.822367443932868, "grad_norm": 0.642400860786438, "learning_rate": 8.177632556067134e-06, "loss": 0.0799, "step": 12270 }, { "epoch": 1.823852665973563, "grad_norm": 0.6852913498878479, "learning_rate": 8.176147334026438e-06, "loss": 0.0962, "step": 12280 }, { "epoch": 1.8253378880142581, "grad_norm": 0.8183869123458862, "learning_rate": 8.174662111985742e-06, "loss": 0.059, "step": 12290 }, { "epoch": 1.8268231100549532, "grad_norm": 0.7981597781181335, "learning_rate": 8.173176889945047e-06, "loss": 0.0824, "step": 12300 }, { "epoch": 1.8283083320956481, "grad_norm": 0.4024955928325653, "learning_rate": 8.171691667904353e-06, "loss": 0.0895, "step": 12310 }, { "epoch": 1.8297935541363435, "grad_norm": 1.4834824800491333, "learning_rate": 8.170206445863657e-06, "loss": 0.0766, "step": 12320 }, { "epoch": 1.8312787761770384, "grad_norm": 1.0925889015197754, "learning_rate": 8.168721223822962e-06, "loss": 0.0823, "step": 12330 }, { "epoch": 1.8327639982177335, "grad_norm": 0.5679433345794678, "learning_rate": 8.167236001782268e-06, "loss": 0.0712, "step": 12340 }, { "epoch": 1.8342492202584286, "grad_norm": 0.6262628436088562, "learning_rate": 8.165750779741572e-06, "loss": 0.0834, "step": 12350 }, { "epoch": 1.8357344422991237, "grad_norm": 0.903304934501648, "learning_rate": 8.164265557700877e-06, "loss": 0.0757, "step": 12360 }, { "epoch": 1.8372196643398189, "grad_norm": 1.2422025203704834, "learning_rate": 8.162780335660181e-06, "loss": 0.0547, "step": 12370 }, { "epoch": 1.8387048863805138, "grad_norm": 0.7186129093170166, "learning_rate": 8.161295113619486e-06, "loss": 0.0574, "step": 12380 }, { "epoch": 1.8401901084212091, "grad_norm": 0.728678286075592, "learning_rate": 8.159809891578792e-06, "loss": 0.0754, "step": 12390 }, { "epoch": 1.841675330461904, "grad_norm": 1.3359917402267456, "learning_rate": 8.158324669538096e-06, "loss": 0.0937, "step": 12400 }, { "epoch": 1.8431605525025991, "grad_norm": 0.9950870871543884, "learning_rate": 8.1568394474974e-06, "loss": 0.0708, "step": 12410 }, { "epoch": 1.8446457745432943, "grad_norm": 0.5623972415924072, "learning_rate": 8.155354225456707e-06, "loss": 0.0796, "step": 12420 }, { "epoch": 1.8461309965839892, "grad_norm": 0.8156291246414185, "learning_rate": 8.153869003416011e-06, "loss": 0.0818, "step": 12430 }, { "epoch": 1.8476162186246845, "grad_norm": 1.0178585052490234, "learning_rate": 8.152383781375316e-06, "loss": 0.0717, "step": 12440 }, { "epoch": 1.8491014406653794, "grad_norm": 0.8862835168838501, "learning_rate": 8.150898559334622e-06, "loss": 0.0639, "step": 12450 }, { "epoch": 1.8505866627060745, "grad_norm": 1.0448565483093262, "learning_rate": 8.149413337293926e-06, "loss": 0.0868, "step": 12460 }, { "epoch": 1.8520718847467696, "grad_norm": 1.3027526140213013, "learning_rate": 8.14792811525323e-06, "loss": 0.0671, "step": 12470 }, { "epoch": 1.8535571067874648, "grad_norm": 0.7499740123748779, "learning_rate": 8.146442893212537e-06, "loss": 0.0749, "step": 12480 }, { "epoch": 1.85504232882816, "grad_norm": 1.153708577156067, "learning_rate": 8.14495767117184e-06, "loss": 0.0722, "step": 12490 }, { "epoch": 1.8565275508688548, "grad_norm": 0.627673864364624, "learning_rate": 8.143472449131146e-06, "loss": 0.0502, "step": 12500 }, { "epoch": 1.8580127729095501, "grad_norm": 1.2380660772323608, "learning_rate": 8.141987227090452e-06, "loss": 0.0751, "step": 12510 }, { "epoch": 1.859497994950245, "grad_norm": 0.9718501567840576, "learning_rate": 8.140502005049755e-06, "loss": 0.0813, "step": 12520 }, { "epoch": 1.8609832169909402, "grad_norm": 0.7296931743621826, "learning_rate": 8.13901678300906e-06, "loss": 0.0771, "step": 12530 }, { "epoch": 1.8624684390316353, "grad_norm": 0.6836357712745667, "learning_rate": 8.137531560968365e-06, "loss": 0.0657, "step": 12540 }, { "epoch": 1.8639536610723302, "grad_norm": 0.8493332266807556, "learning_rate": 8.13604633892767e-06, "loss": 0.0699, "step": 12550 }, { "epoch": 1.8654388831130255, "grad_norm": 0.5506488084793091, "learning_rate": 8.134561116886976e-06, "loss": 0.0656, "step": 12560 }, { "epoch": 1.8669241051537204, "grad_norm": 1.1473393440246582, "learning_rate": 8.13307589484628e-06, "loss": 0.0688, "step": 12570 }, { "epoch": 1.8684093271944155, "grad_norm": 0.6129547357559204, "learning_rate": 8.131590672805584e-06, "loss": 0.0615, "step": 12580 }, { "epoch": 1.8698945492351107, "grad_norm": 0.6691162586212158, "learning_rate": 8.13010545076489e-06, "loss": 0.0637, "step": 12590 }, { "epoch": 1.8713797712758056, "grad_norm": 1.2546964883804321, "learning_rate": 8.128620228724195e-06, "loss": 0.0796, "step": 12600 }, { "epoch": 1.872864993316501, "grad_norm": 0.7441532611846924, "learning_rate": 8.1271350066835e-06, "loss": 0.0818, "step": 12610 }, { "epoch": 1.8743502153571958, "grad_norm": 1.3013454675674438, "learning_rate": 8.125649784642806e-06, "loss": 0.0689, "step": 12620 }, { "epoch": 1.875835437397891, "grad_norm": 0.7669069170951843, "learning_rate": 8.12416456260211e-06, "loss": 0.0647, "step": 12630 }, { "epoch": 1.877320659438586, "grad_norm": 1.1804280281066895, "learning_rate": 8.122679340561414e-06, "loss": 0.0915, "step": 12640 }, { "epoch": 1.8788058814792812, "grad_norm": 1.0035256147384644, "learning_rate": 8.12119411852072e-06, "loss": 0.0844, "step": 12650 }, { "epoch": 1.8802911035199763, "grad_norm": 0.7921715974807739, "learning_rate": 8.119708896480023e-06, "loss": 0.0752, "step": 12660 }, { "epoch": 1.8817763255606712, "grad_norm": 0.3802033066749573, "learning_rate": 8.11822367443933e-06, "loss": 0.0712, "step": 12670 }, { "epoch": 1.8832615476013665, "grad_norm": 0.45273879170417786, "learning_rate": 8.116738452398636e-06, "loss": 0.0694, "step": 12680 }, { "epoch": 1.8847467696420614, "grad_norm": 0.9187343716621399, "learning_rate": 8.115253230357938e-06, "loss": 0.0643, "step": 12690 }, { "epoch": 1.8862319916827566, "grad_norm": 0.7383594512939453, "learning_rate": 8.113768008317244e-06, "loss": 0.067, "step": 12700 }, { "epoch": 1.8877172137234517, "grad_norm": 1.0016905069351196, "learning_rate": 8.112282786276549e-06, "loss": 0.0725, "step": 12710 }, { "epoch": 1.8892024357641466, "grad_norm": 0.43434959650039673, "learning_rate": 8.110797564235853e-06, "loss": 0.0708, "step": 12720 }, { "epoch": 1.890687657804842, "grad_norm": 0.5876697897911072, "learning_rate": 8.10931234219516e-06, "loss": 0.0771, "step": 12730 }, { "epoch": 1.8921728798455368, "grad_norm": 0.7081173062324524, "learning_rate": 8.107827120154464e-06, "loss": 0.0798, "step": 12740 }, { "epoch": 1.893658101886232, "grad_norm": 0.8173850774765015, "learning_rate": 8.106341898113768e-06, "loss": 0.0733, "step": 12750 }, { "epoch": 1.895143323926927, "grad_norm": 1.0225803852081299, "learning_rate": 8.104856676073074e-06, "loss": 0.0657, "step": 12760 }, { "epoch": 1.8966285459676222, "grad_norm": 0.45076829195022583, "learning_rate": 8.103371454032379e-06, "loss": 0.0643, "step": 12770 }, { "epoch": 1.8981137680083173, "grad_norm": 0.8104088306427002, "learning_rate": 8.101886231991683e-06, "loss": 0.069, "step": 12780 }, { "epoch": 1.8995989900490122, "grad_norm": 0.32960525155067444, "learning_rate": 8.10040100995099e-06, "loss": 0.0722, "step": 12790 }, { "epoch": 1.9010842120897076, "grad_norm": 0.983810544013977, "learning_rate": 8.098915787910294e-06, "loss": 0.0593, "step": 12800 }, { "epoch": 1.9025694341304025, "grad_norm": 0.5095059871673584, "learning_rate": 8.097430565869598e-06, "loss": 0.0692, "step": 12810 }, { "epoch": 1.9040546561710976, "grad_norm": 0.8406579494476318, "learning_rate": 8.095945343828903e-06, "loss": 0.0691, "step": 12820 }, { "epoch": 1.9055398782117927, "grad_norm": 0.5694898962974548, "learning_rate": 8.094460121788207e-06, "loss": 0.0822, "step": 12830 }, { "epoch": 1.9070251002524876, "grad_norm": 0.5778987407684326, "learning_rate": 8.092974899747513e-06, "loss": 0.0726, "step": 12840 }, { "epoch": 1.908510322293183, "grad_norm": 0.5937968492507935, "learning_rate": 8.091489677706818e-06, "loss": 0.0759, "step": 12850 }, { "epoch": 1.9099955443338779, "grad_norm": 0.6318387389183044, "learning_rate": 8.090004455666122e-06, "loss": 0.0771, "step": 12860 }, { "epoch": 1.911480766374573, "grad_norm": 0.5258229374885559, "learning_rate": 8.088519233625428e-06, "loss": 0.0674, "step": 12870 }, { "epoch": 1.912965988415268, "grad_norm": 0.7556629776954651, "learning_rate": 8.087034011584733e-06, "loss": 0.074, "step": 12880 }, { "epoch": 1.914451210455963, "grad_norm": 0.7217100262641907, "learning_rate": 8.085548789544037e-06, "loss": 0.0578, "step": 12890 }, { "epoch": 1.9159364324966583, "grad_norm": 0.6837291717529297, "learning_rate": 8.084063567503342e-06, "loss": 0.0592, "step": 12900 }, { "epoch": 1.9174216545373532, "grad_norm": 0.6772293448448181, "learning_rate": 8.082578345462648e-06, "loss": 0.0754, "step": 12910 }, { "epoch": 1.9189068765780484, "grad_norm": 0.7865071892738342, "learning_rate": 8.081093123421952e-06, "loss": 0.0777, "step": 12920 }, { "epoch": 1.9203920986187435, "grad_norm": 0.5671284198760986, "learning_rate": 8.079607901381257e-06, "loss": 0.0815, "step": 12930 }, { "epoch": 1.9218773206594386, "grad_norm": 0.7648490071296692, "learning_rate": 8.078122679340563e-06, "loss": 0.0585, "step": 12940 }, { "epoch": 1.9233625427001337, "grad_norm": 0.4957009255886078, "learning_rate": 8.076637457299867e-06, "loss": 0.0508, "step": 12950 }, { "epoch": 1.9248477647408286, "grad_norm": 0.8752880692481995, "learning_rate": 8.075152235259172e-06, "loss": 0.0883, "step": 12960 }, { "epoch": 1.926332986781524, "grad_norm": 0.690645158290863, "learning_rate": 8.073667013218478e-06, "loss": 0.0591, "step": 12970 }, { "epoch": 1.9278182088222189, "grad_norm": 0.7108989357948303, "learning_rate": 8.072181791177782e-06, "loss": 0.0527, "step": 12980 }, { "epoch": 1.929303430862914, "grad_norm": 0.6949102282524109, "learning_rate": 8.070696569137087e-06, "loss": 0.0705, "step": 12990 }, { "epoch": 1.9307886529036091, "grad_norm": 1.1313326358795166, "learning_rate": 8.069211347096391e-06, "loss": 0.0627, "step": 13000 }, { "epoch": 1.932273874944304, "grad_norm": 0.7514609098434448, "learning_rate": 8.067726125055695e-06, "loss": 0.0607, "step": 13010 }, { "epoch": 1.9337590969849994, "grad_norm": 0.749814510345459, "learning_rate": 8.066240903015002e-06, "loss": 0.0598, "step": 13020 }, { "epoch": 1.9352443190256943, "grad_norm": 1.5375010967254639, "learning_rate": 8.064755680974306e-06, "loss": 0.0777, "step": 13030 }, { "epoch": 1.9367295410663894, "grad_norm": 1.0587717294692993, "learning_rate": 8.06327045893361e-06, "loss": 0.0808, "step": 13040 }, { "epoch": 1.9382147631070845, "grad_norm": 0.8014340400695801, "learning_rate": 8.061785236892917e-06, "loss": 0.0889, "step": 13050 }, { "epoch": 1.9396999851477796, "grad_norm": 0.5191594958305359, "learning_rate": 8.060300014852221e-06, "loss": 0.076, "step": 13060 }, { "epoch": 1.9411852071884748, "grad_norm": 0.6541637778282166, "learning_rate": 8.058814792811525e-06, "loss": 0.0701, "step": 13070 }, { "epoch": 1.9426704292291697, "grad_norm": 0.8594491481781006, "learning_rate": 8.057329570770832e-06, "loss": 0.0696, "step": 13080 }, { "epoch": 1.944155651269865, "grad_norm": 0.7585026025772095, "learning_rate": 8.055844348730136e-06, "loss": 0.0777, "step": 13090 }, { "epoch": 1.94564087331056, "grad_norm": 0.4566737711429596, "learning_rate": 8.05435912668944e-06, "loss": 0.0722, "step": 13100 }, { "epoch": 1.947126095351255, "grad_norm": 1.1220885515213013, "learning_rate": 8.052873904648747e-06, "loss": 0.0868, "step": 13110 }, { "epoch": 1.9486113173919501, "grad_norm": 0.7873674631118774, "learning_rate": 8.05138868260805e-06, "loss": 0.0651, "step": 13120 }, { "epoch": 1.950096539432645, "grad_norm": 0.9283306002616882, "learning_rate": 8.049903460567355e-06, "loss": 0.0732, "step": 13130 }, { "epoch": 1.9515817614733404, "grad_norm": 1.4717298746109009, "learning_rate": 8.04841823852666e-06, "loss": 0.0703, "step": 13140 }, { "epoch": 1.9530669835140353, "grad_norm": 1.004787564277649, "learning_rate": 8.046933016485964e-06, "loss": 0.0723, "step": 13150 }, { "epoch": 1.9545522055547304, "grad_norm": 0.5175033211708069, "learning_rate": 8.04544779444527e-06, "loss": 0.081, "step": 13160 }, { "epoch": 1.9560374275954255, "grad_norm": 0.9765888452529907, "learning_rate": 8.043962572404575e-06, "loss": 0.0685, "step": 13170 }, { "epoch": 1.9575226496361204, "grad_norm": 1.0930464267730713, "learning_rate": 8.04247735036388e-06, "loss": 0.0801, "step": 13180 }, { "epoch": 1.9590078716768158, "grad_norm": 0.6813475489616394, "learning_rate": 8.040992128323185e-06, "loss": 0.061, "step": 13190 }, { "epoch": 1.9604930937175107, "grad_norm": 0.9491771459579468, "learning_rate": 8.03950690628249e-06, "loss": 0.0682, "step": 13200 }, { "epoch": 1.9619783157582058, "grad_norm": 0.9751015305519104, "learning_rate": 8.038021684241794e-06, "loss": 0.07, "step": 13210 }, { "epoch": 1.963463537798901, "grad_norm": 0.6187069416046143, "learning_rate": 8.0365364622011e-06, "loss": 0.067, "step": 13220 }, { "epoch": 1.964948759839596, "grad_norm": 0.6812853217124939, "learning_rate": 8.035051240160405e-06, "loss": 0.0613, "step": 13230 }, { "epoch": 1.9664339818802912, "grad_norm": 0.4832938611507416, "learning_rate": 8.03356601811971e-06, "loss": 0.0661, "step": 13240 }, { "epoch": 1.967919203920986, "grad_norm": 0.44773080945014954, "learning_rate": 8.032080796079015e-06, "loss": 0.0617, "step": 13250 }, { "epoch": 1.9694044259616814, "grad_norm": 0.8043562769889832, "learning_rate": 8.03059557403832e-06, "loss": 0.064, "step": 13260 }, { "epoch": 1.9708896480023763, "grad_norm": 0.5656445622444153, "learning_rate": 8.029110351997624e-06, "loss": 0.0678, "step": 13270 }, { "epoch": 1.9723748700430714, "grad_norm": 0.43990078568458557, "learning_rate": 8.02762512995693e-06, "loss": 0.0619, "step": 13280 }, { "epoch": 1.9738600920837666, "grad_norm": 1.2974399328231812, "learning_rate": 8.026139907916233e-06, "loss": 0.076, "step": 13290 }, { "epoch": 1.9753453141244615, "grad_norm": 0.7306672930717468, "learning_rate": 8.02465468587554e-06, "loss": 0.0965, "step": 13300 }, { "epoch": 1.9768305361651568, "grad_norm": 0.657429575920105, "learning_rate": 8.023169463834844e-06, "loss": 0.0689, "step": 13310 }, { "epoch": 1.9783157582058517, "grad_norm": 0.7042214870452881, "learning_rate": 8.021684241794148e-06, "loss": 0.0562, "step": 13320 }, { "epoch": 1.9798009802465468, "grad_norm": 0.7432844638824463, "learning_rate": 8.020199019753454e-06, "loss": 0.0756, "step": 13330 }, { "epoch": 1.981286202287242, "grad_norm": 0.6771445870399475, "learning_rate": 8.018713797712759e-06, "loss": 0.0811, "step": 13340 }, { "epoch": 1.982771424327937, "grad_norm": 0.629199743270874, "learning_rate": 8.017228575672063e-06, "loss": 0.0528, "step": 13350 }, { "epoch": 1.9842566463686322, "grad_norm": 0.8837576508522034, "learning_rate": 8.01574335363137e-06, "loss": 0.0744, "step": 13360 }, { "epoch": 1.985741868409327, "grad_norm": 0.8143694996833801, "learning_rate": 8.014258131590674e-06, "loss": 0.0597, "step": 13370 }, { "epoch": 1.9872270904500224, "grad_norm": 0.993070662021637, "learning_rate": 8.012772909549978e-06, "loss": 0.0787, "step": 13380 }, { "epoch": 1.9887123124907173, "grad_norm": 1.5197769403457642, "learning_rate": 8.011287687509284e-06, "loss": 0.0722, "step": 13390 }, { "epoch": 1.9901975345314125, "grad_norm": 1.2251149415969849, "learning_rate": 8.009802465468589e-06, "loss": 0.0734, "step": 13400 }, { "epoch": 1.9916827565721076, "grad_norm": 0.6797492504119873, "learning_rate": 8.008317243427893e-06, "loss": 0.0755, "step": 13410 }, { "epoch": 1.9931679786128025, "grad_norm": 0.8411869406700134, "learning_rate": 8.006832021387198e-06, "loss": 0.083, "step": 13420 }, { "epoch": 1.9946532006534978, "grad_norm": 0.501990556716919, "learning_rate": 8.005346799346502e-06, "loss": 0.0728, "step": 13430 }, { "epoch": 1.9961384226941927, "grad_norm": 0.4618406891822815, "learning_rate": 8.003861577305808e-06, "loss": 0.0841, "step": 13440 }, { "epoch": 1.9976236447348878, "grad_norm": 1.0440362691879272, "learning_rate": 8.002376355265113e-06, "loss": 0.0647, "step": 13450 }, { "epoch": 1.999108866775583, "grad_norm": 1.502364993095398, "learning_rate": 8.000891133224417e-06, "loss": 0.0729, "step": 13460 }, { "epoch": 2.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.06133544072508812, "eval_runtime": 210.8496, "eval_samples_per_second": 180.313, "eval_steps_per_second": 5.639, "step": 13466 }, { "epoch": 2.000594088816278, "grad_norm": 0.5821937918663025, "learning_rate": 7.999405911183723e-06, "loss": 0.0607, "step": 13470 }, { "epoch": 2.002079310856973, "grad_norm": 0.681476354598999, "learning_rate": 7.997920689143028e-06, "loss": 0.053, "step": 13480 }, { "epoch": 2.003564532897668, "grad_norm": 0.7646535634994507, "learning_rate": 7.996435467102332e-06, "loss": 0.0786, "step": 13490 }, { "epoch": 2.0050497549383635, "grad_norm": 1.3491325378417969, "learning_rate": 7.994950245061638e-06, "loss": 0.0735, "step": 13500 }, { "epoch": 2.0065349769790584, "grad_norm": 0.9875779747962952, "learning_rate": 7.993465023020943e-06, "loss": 0.0746, "step": 13510 }, { "epoch": 2.0080201990197533, "grad_norm": 0.7491360902786255, "learning_rate": 7.991979800980247e-06, "loss": 0.0784, "step": 13520 }, { "epoch": 2.0095054210604486, "grad_norm": 1.0167654752731323, "learning_rate": 7.990494578939551e-06, "loss": 0.0704, "step": 13530 }, { "epoch": 2.0109906431011435, "grad_norm": 0.5021530985832214, "learning_rate": 7.989009356898858e-06, "loss": 0.07, "step": 13540 }, { "epoch": 2.012475865141839, "grad_norm": 1.1179542541503906, "learning_rate": 7.987524134858162e-06, "loss": 0.0716, "step": 13550 }, { "epoch": 2.0139610871825337, "grad_norm": 1.1701716184616089, "learning_rate": 7.986038912817466e-06, "loss": 0.0778, "step": 13560 }, { "epoch": 2.015446309223229, "grad_norm": 0.6975985169410706, "learning_rate": 7.984553690776773e-06, "loss": 0.0661, "step": 13570 }, { "epoch": 2.016931531263924, "grad_norm": 0.7828264832496643, "learning_rate": 7.983068468736077e-06, "loss": 0.069, "step": 13580 }, { "epoch": 2.018416753304619, "grad_norm": 1.1737070083618164, "learning_rate": 7.981583246695381e-06, "loss": 0.0913, "step": 13590 }, { "epoch": 2.0199019753453142, "grad_norm": 0.706625759601593, "learning_rate": 7.980098024654686e-06, "loss": 0.0716, "step": 13600 }, { "epoch": 2.021387197386009, "grad_norm": 0.4071919023990631, "learning_rate": 7.978612802613992e-06, "loss": 0.0659, "step": 13610 }, { "epoch": 2.0228724194267045, "grad_norm": 1.00408935546875, "learning_rate": 7.977127580573296e-06, "loss": 0.0731, "step": 13620 }, { "epoch": 2.0243576414673994, "grad_norm": 1.0669114589691162, "learning_rate": 7.9756423585326e-06, "loss": 0.08, "step": 13630 }, { "epoch": 2.0258428635080943, "grad_norm": 0.363745778799057, "learning_rate": 7.974157136491905e-06, "loss": 0.0467, "step": 13640 }, { "epoch": 2.0273280855487896, "grad_norm": 1.2333308458328247, "learning_rate": 7.972671914451211e-06, "loss": 0.0555, "step": 13650 }, { "epoch": 2.0288133075894845, "grad_norm": 1.272576093673706, "learning_rate": 7.971186692410516e-06, "loss": 0.0807, "step": 13660 }, { "epoch": 2.03029852963018, "grad_norm": 0.647402822971344, "learning_rate": 7.96970147036982e-06, "loss": 0.0724, "step": 13670 }, { "epoch": 2.0317837516708748, "grad_norm": 0.6416436433792114, "learning_rate": 7.968216248329126e-06, "loss": 0.0573, "step": 13680 }, { "epoch": 2.0332689737115697, "grad_norm": 0.5371004343032837, "learning_rate": 7.96673102628843e-06, "loss": 0.0574, "step": 13690 }, { "epoch": 2.034754195752265, "grad_norm": 0.7714745998382568, "learning_rate": 7.965245804247735e-06, "loss": 0.0824, "step": 13700 }, { "epoch": 2.03623941779296, "grad_norm": 1.1034531593322754, "learning_rate": 7.963760582207041e-06, "loss": 0.0733, "step": 13710 }, { "epoch": 2.0377246398336553, "grad_norm": 1.0166898965835571, "learning_rate": 7.962275360166346e-06, "loss": 0.0738, "step": 13720 }, { "epoch": 2.03920986187435, "grad_norm": 1.3070622682571411, "learning_rate": 7.96079013812565e-06, "loss": 0.0818, "step": 13730 }, { "epoch": 2.0406950839150455, "grad_norm": 0.7084892392158508, "learning_rate": 7.959304916084956e-06, "loss": 0.0538, "step": 13740 }, { "epoch": 2.0421803059557404, "grad_norm": 0.5742867588996887, "learning_rate": 7.957819694044259e-06, "loss": 0.0875, "step": 13750 }, { "epoch": 2.0436655279964353, "grad_norm": 0.7937983870506287, "learning_rate": 7.956334472003565e-06, "loss": 0.0639, "step": 13760 }, { "epoch": 2.0451507500371306, "grad_norm": 1.001183271408081, "learning_rate": 7.95484924996287e-06, "loss": 0.0621, "step": 13770 }, { "epoch": 2.0466359720778255, "grad_norm": 0.7804184556007385, "learning_rate": 7.953364027922174e-06, "loss": 0.0748, "step": 13780 }, { "epoch": 2.048121194118521, "grad_norm": 0.8443603515625, "learning_rate": 7.95187880588148e-06, "loss": 0.0661, "step": 13790 }, { "epoch": 2.049606416159216, "grad_norm": 0.716850996017456, "learning_rate": 7.950393583840785e-06, "loss": 0.0754, "step": 13800 }, { "epoch": 2.0510916381999107, "grad_norm": 1.1926591396331787, "learning_rate": 7.948908361800089e-06, "loss": 0.0601, "step": 13810 }, { "epoch": 2.052576860240606, "grad_norm": 1.0652192831039429, "learning_rate": 7.947423139759395e-06, "loss": 0.076, "step": 13820 }, { "epoch": 2.054062082281301, "grad_norm": 1.3740670680999756, "learning_rate": 7.9459379177187e-06, "loss": 0.0739, "step": 13830 }, { "epoch": 2.0555473043219963, "grad_norm": 1.0437966585159302, "learning_rate": 7.944452695678004e-06, "loss": 0.0842, "step": 13840 }, { "epoch": 2.057032526362691, "grad_norm": 1.145579218864441, "learning_rate": 7.94296747363731e-06, "loss": 0.0754, "step": 13850 }, { "epoch": 2.0585177484033865, "grad_norm": 0.6400442123413086, "learning_rate": 7.941482251596615e-06, "loss": 0.071, "step": 13860 }, { "epoch": 2.0600029704440814, "grad_norm": 0.7294432520866394, "learning_rate": 7.939997029555919e-06, "loss": 0.0712, "step": 13870 }, { "epoch": 2.0614881924847763, "grad_norm": 1.1582225561141968, "learning_rate": 7.938511807515225e-06, "loss": 0.0717, "step": 13880 }, { "epoch": 2.0629734145254717, "grad_norm": 1.0745748281478882, "learning_rate": 7.937026585474528e-06, "loss": 0.0678, "step": 13890 }, { "epoch": 2.0644586365661666, "grad_norm": 0.6209663152694702, "learning_rate": 7.935541363433834e-06, "loss": 0.0594, "step": 13900 }, { "epoch": 2.065943858606862, "grad_norm": 0.42464518547058105, "learning_rate": 7.93405614139314e-06, "loss": 0.0653, "step": 13910 }, { "epoch": 2.067429080647557, "grad_norm": 1.677648663520813, "learning_rate": 7.932570919352443e-06, "loss": 0.0769, "step": 13920 }, { "epoch": 2.0689143026882517, "grad_norm": 0.6147602796554565, "learning_rate": 7.931085697311749e-06, "loss": 0.0738, "step": 13930 }, { "epoch": 2.070399524728947, "grad_norm": 0.7974849939346313, "learning_rate": 7.929600475271053e-06, "loss": 0.0844, "step": 13940 }, { "epoch": 2.071884746769642, "grad_norm": 1.607678771018982, "learning_rate": 7.928115253230358e-06, "loss": 0.0669, "step": 13950 }, { "epoch": 2.0733699688103373, "grad_norm": 0.8933297991752625, "learning_rate": 7.926630031189664e-06, "loss": 0.0798, "step": 13960 }, { "epoch": 2.074855190851032, "grad_norm": 0.7862673997879028, "learning_rate": 7.925144809148968e-06, "loss": 0.071, "step": 13970 }, { "epoch": 2.076340412891727, "grad_norm": 0.622954249382019, "learning_rate": 7.923659587108273e-06, "loss": 0.079, "step": 13980 }, { "epoch": 2.0778256349324224, "grad_norm": 0.48314040899276733, "learning_rate": 7.922174365067579e-06, "loss": 0.052, "step": 13990 }, { "epoch": 2.0793108569731173, "grad_norm": 0.7916228175163269, "learning_rate": 7.920689143026883e-06, "loss": 0.0904, "step": 14000 }, { "epoch": 2.0807960790138127, "grad_norm": 0.45953261852264404, "learning_rate": 7.919203920986188e-06, "loss": 0.0764, "step": 14010 }, { "epoch": 2.0822813010545076, "grad_norm": 0.6490451693534851, "learning_rate": 7.917718698945494e-06, "loss": 0.0581, "step": 14020 }, { "epoch": 2.083766523095203, "grad_norm": 1.2762033939361572, "learning_rate": 7.916233476904798e-06, "loss": 0.0871, "step": 14030 }, { "epoch": 2.085251745135898, "grad_norm": 0.9953591227531433, "learning_rate": 7.914748254864103e-06, "loss": 0.0629, "step": 14040 }, { "epoch": 2.0867369671765927, "grad_norm": 0.6095679402351379, "learning_rate": 7.913263032823407e-06, "loss": 0.0748, "step": 14050 }, { "epoch": 2.088222189217288, "grad_norm": 1.2095739841461182, "learning_rate": 7.911777810782712e-06, "loss": 0.073, "step": 14060 }, { "epoch": 2.089707411257983, "grad_norm": 0.7403334379196167, "learning_rate": 7.910292588742018e-06, "loss": 0.0924, "step": 14070 }, { "epoch": 2.0911926332986783, "grad_norm": 0.6129160523414612, "learning_rate": 7.908807366701322e-06, "loss": 0.0688, "step": 14080 }, { "epoch": 2.0926778553393732, "grad_norm": 0.6304017305374146, "learning_rate": 7.907322144660627e-06, "loss": 0.0662, "step": 14090 }, { "epoch": 2.094163077380068, "grad_norm": 0.8260060548782349, "learning_rate": 7.905836922619933e-06, "loss": 0.0894, "step": 14100 }, { "epoch": 2.0956482994207635, "grad_norm": 1.0576426982879639, "learning_rate": 7.904351700579237e-06, "loss": 0.0831, "step": 14110 }, { "epoch": 2.0971335214614584, "grad_norm": 0.8142070174217224, "learning_rate": 7.902866478538542e-06, "loss": 0.0718, "step": 14120 }, { "epoch": 2.0986187435021537, "grad_norm": 0.44303345680236816, "learning_rate": 7.901381256497848e-06, "loss": 0.0687, "step": 14130 }, { "epoch": 2.1001039655428486, "grad_norm": 1.0897902250289917, "learning_rate": 7.899896034457152e-06, "loss": 0.0796, "step": 14140 }, { "epoch": 2.101589187583544, "grad_norm": 1.709612488746643, "learning_rate": 7.898410812416457e-06, "loss": 0.0658, "step": 14150 }, { "epoch": 2.103074409624239, "grad_norm": 1.4813929796218872, "learning_rate": 7.896925590375761e-06, "loss": 0.0909, "step": 14160 }, { "epoch": 2.1045596316649338, "grad_norm": 1.0697726011276245, "learning_rate": 7.895440368335067e-06, "loss": 0.0604, "step": 14170 }, { "epoch": 2.106044853705629, "grad_norm": 0.8308620452880859, "learning_rate": 7.893955146294372e-06, "loss": 0.0819, "step": 14180 }, { "epoch": 2.107530075746324, "grad_norm": 0.589131236076355, "learning_rate": 7.892469924253676e-06, "loss": 0.0403, "step": 14190 }, { "epoch": 2.1090152977870193, "grad_norm": 1.19193696975708, "learning_rate": 7.890984702212982e-06, "loss": 0.0693, "step": 14200 }, { "epoch": 2.1105005198277142, "grad_norm": 0.6469309329986572, "learning_rate": 7.889499480172287e-06, "loss": 0.0679, "step": 14210 }, { "epoch": 2.111985741868409, "grad_norm": 0.9464824795722961, "learning_rate": 7.888014258131591e-06, "loss": 0.0708, "step": 14220 }, { "epoch": 2.1134709639091045, "grad_norm": 0.47763773798942566, "learning_rate": 7.886529036090896e-06, "loss": 0.0622, "step": 14230 }, { "epoch": 2.1149561859497994, "grad_norm": 0.7277671694755554, "learning_rate": 7.885043814050202e-06, "loss": 0.0795, "step": 14240 }, { "epoch": 2.1164414079904947, "grad_norm": 1.1749573945999146, "learning_rate": 7.883558592009506e-06, "loss": 0.0734, "step": 14250 }, { "epoch": 2.1179266300311896, "grad_norm": 0.9396737813949585, "learning_rate": 7.88207336996881e-06, "loss": 0.0711, "step": 14260 }, { "epoch": 2.1194118520718845, "grad_norm": 1.0463459491729736, "learning_rate": 7.880588147928115e-06, "loss": 0.0726, "step": 14270 }, { "epoch": 2.12089707411258, "grad_norm": 0.6679229140281677, "learning_rate": 7.879102925887421e-06, "loss": 0.0535, "step": 14280 }, { "epoch": 2.122382296153275, "grad_norm": 0.7594313621520996, "learning_rate": 7.877617703846726e-06, "loss": 0.0616, "step": 14290 }, { "epoch": 2.12386751819397, "grad_norm": 0.7679316997528076, "learning_rate": 7.87613248180603e-06, "loss": 0.0522, "step": 14300 }, { "epoch": 2.125352740234665, "grad_norm": 0.5544955134391785, "learning_rate": 7.874647259765336e-06, "loss": 0.0646, "step": 14310 }, { "epoch": 2.1268379622753604, "grad_norm": 0.9552668929100037, "learning_rate": 7.87316203772464e-06, "loss": 0.0577, "step": 14320 }, { "epoch": 2.1283231843160553, "grad_norm": 1.1328357458114624, "learning_rate": 7.871676815683945e-06, "loss": 0.0647, "step": 14330 }, { "epoch": 2.12980840635675, "grad_norm": 1.2579344511032104, "learning_rate": 7.870191593643251e-06, "loss": 0.0701, "step": 14340 }, { "epoch": 2.1312936283974455, "grad_norm": 0.7890368700027466, "learning_rate": 7.868706371602554e-06, "loss": 0.0574, "step": 14350 }, { "epoch": 2.1327788504381404, "grad_norm": 0.752335786819458, "learning_rate": 7.86722114956186e-06, "loss": 0.0522, "step": 14360 }, { "epoch": 2.1342640724788358, "grad_norm": 0.8492644429206848, "learning_rate": 7.865735927521166e-06, "loss": 0.07, "step": 14370 }, { "epoch": 2.1357492945195307, "grad_norm": 0.9102494120597839, "learning_rate": 7.864250705480469e-06, "loss": 0.0544, "step": 14380 }, { "epoch": 2.1372345165602256, "grad_norm": 0.9324311017990112, "learning_rate": 7.862765483439775e-06, "loss": 0.059, "step": 14390 }, { "epoch": 2.138719738600921, "grad_norm": 1.1309044361114502, "learning_rate": 7.86128026139908e-06, "loss": 0.0547, "step": 14400 }, { "epoch": 2.140204960641616, "grad_norm": 1.2628252506256104, "learning_rate": 7.859795039358384e-06, "loss": 0.0823, "step": 14410 }, { "epoch": 2.141690182682311, "grad_norm": 1.4814199209213257, "learning_rate": 7.85830981731769e-06, "loss": 0.0833, "step": 14420 }, { "epoch": 2.143175404723006, "grad_norm": 0.6828455328941345, "learning_rate": 7.856824595276994e-06, "loss": 0.0692, "step": 14430 }, { "epoch": 2.1446606267637014, "grad_norm": 1.0615395307540894, "learning_rate": 7.855339373236299e-06, "loss": 0.0823, "step": 14440 }, { "epoch": 2.1461458488043963, "grad_norm": 1.5419204235076904, "learning_rate": 7.853854151195605e-06, "loss": 0.0617, "step": 14450 }, { "epoch": 2.147631070845091, "grad_norm": 0.6285436749458313, "learning_rate": 7.85236892915491e-06, "loss": 0.075, "step": 14460 }, { "epoch": 2.1491162928857865, "grad_norm": 0.6257336139678955, "learning_rate": 7.850883707114214e-06, "loss": 0.0792, "step": 14470 }, { "epoch": 2.1506015149264814, "grad_norm": 1.2522633075714111, "learning_rate": 7.84939848507352e-06, "loss": 0.0864, "step": 14480 }, { "epoch": 2.152086736967177, "grad_norm": 1.2310116291046143, "learning_rate": 7.847913263032824e-06, "loss": 0.0731, "step": 14490 }, { "epoch": 2.1535719590078717, "grad_norm": 0.6669400930404663, "learning_rate": 7.846428040992129e-06, "loss": 0.0697, "step": 14500 }, { "epoch": 2.1550571810485666, "grad_norm": 1.038515567779541, "learning_rate": 7.844942818951435e-06, "loss": 0.0625, "step": 14510 }, { "epoch": 2.156542403089262, "grad_norm": 0.7487426400184631, "learning_rate": 7.843457596910738e-06, "loss": 0.0748, "step": 14520 }, { "epoch": 2.158027625129957, "grad_norm": 0.6837989687919617, "learning_rate": 7.841972374870044e-06, "loss": 0.0585, "step": 14530 }, { "epoch": 2.159512847170652, "grad_norm": 1.1205534934997559, "learning_rate": 7.840487152829348e-06, "loss": 0.0982, "step": 14540 }, { "epoch": 2.160998069211347, "grad_norm": 0.7028797268867493, "learning_rate": 7.839001930788653e-06, "loss": 0.0544, "step": 14550 }, { "epoch": 2.162483291252042, "grad_norm": 0.5611459016799927, "learning_rate": 7.837516708747959e-06, "loss": 0.0509, "step": 14560 }, { "epoch": 2.1639685132927373, "grad_norm": 0.8358970880508423, "learning_rate": 7.836031486707263e-06, "loss": 0.0815, "step": 14570 }, { "epoch": 2.165453735333432, "grad_norm": 0.9127787351608276, "learning_rate": 7.834546264666568e-06, "loss": 0.0774, "step": 14580 }, { "epoch": 2.1669389573741276, "grad_norm": 1.102961778640747, "learning_rate": 7.833061042625874e-06, "loss": 0.0639, "step": 14590 }, { "epoch": 2.1684241794148225, "grad_norm": 1.123892068862915, "learning_rate": 7.831575820585178e-06, "loss": 0.066, "step": 14600 }, { "epoch": 2.169909401455518, "grad_norm": 1.0453194379806519, "learning_rate": 7.830090598544483e-06, "loss": 0.0584, "step": 14610 }, { "epoch": 2.1713946234962127, "grad_norm": 0.4781140983104706, "learning_rate": 7.828605376503789e-06, "loss": 0.0727, "step": 14620 }, { "epoch": 2.1728798455369076, "grad_norm": 0.7792844772338867, "learning_rate": 7.827120154463093e-06, "loss": 0.0669, "step": 14630 }, { "epoch": 2.174365067577603, "grad_norm": 1.3519136905670166, "learning_rate": 7.825634932422398e-06, "loss": 0.0652, "step": 14640 }, { "epoch": 2.175850289618298, "grad_norm": 1.1446763277053833, "learning_rate": 7.824149710381704e-06, "loss": 0.0702, "step": 14650 }, { "epoch": 2.177335511658993, "grad_norm": 0.6634364128112793, "learning_rate": 7.822664488341008e-06, "loss": 0.0548, "step": 14660 }, { "epoch": 2.178820733699688, "grad_norm": 0.37931519746780396, "learning_rate": 7.821179266300313e-06, "loss": 0.0889, "step": 14670 }, { "epoch": 2.180305955740383, "grad_norm": 0.9678369164466858, "learning_rate": 7.819694044259617e-06, "loss": 0.081, "step": 14680 }, { "epoch": 2.1817911777810783, "grad_norm": 0.8365264534950256, "learning_rate": 7.818208822218922e-06, "loss": 0.0622, "step": 14690 }, { "epoch": 2.1832763998217732, "grad_norm": 0.43657487630844116, "learning_rate": 7.816723600178228e-06, "loss": 0.0578, "step": 14700 }, { "epoch": 2.1847616218624686, "grad_norm": 0.68485426902771, "learning_rate": 7.815238378137532e-06, "loss": 0.0579, "step": 14710 }, { "epoch": 2.1862468439031635, "grad_norm": 0.9559560418128967, "learning_rate": 7.813753156096837e-06, "loss": 0.0736, "step": 14720 }, { "epoch": 2.187732065943859, "grad_norm": 1.7984938621520996, "learning_rate": 7.812267934056143e-06, "loss": 0.0719, "step": 14730 }, { "epoch": 2.1892172879845537, "grad_norm": 1.2785571813583374, "learning_rate": 7.810782712015447e-06, "loss": 0.0755, "step": 14740 }, { "epoch": 2.1907025100252486, "grad_norm": 0.6600275039672852, "learning_rate": 7.809297489974752e-06, "loss": 0.0782, "step": 14750 }, { "epoch": 2.192187732065944, "grad_norm": 0.6349477171897888, "learning_rate": 7.807812267934058e-06, "loss": 0.0792, "step": 14760 }, { "epoch": 2.193672954106639, "grad_norm": 0.6333335041999817, "learning_rate": 7.806327045893362e-06, "loss": 0.0767, "step": 14770 }, { "epoch": 2.195158176147334, "grad_norm": 0.6242454051971436, "learning_rate": 7.804841823852667e-06, "loss": 0.0714, "step": 14780 }, { "epoch": 2.196643398188029, "grad_norm": 0.8708824515342712, "learning_rate": 7.803356601811971e-06, "loss": 0.0789, "step": 14790 }, { "epoch": 2.198128620228724, "grad_norm": 0.5887396931648254, "learning_rate": 7.801871379771277e-06, "loss": 0.0534, "step": 14800 }, { "epoch": 2.1996138422694194, "grad_norm": 0.9256582260131836, "learning_rate": 7.800386157730582e-06, "loss": 0.0624, "step": 14810 }, { "epoch": 2.2010990643101143, "grad_norm": 1.3295891284942627, "learning_rate": 7.798900935689886e-06, "loss": 0.0695, "step": 14820 }, { "epoch": 2.2025842863508096, "grad_norm": 1.114598035812378, "learning_rate": 7.79741571364919e-06, "loss": 0.0633, "step": 14830 }, { "epoch": 2.2040695083915045, "grad_norm": 0.6609292030334473, "learning_rate": 7.795930491608497e-06, "loss": 0.0658, "step": 14840 }, { "epoch": 2.2055547304321994, "grad_norm": 2.0195000171661377, "learning_rate": 7.794445269567801e-06, "loss": 0.0824, "step": 14850 }, { "epoch": 2.2070399524728948, "grad_norm": 0.7190861105918884, "learning_rate": 7.792960047527105e-06, "loss": 0.0574, "step": 14860 }, { "epoch": 2.2085251745135897, "grad_norm": 0.8353859782218933, "learning_rate": 7.79147482548641e-06, "loss": 0.0959, "step": 14870 }, { "epoch": 2.210010396554285, "grad_norm": 0.5273255109786987, "learning_rate": 7.789989603445716e-06, "loss": 0.057, "step": 14880 }, { "epoch": 2.21149561859498, "grad_norm": 1.0599677562713623, "learning_rate": 7.78850438140502e-06, "loss": 0.0713, "step": 14890 }, { "epoch": 2.2129808406356752, "grad_norm": 1.4016988277435303, "learning_rate": 7.787019159364325e-06, "loss": 0.074, "step": 14900 }, { "epoch": 2.21446606267637, "grad_norm": 0.2593357264995575, "learning_rate": 7.785533937323631e-06, "loss": 0.0642, "step": 14910 }, { "epoch": 2.215951284717065, "grad_norm": 1.8210548162460327, "learning_rate": 7.784048715282935e-06, "loss": 0.0722, "step": 14920 }, { "epoch": 2.2174365067577604, "grad_norm": 0.4837227761745453, "learning_rate": 7.78256349324224e-06, "loss": 0.0755, "step": 14930 }, { "epoch": 2.2189217287984553, "grad_norm": 0.8653165102005005, "learning_rate": 7.781078271201546e-06, "loss": 0.0689, "step": 14940 }, { "epoch": 2.2204069508391506, "grad_norm": 0.6423892378807068, "learning_rate": 7.77959304916085e-06, "loss": 0.0693, "step": 14950 }, { "epoch": 2.2218921728798455, "grad_norm": 1.4939004182815552, "learning_rate": 7.778107827120155e-06, "loss": 0.0878, "step": 14960 }, { "epoch": 2.2233773949205404, "grad_norm": 0.8596535921096802, "learning_rate": 7.776622605079461e-06, "loss": 0.0774, "step": 14970 }, { "epoch": 2.2248626169612358, "grad_norm": 0.9801391363143921, "learning_rate": 7.775137383038764e-06, "loss": 0.0602, "step": 14980 }, { "epoch": 2.2263478390019307, "grad_norm": 0.5129373669624329, "learning_rate": 7.77365216099807e-06, "loss": 0.0402, "step": 14990 }, { "epoch": 2.227833061042626, "grad_norm": 1.0586576461791992, "learning_rate": 7.772166938957374e-06, "loss": 0.0696, "step": 15000 }, { "epoch": 2.229318283083321, "grad_norm": 0.7149041891098022, "learning_rate": 7.770681716916679e-06, "loss": 0.0877, "step": 15010 }, { "epoch": 2.2308035051240163, "grad_norm": 1.1977014541625977, "learning_rate": 7.769196494875985e-06, "loss": 0.0783, "step": 15020 }, { "epoch": 2.232288727164711, "grad_norm": 0.6709617376327515, "learning_rate": 7.76771127283529e-06, "loss": 0.0624, "step": 15030 }, { "epoch": 2.233773949205406, "grad_norm": 0.6660168766975403, "learning_rate": 7.766226050794594e-06, "loss": 0.0642, "step": 15040 }, { "epoch": 2.2352591712461014, "grad_norm": 0.5352195501327515, "learning_rate": 7.7647408287539e-06, "loss": 0.0554, "step": 15050 }, { "epoch": 2.2367443932867963, "grad_norm": 1.1467076539993286, "learning_rate": 7.763255606713204e-06, "loss": 0.072, "step": 15060 }, { "epoch": 2.2382296153274917, "grad_norm": 0.9584192037582397, "learning_rate": 7.761770384672509e-06, "loss": 0.0721, "step": 15070 }, { "epoch": 2.2397148373681866, "grad_norm": 1.2189011573791504, "learning_rate": 7.760285162631815e-06, "loss": 0.0661, "step": 15080 }, { "epoch": 2.2412000594088815, "grad_norm": 1.0347647666931152, "learning_rate": 7.75879994059112e-06, "loss": 0.089, "step": 15090 }, { "epoch": 2.242685281449577, "grad_norm": 1.1839691400527954, "learning_rate": 7.757314718550424e-06, "loss": 0.0834, "step": 15100 }, { "epoch": 2.2441705034902717, "grad_norm": 0.8672871589660645, "learning_rate": 7.75582949650973e-06, "loss": 0.0763, "step": 15110 }, { "epoch": 2.245655725530967, "grad_norm": 0.44413626194000244, "learning_rate": 7.754344274469033e-06, "loss": 0.0745, "step": 15120 }, { "epoch": 2.247140947571662, "grad_norm": 1.441617488861084, "learning_rate": 7.752859052428339e-06, "loss": 0.0943, "step": 15130 }, { "epoch": 2.248626169612357, "grad_norm": 1.0939092636108398, "learning_rate": 7.751373830387645e-06, "loss": 0.0822, "step": 15140 }, { "epoch": 2.250111391653052, "grad_norm": 0.6537975668907166, "learning_rate": 7.749888608346948e-06, "loss": 0.0677, "step": 15150 }, { "epoch": 2.251596613693747, "grad_norm": 1.4885673522949219, "learning_rate": 7.748403386306254e-06, "loss": 0.0616, "step": 15160 }, { "epoch": 2.2530818357344424, "grad_norm": 0.9456076622009277, "learning_rate": 7.746918164265558e-06, "loss": 0.0715, "step": 15170 }, { "epoch": 2.2545670577751373, "grad_norm": 0.8747375011444092, "learning_rate": 7.745432942224863e-06, "loss": 0.0965, "step": 15180 }, { "epoch": 2.2560522798158322, "grad_norm": 0.8469992876052856, "learning_rate": 7.743947720184169e-06, "loss": 0.0631, "step": 15190 }, { "epoch": 2.2575375018565276, "grad_norm": 1.1663897037506104, "learning_rate": 7.742462498143473e-06, "loss": 0.0689, "step": 15200 }, { "epoch": 2.2590227238972225, "grad_norm": 1.6151713132858276, "learning_rate": 7.740977276102778e-06, "loss": 0.0616, "step": 15210 }, { "epoch": 2.260507945937918, "grad_norm": 0.8520883917808533, "learning_rate": 7.739492054062084e-06, "loss": 0.0769, "step": 15220 }, { "epoch": 2.2619931679786127, "grad_norm": 0.6212167143821716, "learning_rate": 7.738006832021388e-06, "loss": 0.076, "step": 15230 }, { "epoch": 2.263478390019308, "grad_norm": 0.7191680073738098, "learning_rate": 7.736521609980693e-06, "loss": 0.0513, "step": 15240 }, { "epoch": 2.264963612060003, "grad_norm": 1.244286298751831, "learning_rate": 7.735036387939999e-06, "loss": 0.0731, "step": 15250 }, { "epoch": 2.266448834100698, "grad_norm": 0.6261264681816101, "learning_rate": 7.733551165899303e-06, "loss": 0.0614, "step": 15260 }, { "epoch": 2.267934056141393, "grad_norm": 0.5021324753761292, "learning_rate": 7.732065943858608e-06, "loss": 0.0877, "step": 15270 }, { "epoch": 2.269419278182088, "grad_norm": 1.0325325727462769, "learning_rate": 7.730580721817912e-06, "loss": 0.0662, "step": 15280 }, { "epoch": 2.2709045002227835, "grad_norm": 1.0051850080490112, "learning_rate": 7.729095499777216e-06, "loss": 0.0866, "step": 15290 }, { "epoch": 2.2723897222634784, "grad_norm": 1.184901475906372, "learning_rate": 7.727610277736523e-06, "loss": 0.0565, "step": 15300 }, { "epoch": 2.2738749443041737, "grad_norm": 1.2180163860321045, "learning_rate": 7.726125055695827e-06, "loss": 0.0576, "step": 15310 }, { "epoch": 2.2753601663448686, "grad_norm": 0.8974994421005249, "learning_rate": 7.724639833655131e-06, "loss": 0.0876, "step": 15320 }, { "epoch": 2.2768453883855635, "grad_norm": 0.9203564524650574, "learning_rate": 7.723154611614438e-06, "loss": 0.0548, "step": 15330 }, { "epoch": 2.278330610426259, "grad_norm": 1.1554508209228516, "learning_rate": 7.721669389573742e-06, "loss": 0.0725, "step": 15340 }, { "epoch": 2.2798158324669537, "grad_norm": 1.1028167009353638, "learning_rate": 7.720184167533046e-06, "loss": 0.0729, "step": 15350 }, { "epoch": 2.281301054507649, "grad_norm": 0.9877077341079712, "learning_rate": 7.718698945492352e-06, "loss": 0.0657, "step": 15360 }, { "epoch": 2.282786276548344, "grad_norm": 1.2431800365447998, "learning_rate": 7.717213723451657e-06, "loss": 0.0884, "step": 15370 }, { "epoch": 2.284271498589039, "grad_norm": 0.6929932832717896, "learning_rate": 7.715728501410961e-06, "loss": 0.0765, "step": 15380 }, { "epoch": 2.2857567206297342, "grad_norm": 0.5198222398757935, "learning_rate": 7.714243279370266e-06, "loss": 0.0527, "step": 15390 }, { "epoch": 2.287241942670429, "grad_norm": 0.39518117904663086, "learning_rate": 7.712758057329572e-06, "loss": 0.0703, "step": 15400 }, { "epoch": 2.2887271647111245, "grad_norm": 1.2519538402557373, "learning_rate": 7.711272835288876e-06, "loss": 0.0589, "step": 15410 }, { "epoch": 2.2902123867518194, "grad_norm": 0.6768942475318909, "learning_rate": 7.70978761324818e-06, "loss": 0.0507, "step": 15420 }, { "epoch": 2.2916976087925143, "grad_norm": 0.732701301574707, "learning_rate": 7.708302391207487e-06, "loss": 0.0686, "step": 15430 }, { "epoch": 2.2931828308332096, "grad_norm": 0.7347680330276489, "learning_rate": 7.706817169166791e-06, "loss": 0.0767, "step": 15440 }, { "epoch": 2.2946680528739045, "grad_norm": 0.6387335658073425, "learning_rate": 7.705331947126096e-06, "loss": 0.0657, "step": 15450 }, { "epoch": 2.2961532749146, "grad_norm": 0.5078271627426147, "learning_rate": 7.7038467250854e-06, "loss": 0.0766, "step": 15460 }, { "epoch": 2.2976384969552948, "grad_norm": 0.5774367451667786, "learning_rate": 7.702361503044706e-06, "loss": 0.0578, "step": 15470 }, { "epoch": 2.2991237189959897, "grad_norm": 0.8975863456726074, "learning_rate": 7.70087628100401e-06, "loss": 0.0626, "step": 15480 }, { "epoch": 2.300608941036685, "grad_norm": 0.54920893907547, "learning_rate": 7.699391058963315e-06, "loss": 0.0573, "step": 15490 }, { "epoch": 2.30209416307738, "grad_norm": 0.9207307696342468, "learning_rate": 7.69790583692262e-06, "loss": 0.08, "step": 15500 }, { "epoch": 2.3035793851180753, "grad_norm": 0.7573016881942749, "learning_rate": 7.696420614881926e-06, "loss": 0.0672, "step": 15510 }, { "epoch": 2.30506460715877, "grad_norm": 0.47608405351638794, "learning_rate": 7.69493539284123e-06, "loss": 0.0739, "step": 15520 }, { "epoch": 2.3065498291994655, "grad_norm": 1.493303894996643, "learning_rate": 7.693450170800535e-06, "loss": 0.0737, "step": 15530 }, { "epoch": 2.3080350512401604, "grad_norm": 0.50611811876297, "learning_rate": 7.69196494875984e-06, "loss": 0.0538, "step": 15540 }, { "epoch": 2.3095202732808553, "grad_norm": 0.39653122425079346, "learning_rate": 7.690479726719145e-06, "loss": 0.0699, "step": 15550 }, { "epoch": 2.3110054953215506, "grad_norm": 1.0106645822525024, "learning_rate": 7.68899450467845e-06, "loss": 0.0725, "step": 15560 }, { "epoch": 2.3124907173622455, "grad_norm": 0.8386173844337463, "learning_rate": 7.687509282637756e-06, "loss": 0.0695, "step": 15570 }, { "epoch": 2.313975939402941, "grad_norm": 1.1915806531906128, "learning_rate": 7.68602406059706e-06, "loss": 0.0769, "step": 15580 }, { "epoch": 2.315461161443636, "grad_norm": 0.8047814965248108, "learning_rate": 7.684538838556365e-06, "loss": 0.0791, "step": 15590 }, { "epoch": 2.316946383484331, "grad_norm": 0.912143349647522, "learning_rate": 7.68305361651567e-06, "loss": 0.0802, "step": 15600 }, { "epoch": 2.318431605525026, "grad_norm": 0.9745832681655884, "learning_rate": 7.681568394474973e-06, "loss": 0.0716, "step": 15610 }, { "epoch": 2.319916827565721, "grad_norm": 0.41111069917678833, "learning_rate": 7.68008317243428e-06, "loss": 0.0728, "step": 15620 }, { "epoch": 2.3214020496064163, "grad_norm": 0.8632513284683228, "learning_rate": 7.678597950393584e-06, "loss": 0.0795, "step": 15630 }, { "epoch": 2.322887271647111, "grad_norm": 0.6391085982322693, "learning_rate": 7.677112728352888e-06, "loss": 0.0597, "step": 15640 }, { "epoch": 2.3243724936878065, "grad_norm": 0.5207847952842712, "learning_rate": 7.675627506312195e-06, "loss": 0.0817, "step": 15650 }, { "epoch": 2.3258577157285014, "grad_norm": 0.7792304754257202, "learning_rate": 7.674142284271499e-06, "loss": 0.0807, "step": 15660 }, { "epoch": 2.3273429377691963, "grad_norm": 0.8187819123268127, "learning_rate": 7.672657062230803e-06, "loss": 0.079, "step": 15670 }, { "epoch": 2.3288281598098917, "grad_norm": 0.4100897014141083, "learning_rate": 7.67117184019011e-06, "loss": 0.073, "step": 15680 }, { "epoch": 2.3303133818505866, "grad_norm": 0.7977427244186401, "learning_rate": 7.669686618149414e-06, "loss": 0.0787, "step": 15690 }, { "epoch": 2.331798603891282, "grad_norm": 0.5222535729408264, "learning_rate": 7.668201396108718e-06, "loss": 0.075, "step": 15700 }, { "epoch": 2.333283825931977, "grad_norm": 0.7441068887710571, "learning_rate": 7.666716174068025e-06, "loss": 0.0697, "step": 15710 }, { "epoch": 2.3347690479726717, "grad_norm": 0.9779987931251526, "learning_rate": 7.665230952027329e-06, "loss": 0.0756, "step": 15720 }, { "epoch": 2.336254270013367, "grad_norm": 1.1214604377746582, "learning_rate": 7.663745729986633e-06, "loss": 0.0663, "step": 15730 }, { "epoch": 2.337739492054062, "grad_norm": 0.6864479184150696, "learning_rate": 7.66226050794594e-06, "loss": 0.0506, "step": 15740 }, { "epoch": 2.3392247140947573, "grad_norm": 0.6545161008834839, "learning_rate": 7.660775285905242e-06, "loss": 0.0716, "step": 15750 }, { "epoch": 2.340709936135452, "grad_norm": 0.6110475659370422, "learning_rate": 7.659290063864548e-06, "loss": 0.0807, "step": 15760 }, { "epoch": 2.342195158176147, "grad_norm": 0.7456690073013306, "learning_rate": 7.657804841823855e-06, "loss": 0.0717, "step": 15770 }, { "epoch": 2.3436803802168424, "grad_norm": 1.7456918954849243, "learning_rate": 7.656319619783157e-06, "loss": 0.0838, "step": 15780 }, { "epoch": 2.3451656022575373, "grad_norm": 0.856558620929718, "learning_rate": 7.654834397742463e-06, "loss": 0.0662, "step": 15790 }, { "epoch": 2.3466508242982327, "grad_norm": 0.6788325309753418, "learning_rate": 7.653349175701768e-06, "loss": 0.0605, "step": 15800 }, { "epoch": 2.3481360463389276, "grad_norm": 0.6518844366073608, "learning_rate": 7.651863953661072e-06, "loss": 0.0653, "step": 15810 }, { "epoch": 2.349621268379623, "grad_norm": 0.9282351732254028, "learning_rate": 7.650378731620378e-06, "loss": 0.0545, "step": 15820 }, { "epoch": 2.351106490420318, "grad_norm": 1.422558307647705, "learning_rate": 7.648893509579683e-06, "loss": 0.0685, "step": 15830 }, { "epoch": 2.3525917124610127, "grad_norm": 1.116840124130249, "learning_rate": 7.647408287538987e-06, "loss": 0.0733, "step": 15840 }, { "epoch": 2.354076934501708, "grad_norm": 0.8625660538673401, "learning_rate": 7.645923065498293e-06, "loss": 0.0631, "step": 15850 }, { "epoch": 2.355562156542403, "grad_norm": 0.8200535774230957, "learning_rate": 7.644437843457598e-06, "loss": 0.0517, "step": 15860 }, { "epoch": 2.3570473785830983, "grad_norm": 0.6217565536499023, "learning_rate": 7.642952621416902e-06, "loss": 0.0644, "step": 15870 }, { "epoch": 2.358532600623793, "grad_norm": 0.6351733207702637, "learning_rate": 7.641467399376208e-06, "loss": 0.0651, "step": 15880 }, { "epoch": 2.3600178226644886, "grad_norm": 1.270407795906067, "learning_rate": 7.639982177335513e-06, "loss": 0.0438, "step": 15890 }, { "epoch": 2.3615030447051835, "grad_norm": 1.0128871202468872, "learning_rate": 7.638496955294817e-06, "loss": 0.0696, "step": 15900 }, { "epoch": 2.3629882667458784, "grad_norm": 0.6585198044776917, "learning_rate": 7.637011733254122e-06, "loss": 0.0741, "step": 15910 }, { "epoch": 2.3644734887865737, "grad_norm": 1.2549078464508057, "learning_rate": 7.635526511213426e-06, "loss": 0.0828, "step": 15920 }, { "epoch": 2.3659587108272686, "grad_norm": 0.5107712745666504, "learning_rate": 7.634041289172732e-06, "loss": 0.0643, "step": 15930 }, { "epoch": 2.367443932867964, "grad_norm": 1.306120753288269, "learning_rate": 7.632556067132037e-06, "loss": 0.0641, "step": 15940 }, { "epoch": 2.368929154908659, "grad_norm": 0.6712403297424316, "learning_rate": 7.631070845091341e-06, "loss": 0.0667, "step": 15950 }, { "epoch": 2.3704143769493538, "grad_norm": 0.6917909979820251, "learning_rate": 7.629585623050647e-06, "loss": 0.0668, "step": 15960 }, { "epoch": 2.371899598990049, "grad_norm": 1.065975308418274, "learning_rate": 7.628100401009951e-06, "loss": 0.0779, "step": 15970 }, { "epoch": 2.373384821030744, "grad_norm": 0.6773597598075867, "learning_rate": 7.626615178969257e-06, "loss": 0.0585, "step": 15980 }, { "epoch": 2.3748700430714393, "grad_norm": 0.946287989616394, "learning_rate": 7.625129956928562e-06, "loss": 0.0549, "step": 15990 }, { "epoch": 2.3763552651121342, "grad_norm": 1.1909871101379395, "learning_rate": 7.623644734887866e-06, "loss": 0.0898, "step": 16000 }, { "epoch": 2.377840487152829, "grad_norm": 0.9652341604232788, "learning_rate": 7.622159512847171e-06, "loss": 0.0686, "step": 16010 }, { "epoch": 2.3793257091935245, "grad_norm": 0.5234491229057312, "learning_rate": 7.620674290806476e-06, "loss": 0.0668, "step": 16020 }, { "epoch": 2.3808109312342194, "grad_norm": 0.6725775599479675, "learning_rate": 7.619189068765781e-06, "loss": 0.0673, "step": 16030 }, { "epoch": 2.3822961532749147, "grad_norm": 0.7732347249984741, "learning_rate": 7.617703846725086e-06, "loss": 0.0698, "step": 16040 }, { "epoch": 2.3837813753156096, "grad_norm": 1.0386955738067627, "learning_rate": 7.616218624684391e-06, "loss": 0.0679, "step": 16050 }, { "epoch": 2.3852665973563045, "grad_norm": 0.6354694366455078, "learning_rate": 7.614733402643696e-06, "loss": 0.0798, "step": 16060 }, { "epoch": 2.386751819397, "grad_norm": 0.8849825263023376, "learning_rate": 7.613248180603001e-06, "loss": 0.063, "step": 16070 }, { "epoch": 2.3882370414376948, "grad_norm": 1.465538501739502, "learning_rate": 7.611762958562306e-06, "loss": 0.0867, "step": 16080 }, { "epoch": 2.38972226347839, "grad_norm": 1.1470627784729004, "learning_rate": 7.610277736521611e-06, "loss": 0.0654, "step": 16090 }, { "epoch": 2.391207485519085, "grad_norm": 0.7879219055175781, "learning_rate": 7.608792514480916e-06, "loss": 0.0732, "step": 16100 }, { "epoch": 2.3926927075597804, "grad_norm": 1.3938816785812378, "learning_rate": 7.6073072924402206e-06, "loss": 0.0872, "step": 16110 }, { "epoch": 2.3941779296004753, "grad_norm": 0.9284458160400391, "learning_rate": 7.605822070399526e-06, "loss": 0.0724, "step": 16120 }, { "epoch": 2.39566315164117, "grad_norm": 0.7141979932785034, "learning_rate": 7.6043368483588294e-06, "loss": 0.0826, "step": 16130 }, { "epoch": 2.3971483736818655, "grad_norm": 0.3661617934703827, "learning_rate": 7.602851626318135e-06, "loss": 0.0683, "step": 16140 }, { "epoch": 2.3986335957225604, "grad_norm": 1.011782169342041, "learning_rate": 7.601366404277441e-06, "loss": 0.0769, "step": 16150 }, { "epoch": 2.4001188177632558, "grad_norm": 0.7773118615150452, "learning_rate": 7.5998811822367444e-06, "loss": 0.0718, "step": 16160 }, { "epoch": 2.4016040398039507, "grad_norm": 1.1565062999725342, "learning_rate": 7.59839596019605e-06, "loss": 0.0524, "step": 16170 }, { "epoch": 2.403089261844646, "grad_norm": 0.8525496125221252, "learning_rate": 7.596910738155355e-06, "loss": 0.0888, "step": 16180 }, { "epoch": 2.404574483885341, "grad_norm": 0.6374387741088867, "learning_rate": 7.5954255161146594e-06, "loss": 0.0855, "step": 16190 }, { "epoch": 2.406059705926036, "grad_norm": 0.8507137298583984, "learning_rate": 7.593940294073965e-06, "loss": 0.0696, "step": 16200 }, { "epoch": 2.407544927966731, "grad_norm": 0.7530370950698853, "learning_rate": 7.59245507203327e-06, "loss": 0.0742, "step": 16210 }, { "epoch": 2.409030150007426, "grad_norm": 0.3654564917087555, "learning_rate": 7.590969849992574e-06, "loss": 0.0675, "step": 16220 }, { "epoch": 2.4105153720481214, "grad_norm": 0.8282201290130615, "learning_rate": 7.58948462795188e-06, "loss": 0.0599, "step": 16230 }, { "epoch": 2.4120005940888163, "grad_norm": 0.7527429461479187, "learning_rate": 7.587999405911184e-06, "loss": 0.0676, "step": 16240 }, { "epoch": 2.413485816129511, "grad_norm": 0.8395132422447205, "learning_rate": 7.586514183870489e-06, "loss": 0.0757, "step": 16250 }, { "epoch": 2.4149710381702065, "grad_norm": 1.281912088394165, "learning_rate": 7.585028961829795e-06, "loss": 0.0783, "step": 16260 }, { "epoch": 2.4164562602109014, "grad_norm": 1.0508184432983398, "learning_rate": 7.583543739789099e-06, "loss": 0.0864, "step": 16270 }, { "epoch": 2.4179414822515968, "grad_norm": 0.9719825983047485, "learning_rate": 7.582058517748404e-06, "loss": 0.0636, "step": 16280 }, { "epoch": 2.4194267042922917, "grad_norm": 0.9227995276451111, "learning_rate": 7.58057329570771e-06, "loss": 0.0874, "step": 16290 }, { "epoch": 2.4209119263329866, "grad_norm": 0.854083776473999, "learning_rate": 7.579088073667013e-06, "loss": 0.0755, "step": 16300 }, { "epoch": 2.422397148373682, "grad_norm": 0.48843663930892944, "learning_rate": 7.5776028516263186e-06, "loss": 0.0621, "step": 16310 }, { "epoch": 2.423882370414377, "grad_norm": 0.956684410572052, "learning_rate": 7.576117629585623e-06, "loss": 0.0575, "step": 16320 }, { "epoch": 2.425367592455072, "grad_norm": 1.1044747829437256, "learning_rate": 7.574632407544928e-06, "loss": 0.0726, "step": 16330 }, { "epoch": 2.426852814495767, "grad_norm": 0.8783396482467651, "learning_rate": 7.5731471855042336e-06, "loss": 0.0753, "step": 16340 }, { "epoch": 2.428338036536462, "grad_norm": 0.7214937210083008, "learning_rate": 7.571661963463538e-06, "loss": 0.0471, "step": 16350 }, { "epoch": 2.4298232585771573, "grad_norm": 1.0365811586380005, "learning_rate": 7.570176741422843e-06, "loss": 0.0741, "step": 16360 }, { "epoch": 2.431308480617852, "grad_norm": 0.936764657497406, "learning_rate": 7.5686915193821485e-06, "loss": 0.0779, "step": 16370 }, { "epoch": 2.4327937026585476, "grad_norm": 0.803390622138977, "learning_rate": 7.567206297341453e-06, "loss": 0.0761, "step": 16380 }, { "epoch": 2.4342789246992425, "grad_norm": 0.5410571098327637, "learning_rate": 7.565721075300758e-06, "loss": 0.0751, "step": 16390 }, { "epoch": 2.435764146739938, "grad_norm": 0.7092260122299194, "learning_rate": 7.5642358532600635e-06, "loss": 0.0671, "step": 16400 }, { "epoch": 2.4372493687806327, "grad_norm": 1.1456022262573242, "learning_rate": 7.562750631219368e-06, "loss": 0.0702, "step": 16410 }, { "epoch": 2.4387345908213276, "grad_norm": 1.1234076023101807, "learning_rate": 7.561265409178673e-06, "loss": 0.0696, "step": 16420 }, { "epoch": 2.440219812862023, "grad_norm": 1.503846526145935, "learning_rate": 7.559780187137977e-06, "loss": 0.0768, "step": 16430 }, { "epoch": 2.441705034902718, "grad_norm": 0.7656980752944946, "learning_rate": 7.558294965097283e-06, "loss": 0.0741, "step": 16440 }, { "epoch": 2.443190256943413, "grad_norm": 0.432203084230423, "learning_rate": 7.556809743056588e-06, "loss": 0.0712, "step": 16450 }, { "epoch": 2.444675478984108, "grad_norm": 0.9114720225334167, "learning_rate": 7.555324521015892e-06, "loss": 0.071, "step": 16460 }, { "epoch": 2.4461607010248034, "grad_norm": 0.6950905323028564, "learning_rate": 7.553839298975197e-06, "loss": 0.0632, "step": 16470 }, { "epoch": 2.4476459230654983, "grad_norm": 0.7350353598594666, "learning_rate": 7.552354076934502e-06, "loss": 0.0614, "step": 16480 }, { "epoch": 2.4491311451061932, "grad_norm": 0.4233296811580658, "learning_rate": 7.550868854893807e-06, "loss": 0.0779, "step": 16490 }, { "epoch": 2.4506163671468886, "grad_norm": 1.4698457717895508, "learning_rate": 7.549383632853112e-06, "loss": 0.0786, "step": 16500 }, { "epoch": 2.4521015891875835, "grad_norm": 0.9390078783035278, "learning_rate": 7.547898410812417e-06, "loss": 0.0748, "step": 16510 }, { "epoch": 2.453586811228279, "grad_norm": 0.9275726675987244, "learning_rate": 7.546413188771722e-06, "loss": 0.0713, "step": 16520 }, { "epoch": 2.4550720332689737, "grad_norm": 1.1759885549545288, "learning_rate": 7.544927966731027e-06, "loss": 0.0776, "step": 16530 }, { "epoch": 2.4565572553096686, "grad_norm": 0.8219086527824402, "learning_rate": 7.5434427446903315e-06, "loss": 0.066, "step": 16540 }, { "epoch": 2.458042477350364, "grad_norm": 0.8670978546142578, "learning_rate": 7.541957522649637e-06, "loss": 0.073, "step": 16550 }, { "epoch": 2.459527699391059, "grad_norm": 0.8667005896568298, "learning_rate": 7.540472300608942e-06, "loss": 0.0562, "step": 16560 }, { "epoch": 2.461012921431754, "grad_norm": 1.1121703386306763, "learning_rate": 7.5389870785682465e-06, "loss": 0.0686, "step": 16570 }, { "epoch": 2.462498143472449, "grad_norm": 0.5565051436424255, "learning_rate": 7.537501856527552e-06, "loss": 0.0613, "step": 16580 }, { "epoch": 2.463983365513144, "grad_norm": 0.5451401472091675, "learning_rate": 7.536016634486857e-06, "loss": 0.0525, "step": 16590 }, { "epoch": 2.4654685875538394, "grad_norm": 0.6481854915618896, "learning_rate": 7.534531412446161e-06, "loss": 0.0624, "step": 16600 }, { "epoch": 2.4669538095945343, "grad_norm": 0.8952411413192749, "learning_rate": 7.533046190405466e-06, "loss": 0.0599, "step": 16610 }, { "epoch": 2.4684390316352296, "grad_norm": 0.49602147936820984, "learning_rate": 7.531560968364772e-06, "loss": 0.0656, "step": 16620 }, { "epoch": 2.4699242536759245, "grad_norm": 1.612073540687561, "learning_rate": 7.530075746324076e-06, "loss": 0.0725, "step": 16630 }, { "epoch": 2.4714094757166194, "grad_norm": 1.0118762254714966, "learning_rate": 7.528590524283381e-06, "loss": 0.0682, "step": 16640 }, { "epoch": 2.4728946977573147, "grad_norm": 0.7182286977767944, "learning_rate": 7.527105302242685e-06, "loss": 0.0621, "step": 16650 }, { "epoch": 2.4743799197980096, "grad_norm": 0.4392257630825043, "learning_rate": 7.525620080201991e-06, "loss": 0.0548, "step": 16660 }, { "epoch": 2.475865141838705, "grad_norm": 0.3020372688770294, "learning_rate": 7.524134858161296e-06, "loss": 0.0607, "step": 16670 }, { "epoch": 2.4773503638794, "grad_norm": 1.2162078619003296, "learning_rate": 7.5226496361206e-06, "loss": 0.0564, "step": 16680 }, { "epoch": 2.4788355859200952, "grad_norm": 1.0460399389266968, "learning_rate": 7.521164414079906e-06, "loss": 0.0671, "step": 16690 }, { "epoch": 2.48032080796079, "grad_norm": 0.5017814636230469, "learning_rate": 7.519679192039211e-06, "loss": 0.0618, "step": 16700 }, { "epoch": 2.481806030001485, "grad_norm": 0.7603805065155029, "learning_rate": 7.518193969998515e-06, "loss": 0.076, "step": 16710 }, { "epoch": 2.4832912520421804, "grad_norm": 0.6417643427848816, "learning_rate": 7.516708747957821e-06, "loss": 0.085, "step": 16720 }, { "epoch": 2.4847764740828753, "grad_norm": 0.7605277895927429, "learning_rate": 7.515223525917125e-06, "loss": 0.0974, "step": 16730 }, { "epoch": 2.4862616961235706, "grad_norm": 0.7401385307312012, "learning_rate": 7.51373830387643e-06, "loss": 0.0659, "step": 16740 }, { "epoch": 2.4877469181642655, "grad_norm": 1.0905494689941406, "learning_rate": 7.512253081835736e-06, "loss": 0.0756, "step": 16750 }, { "epoch": 2.489232140204961, "grad_norm": 0.9249421954154968, "learning_rate": 7.510767859795039e-06, "loss": 0.0699, "step": 16760 }, { "epoch": 2.4907173622456558, "grad_norm": 1.4558953046798706, "learning_rate": 7.5092826377543445e-06, "loss": 0.073, "step": 16770 }, { "epoch": 2.4922025842863507, "grad_norm": 1.035396933555603, "learning_rate": 7.50779741571365e-06, "loss": 0.0673, "step": 16780 }, { "epoch": 2.493687806327046, "grad_norm": 0.9037249684333801, "learning_rate": 7.506312193672954e-06, "loss": 0.0732, "step": 16790 }, { "epoch": 2.495173028367741, "grad_norm": 0.8131252527236938, "learning_rate": 7.5048269716322595e-06, "loss": 0.0783, "step": 16800 }, { "epoch": 2.4966582504084363, "grad_norm": 1.5988869667053223, "learning_rate": 7.503341749591565e-06, "loss": 0.0664, "step": 16810 }, { "epoch": 2.498143472449131, "grad_norm": 0.6440235376358032, "learning_rate": 7.501856527550869e-06, "loss": 0.061, "step": 16820 }, { "epoch": 2.499628694489826, "grad_norm": 0.5629106760025024, "learning_rate": 7.5003713055101745e-06, "loss": 0.0504, "step": 16830 }, { "epoch": 2.5011139165305214, "grad_norm": 0.7344189286231995, "learning_rate": 7.498886083469479e-06, "loss": 0.0579, "step": 16840 }, { "epoch": 2.5025991385712163, "grad_norm": 1.2263416051864624, "learning_rate": 7.497400861428784e-06, "loss": 0.0693, "step": 16850 }, { "epoch": 2.5040843606119116, "grad_norm": 0.47904643416404724, "learning_rate": 7.4959156393880895e-06, "loss": 0.0691, "step": 16860 }, { "epoch": 2.5055695826526065, "grad_norm": 0.4501626789569855, "learning_rate": 7.494430417347394e-06, "loss": 0.0465, "step": 16870 }, { "epoch": 2.5070548046933014, "grad_norm": 0.9378479719161987, "learning_rate": 7.492945195306699e-06, "loss": 0.0804, "step": 16880 }, { "epoch": 2.508540026733997, "grad_norm": 0.6688227653503418, "learning_rate": 7.4914599732660045e-06, "loss": 0.066, "step": 16890 }, { "epoch": 2.5100252487746917, "grad_norm": 1.1443583965301514, "learning_rate": 7.489974751225308e-06, "loss": 0.0745, "step": 16900 }, { "epoch": 2.511510470815387, "grad_norm": 1.8711727857589722, "learning_rate": 7.488489529184614e-06, "loss": 0.0803, "step": 16910 }, { "epoch": 2.512995692856082, "grad_norm": 1.0903749465942383, "learning_rate": 7.4870043071439195e-06, "loss": 0.0523, "step": 16920 }, { "epoch": 2.514480914896777, "grad_norm": 0.7821912169456482, "learning_rate": 7.485519085103223e-06, "loss": 0.0839, "step": 16930 }, { "epoch": 2.515966136937472, "grad_norm": 1.8524274826049805, "learning_rate": 7.484033863062528e-06, "loss": 0.0845, "step": 16940 }, { "epoch": 2.517451358978167, "grad_norm": 0.7729329466819763, "learning_rate": 7.482548641021833e-06, "loss": 0.0607, "step": 16950 }, { "epoch": 2.5189365810188624, "grad_norm": 0.6898858547210693, "learning_rate": 7.481063418981138e-06, "loss": 0.0587, "step": 16960 }, { "epoch": 2.5204218030595573, "grad_norm": 0.9820473194122314, "learning_rate": 7.479578196940443e-06, "loss": 0.0607, "step": 16970 }, { "epoch": 2.521907025100252, "grad_norm": 0.8298704028129578, "learning_rate": 7.478092974899748e-06, "loss": 0.0607, "step": 16980 }, { "epoch": 2.5233922471409476, "grad_norm": 1.4189072847366333, "learning_rate": 7.476607752859053e-06, "loss": 0.0889, "step": 16990 }, { "epoch": 2.524877469181643, "grad_norm": 0.6076411008834839, "learning_rate": 7.475122530818358e-06, "loss": 0.0608, "step": 17000 }, { "epoch": 2.526362691222338, "grad_norm": 0.8957213163375854, "learning_rate": 7.473637308777663e-06, "loss": 0.0694, "step": 17010 }, { "epoch": 2.5278479132630327, "grad_norm": 0.5196698307991028, "learning_rate": 7.472152086736968e-06, "loss": 0.0545, "step": 17020 }, { "epoch": 2.529333135303728, "grad_norm": 1.1449114084243774, "learning_rate": 7.470666864696273e-06, "loss": 0.0698, "step": 17030 }, { "epoch": 2.530818357344423, "grad_norm": 0.8476200699806213, "learning_rate": 7.469181642655578e-06, "loss": 0.0467, "step": 17040 }, { "epoch": 2.5323035793851183, "grad_norm": 1.2697662115097046, "learning_rate": 7.467696420614883e-06, "loss": 0.0828, "step": 17050 }, { "epoch": 2.533788801425813, "grad_norm": 0.7003705501556396, "learning_rate": 7.466211198574187e-06, "loss": 0.0812, "step": 17060 }, { "epoch": 2.535274023466508, "grad_norm": 1.0060502290725708, "learning_rate": 7.464725976533492e-06, "loss": 0.0668, "step": 17070 }, { "epoch": 2.5367592455072034, "grad_norm": 0.6963827610015869, "learning_rate": 7.463240754492797e-06, "loss": 0.0587, "step": 17080 }, { "epoch": 2.5382444675478983, "grad_norm": 1.017677903175354, "learning_rate": 7.461755532452102e-06, "loss": 0.07, "step": 17090 }, { "epoch": 2.5397296895885937, "grad_norm": 0.9492692947387695, "learning_rate": 7.460270310411407e-06, "loss": 0.0635, "step": 17100 }, { "epoch": 2.5412149116292886, "grad_norm": 0.9661943316459656, "learning_rate": 7.458785088370712e-06, "loss": 0.0644, "step": 17110 }, { "epoch": 2.5427001336699835, "grad_norm": 1.2824933528900146, "learning_rate": 7.457299866330017e-06, "loss": 0.0802, "step": 17120 }, { "epoch": 2.544185355710679, "grad_norm": 0.9125545620918274, "learning_rate": 7.455814644289322e-06, "loss": 0.0685, "step": 17130 }, { "epoch": 2.5456705777513737, "grad_norm": 1.1388263702392578, "learning_rate": 7.454329422248627e-06, "loss": 0.0819, "step": 17140 }, { "epoch": 2.547155799792069, "grad_norm": 0.954296886920929, "learning_rate": 7.452844200207932e-06, "loss": 0.0806, "step": 17150 }, { "epoch": 2.548641021832764, "grad_norm": 0.6064457893371582, "learning_rate": 7.451358978167237e-06, "loss": 0.0867, "step": 17160 }, { "epoch": 2.550126243873459, "grad_norm": 0.5995849370956421, "learning_rate": 7.449873756126541e-06, "loss": 0.0672, "step": 17170 }, { "epoch": 2.551611465914154, "grad_norm": 0.3621232211589813, "learning_rate": 7.448388534085847e-06, "loss": 0.062, "step": 17180 }, { "epoch": 2.553096687954849, "grad_norm": 0.40740764141082764, "learning_rate": 7.446903312045152e-06, "loss": 0.0623, "step": 17190 }, { "epoch": 2.5545819099955445, "grad_norm": 0.8544617891311646, "learning_rate": 7.445418090004456e-06, "loss": 0.0758, "step": 17200 }, { "epoch": 2.5560671320362394, "grad_norm": 1.7287043333053589, "learning_rate": 7.443932867963762e-06, "loss": 0.0535, "step": 17210 }, { "epoch": 2.5575523540769343, "grad_norm": 0.7510034441947937, "learning_rate": 7.442447645923067e-06, "loss": 0.083, "step": 17220 }, { "epoch": 2.5590375761176296, "grad_norm": 0.5877035856246948, "learning_rate": 7.4409624238823705e-06, "loss": 0.0723, "step": 17230 }, { "epoch": 2.5605227981583245, "grad_norm": 1.369896411895752, "learning_rate": 7.439477201841676e-06, "loss": 0.0693, "step": 17240 }, { "epoch": 2.56200802019902, "grad_norm": 1.503385305404663, "learning_rate": 7.43799197980098e-06, "loss": 0.0637, "step": 17250 }, { "epoch": 2.5634932422397148, "grad_norm": 0.6620815992355347, "learning_rate": 7.4365067577602855e-06, "loss": 0.0572, "step": 17260 }, { "epoch": 2.5649784642804097, "grad_norm": 1.0107345581054688, "learning_rate": 7.435021535719591e-06, "loss": 0.0685, "step": 17270 }, { "epoch": 2.566463686321105, "grad_norm": 0.6963445544242859, "learning_rate": 7.433536313678895e-06, "loss": 0.0642, "step": 17280 }, { "epoch": 2.5679489083618003, "grad_norm": 0.7158843278884888, "learning_rate": 7.4320510916382005e-06, "loss": 0.0879, "step": 17290 }, { "epoch": 2.5694341304024952, "grad_norm": 0.49214211106300354, "learning_rate": 7.430565869597506e-06, "loss": 0.0556, "step": 17300 }, { "epoch": 2.57091935244319, "grad_norm": 0.5516762733459473, "learning_rate": 7.42908064755681e-06, "loss": 0.0548, "step": 17310 }, { "epoch": 2.5724045744838855, "grad_norm": 0.8607064485549927, "learning_rate": 7.4275954255161155e-06, "loss": 0.078, "step": 17320 }, { "epoch": 2.5738897965245804, "grad_norm": 0.9706307649612427, "learning_rate": 7.426110203475421e-06, "loss": 0.0552, "step": 17330 }, { "epoch": 2.5753750185652757, "grad_norm": 0.6151372790336609, "learning_rate": 7.424624981434725e-06, "loss": 0.0835, "step": 17340 }, { "epoch": 2.5768602406059706, "grad_norm": 1.603258490562439, "learning_rate": 7.4231397593940305e-06, "loss": 0.0943, "step": 17350 }, { "epoch": 2.5783454626466655, "grad_norm": 0.7356602549552917, "learning_rate": 7.421654537353334e-06, "loss": 0.0783, "step": 17360 }, { "epoch": 2.579830684687361, "grad_norm": 0.5023389458656311, "learning_rate": 7.420169315312639e-06, "loss": 0.0707, "step": 17370 }, { "epoch": 2.5813159067280558, "grad_norm": 0.5624324679374695, "learning_rate": 7.4186840932719454e-06, "loss": 0.0606, "step": 17380 }, { "epoch": 2.582801128768751, "grad_norm": 0.6219521164894104, "learning_rate": 7.417198871231249e-06, "loss": 0.0768, "step": 17390 }, { "epoch": 2.584286350809446, "grad_norm": 0.8075690865516663, "learning_rate": 7.415713649190554e-06, "loss": 0.0646, "step": 17400 }, { "epoch": 2.585771572850141, "grad_norm": 0.8664126396179199, "learning_rate": 7.41422842714986e-06, "loss": 0.0548, "step": 17410 }, { "epoch": 2.5872567948908363, "grad_norm": 0.8029124736785889, "learning_rate": 7.412743205109164e-06, "loss": 0.062, "step": 17420 }, { "epoch": 2.588742016931531, "grad_norm": 0.9638460278511047, "learning_rate": 7.411257983068469e-06, "loss": 0.0816, "step": 17430 }, { "epoch": 2.5902272389722265, "grad_norm": 0.6284675002098083, "learning_rate": 7.409772761027775e-06, "loss": 0.0558, "step": 17440 }, { "epoch": 2.5917124610129214, "grad_norm": 0.517883837223053, "learning_rate": 7.408287538987079e-06, "loss": 0.0791, "step": 17450 }, { "epoch": 2.5931976830536163, "grad_norm": 0.49655789136886597, "learning_rate": 7.406802316946384e-06, "loss": 0.0543, "step": 17460 }, { "epoch": 2.5946829050943117, "grad_norm": 0.5364641547203064, "learning_rate": 7.405317094905689e-06, "loss": 0.076, "step": 17470 }, { "epoch": 2.5961681271350066, "grad_norm": 0.2534388601779938, "learning_rate": 7.403831872864994e-06, "loss": 0.0625, "step": 17480 }, { "epoch": 2.597653349175702, "grad_norm": 0.8373918533325195, "learning_rate": 7.402346650824299e-06, "loss": 0.0701, "step": 17490 }, { "epoch": 2.599138571216397, "grad_norm": 0.8206072449684143, "learning_rate": 7.400861428783604e-06, "loss": 0.082, "step": 17500 }, { "epoch": 2.6006237932570917, "grad_norm": 0.7630594968795776, "learning_rate": 7.399376206742909e-06, "loss": 0.0852, "step": 17510 }, { "epoch": 2.602109015297787, "grad_norm": 1.250341773033142, "learning_rate": 7.397890984702214e-06, "loss": 0.0644, "step": 17520 }, { "epoch": 2.603594237338482, "grad_norm": 0.9379168152809143, "learning_rate": 7.396405762661518e-06, "loss": 0.0767, "step": 17530 }, { "epoch": 2.6050794593791773, "grad_norm": 0.8298720121383667, "learning_rate": 7.394920540620823e-06, "loss": 0.0751, "step": 17540 }, { "epoch": 2.606564681419872, "grad_norm": 1.1554667949676514, "learning_rate": 7.393435318580129e-06, "loss": 0.0617, "step": 17550 }, { "epoch": 2.608049903460567, "grad_norm": 1.0690228939056396, "learning_rate": 7.391950096539433e-06, "loss": 0.0686, "step": 17560 }, { "epoch": 2.6095351255012624, "grad_norm": 0.49055570363998413, "learning_rate": 7.390464874498738e-06, "loss": 0.0869, "step": 17570 }, { "epoch": 2.6110203475419578, "grad_norm": 0.8171910643577576, "learning_rate": 7.388979652458043e-06, "loss": 0.0636, "step": 17580 }, { "epoch": 2.6125055695826527, "grad_norm": 0.917361855506897, "learning_rate": 7.387494430417348e-06, "loss": 0.0772, "step": 17590 }, { "epoch": 2.6139907916233476, "grad_norm": 0.653695285320282, "learning_rate": 7.386009208376653e-06, "loss": 0.0572, "step": 17600 }, { "epoch": 2.615476013664043, "grad_norm": 0.48957398533821106, "learning_rate": 7.384523986335958e-06, "loss": 0.0906, "step": 17610 }, { "epoch": 2.616961235704738, "grad_norm": 1.2944921255111694, "learning_rate": 7.383038764295263e-06, "loss": 0.0627, "step": 17620 }, { "epoch": 2.618446457745433, "grad_norm": 0.4532714784145355, "learning_rate": 7.381553542254568e-06, "loss": 0.0544, "step": 17630 }, { "epoch": 2.619931679786128, "grad_norm": 0.8860215544700623, "learning_rate": 7.3800683202138726e-06, "loss": 0.0554, "step": 17640 }, { "epoch": 2.621416901826823, "grad_norm": 0.8389759659767151, "learning_rate": 7.378583098173178e-06, "loss": 0.0659, "step": 17650 }, { "epoch": 2.6229021238675183, "grad_norm": 0.4027443826198578, "learning_rate": 7.377097876132483e-06, "loss": 0.0703, "step": 17660 }, { "epoch": 2.624387345908213, "grad_norm": 1.314283847808838, "learning_rate": 7.3756126540917876e-06, "loss": 0.0805, "step": 17670 }, { "epoch": 2.6258725679489086, "grad_norm": 0.6991994976997375, "learning_rate": 7.374127432051093e-06, "loss": 0.0639, "step": 17680 }, { "epoch": 2.6273577899896035, "grad_norm": 0.8641003966331482, "learning_rate": 7.3726422100103964e-06, "loss": 0.0895, "step": 17690 }, { "epoch": 2.6288430120302984, "grad_norm": 0.6983335614204407, "learning_rate": 7.371156987969702e-06, "loss": 0.0591, "step": 17700 }, { "epoch": 2.6303282340709937, "grad_norm": 0.9575406908988953, "learning_rate": 7.369671765929007e-06, "loss": 0.0835, "step": 17710 }, { "epoch": 2.6318134561116886, "grad_norm": 0.5745441317558289, "learning_rate": 7.3681865438883114e-06, "loss": 0.0633, "step": 17720 }, { "epoch": 2.633298678152384, "grad_norm": 0.5129587650299072, "learning_rate": 7.366701321847617e-06, "loss": 0.057, "step": 17730 }, { "epoch": 2.634783900193079, "grad_norm": 0.9663777947425842, "learning_rate": 7.365216099806922e-06, "loss": 0.0712, "step": 17740 }, { "epoch": 2.6362691222337737, "grad_norm": 0.8724805116653442, "learning_rate": 7.363730877766226e-06, "loss": 0.0615, "step": 17750 }, { "epoch": 2.637754344274469, "grad_norm": 1.2762563228607178, "learning_rate": 7.362245655725532e-06, "loss": 0.0724, "step": 17760 }, { "epoch": 2.639239566315164, "grad_norm": 1.088175654411316, "learning_rate": 7.360760433684836e-06, "loss": 0.0636, "step": 17770 }, { "epoch": 2.6407247883558593, "grad_norm": 1.3335635662078857, "learning_rate": 7.359275211644141e-06, "loss": 0.0685, "step": 17780 }, { "epoch": 2.6422100103965542, "grad_norm": 0.5454977750778198, "learning_rate": 7.357789989603447e-06, "loss": 0.0582, "step": 17790 }, { "epoch": 2.643695232437249, "grad_norm": 0.5082247853279114, "learning_rate": 7.356304767562751e-06, "loss": 0.0589, "step": 17800 }, { "epoch": 2.6451804544779445, "grad_norm": 1.032354474067688, "learning_rate": 7.354819545522056e-06, "loss": 0.0696, "step": 17810 }, { "epoch": 2.6466656765186394, "grad_norm": 0.808302104473114, "learning_rate": 7.353334323481362e-06, "loss": 0.0766, "step": 17820 }, { "epoch": 2.6481508985593347, "grad_norm": 0.6936222314834595, "learning_rate": 7.351849101440665e-06, "loss": 0.0599, "step": 17830 }, { "epoch": 2.6496361206000296, "grad_norm": 0.5071762800216675, "learning_rate": 7.3503638793999706e-06, "loss": 0.0645, "step": 17840 }, { "epoch": 2.6511213426407245, "grad_norm": 0.5135564208030701, "learning_rate": 7.348878657359277e-06, "loss": 0.0625, "step": 17850 }, { "epoch": 2.65260656468142, "grad_norm": 1.0177409648895264, "learning_rate": 7.34739343531858e-06, "loss": 0.0707, "step": 17860 }, { "epoch": 2.654091786722115, "grad_norm": 0.4974595904350281, "learning_rate": 7.3459082132778856e-06, "loss": 0.065, "step": 17870 }, { "epoch": 2.65557700876281, "grad_norm": 0.5838668346405029, "learning_rate": 7.34442299123719e-06, "loss": 0.07, "step": 17880 }, { "epoch": 2.657062230803505, "grad_norm": 0.531019389629364, "learning_rate": 7.342937769196495e-06, "loss": 0.0636, "step": 17890 }, { "epoch": 2.6585474528442004, "grad_norm": 0.6077486276626587, "learning_rate": 7.3414525471558005e-06, "loss": 0.0571, "step": 17900 }, { "epoch": 2.6600326748848953, "grad_norm": 0.7069369554519653, "learning_rate": 7.339967325115105e-06, "loss": 0.0712, "step": 17910 }, { "epoch": 2.6615178969255906, "grad_norm": 0.5238094329833984, "learning_rate": 7.33848210307441e-06, "loss": 0.0733, "step": 17920 }, { "epoch": 2.6630031189662855, "grad_norm": 0.4017098546028137, "learning_rate": 7.3369968810337155e-06, "loss": 0.0559, "step": 17930 }, { "epoch": 2.6644883410069804, "grad_norm": 0.3973262906074524, "learning_rate": 7.33551165899302e-06, "loss": 0.0629, "step": 17940 }, { "epoch": 2.6659735630476757, "grad_norm": 0.6550332903862, "learning_rate": 7.334026436952325e-06, "loss": 0.0641, "step": 17950 }, { "epoch": 2.6674587850883706, "grad_norm": 0.3614233136177063, "learning_rate": 7.3325412149116305e-06, "loss": 0.0517, "step": 17960 }, { "epoch": 2.668944007129066, "grad_norm": 0.5007290840148926, "learning_rate": 7.331055992870935e-06, "loss": 0.0672, "step": 17970 }, { "epoch": 2.670429229169761, "grad_norm": 0.8679770231246948, "learning_rate": 7.32957077083024e-06, "loss": 0.0632, "step": 17980 }, { "epoch": 2.671914451210456, "grad_norm": 0.9736119508743286, "learning_rate": 7.328085548789544e-06, "loss": 0.065, "step": 17990 }, { "epoch": 2.673399673251151, "grad_norm": 0.6604450941085815, "learning_rate": 7.326600326748849e-06, "loss": 0.0847, "step": 18000 }, { "epoch": 2.674884895291846, "grad_norm": 1.0800334215164185, "learning_rate": 7.325115104708154e-06, "loss": 0.0903, "step": 18010 }, { "epoch": 2.6763701173325414, "grad_norm": 0.4174042344093323, "learning_rate": 7.323629882667459e-06, "loss": 0.0554, "step": 18020 }, { "epoch": 2.6778553393732363, "grad_norm": 0.7919684648513794, "learning_rate": 7.322144660626764e-06, "loss": 0.0537, "step": 18030 }, { "epoch": 2.679340561413931, "grad_norm": 0.6713187098503113, "learning_rate": 7.320659438586069e-06, "loss": 0.0668, "step": 18040 }, { "epoch": 2.6808257834546265, "grad_norm": 1.1003612279891968, "learning_rate": 7.319174216545374e-06, "loss": 0.0745, "step": 18050 }, { "epoch": 2.6823110054953214, "grad_norm": 1.0887432098388672, "learning_rate": 7.317688994504679e-06, "loss": 0.0697, "step": 18060 }, { "epoch": 2.6837962275360168, "grad_norm": 0.7097349762916565, "learning_rate": 7.316203772463984e-06, "loss": 0.067, "step": 18070 }, { "epoch": 2.6852814495767117, "grad_norm": 0.8918297290802002, "learning_rate": 7.314718550423289e-06, "loss": 0.0597, "step": 18080 }, { "epoch": 2.6867666716174066, "grad_norm": 0.39780157804489136, "learning_rate": 7.313233328382594e-06, "loss": 0.0617, "step": 18090 }, { "epoch": 2.688251893658102, "grad_norm": 0.7399517893791199, "learning_rate": 7.3117481063418985e-06, "loss": 0.0701, "step": 18100 }, { "epoch": 2.689737115698797, "grad_norm": 0.567972719669342, "learning_rate": 7.310262884301204e-06, "loss": 0.0847, "step": 18110 }, { "epoch": 2.691222337739492, "grad_norm": 0.699404239654541, "learning_rate": 7.308777662260509e-06, "loss": 0.0684, "step": 18120 }, { "epoch": 2.692707559780187, "grad_norm": 1.000461459159851, "learning_rate": 7.307292440219813e-06, "loss": 0.0784, "step": 18130 }, { "epoch": 2.694192781820882, "grad_norm": 0.6232538223266602, "learning_rate": 7.305807218179119e-06, "loss": 0.0501, "step": 18140 }, { "epoch": 2.6956780038615773, "grad_norm": 0.5169805288314819, "learning_rate": 7.304321996138424e-06, "loss": 0.0608, "step": 18150 }, { "epoch": 2.6971632259022726, "grad_norm": 1.0496106147766113, "learning_rate": 7.302836774097728e-06, "loss": 0.0598, "step": 18160 }, { "epoch": 2.6986484479429675, "grad_norm": 0.6121331453323364, "learning_rate": 7.301351552057033e-06, "loss": 0.0544, "step": 18170 }, { "epoch": 2.7001336699836624, "grad_norm": 0.779712975025177, "learning_rate": 7.299866330016337e-06, "loss": 0.0768, "step": 18180 }, { "epoch": 2.701618892024358, "grad_norm": 0.36729615926742554, "learning_rate": 7.298381107975643e-06, "loss": 0.0643, "step": 18190 }, { "epoch": 2.7031041140650527, "grad_norm": 0.8492846488952637, "learning_rate": 7.296895885934948e-06, "loss": 0.0681, "step": 18200 }, { "epoch": 2.704589336105748, "grad_norm": 1.0522595643997192, "learning_rate": 7.295410663894252e-06, "loss": 0.0716, "step": 18210 }, { "epoch": 2.706074558146443, "grad_norm": 1.1811456680297852, "learning_rate": 7.293925441853558e-06, "loss": 0.0666, "step": 18220 }, { "epoch": 2.707559780187138, "grad_norm": 1.1208118200302124, "learning_rate": 7.292440219812863e-06, "loss": 0.0618, "step": 18230 }, { "epoch": 2.709045002227833, "grad_norm": 0.8347768187522888, "learning_rate": 7.290954997772167e-06, "loss": 0.069, "step": 18240 }, { "epoch": 2.710530224268528, "grad_norm": 0.9092279672622681, "learning_rate": 7.289469775731473e-06, "loss": 0.0555, "step": 18250 }, { "epoch": 2.7120154463092234, "grad_norm": 1.1388651132583618, "learning_rate": 7.287984553690778e-06, "loss": 0.0818, "step": 18260 }, { "epoch": 2.7135006683499183, "grad_norm": 0.8529207110404968, "learning_rate": 7.286499331650082e-06, "loss": 0.0424, "step": 18270 }, { "epoch": 2.714985890390613, "grad_norm": 1.3158096075057983, "learning_rate": 7.285014109609388e-06, "loss": 0.0847, "step": 18280 }, { "epoch": 2.7164711124313086, "grad_norm": 1.0572584867477417, "learning_rate": 7.283528887568691e-06, "loss": 0.0485, "step": 18290 }, { "epoch": 2.7179563344720035, "grad_norm": 0.9866123199462891, "learning_rate": 7.2820436655279965e-06, "loss": 0.063, "step": 18300 }, { "epoch": 2.719441556512699, "grad_norm": 0.6498306393623352, "learning_rate": 7.280558443487303e-06, "loss": 0.076, "step": 18310 }, { "epoch": 2.7209267785533937, "grad_norm": 0.4041306972503662, "learning_rate": 7.279073221446606e-06, "loss": 0.0619, "step": 18320 }, { "epoch": 2.7224120005940886, "grad_norm": 0.3998267650604248, "learning_rate": 7.2775879994059115e-06, "loss": 0.0594, "step": 18330 }, { "epoch": 2.723897222634784, "grad_norm": 0.4064011573791504, "learning_rate": 7.276102777365217e-06, "loss": 0.0601, "step": 18340 }, { "epoch": 2.725382444675479, "grad_norm": 0.5988971590995789, "learning_rate": 7.274617555324521e-06, "loss": 0.0691, "step": 18350 }, { "epoch": 2.726867666716174, "grad_norm": 1.0736236572265625, "learning_rate": 7.2731323332838265e-06, "loss": 0.0621, "step": 18360 }, { "epoch": 2.728352888756869, "grad_norm": 1.3379560708999634, "learning_rate": 7.271647111243132e-06, "loss": 0.0683, "step": 18370 }, { "epoch": 2.729838110797564, "grad_norm": 0.5252112150192261, "learning_rate": 7.270161889202436e-06, "loss": 0.055, "step": 18380 }, { "epoch": 2.7313233328382593, "grad_norm": 1.0529993772506714, "learning_rate": 7.2686766671617415e-06, "loss": 0.0797, "step": 18390 }, { "epoch": 2.7328085548789542, "grad_norm": 1.2092722654342651, "learning_rate": 7.267191445121046e-06, "loss": 0.0717, "step": 18400 }, { "epoch": 2.7342937769196496, "grad_norm": 0.4357248842716217, "learning_rate": 7.265706223080351e-06, "loss": 0.0524, "step": 18410 }, { "epoch": 2.7357789989603445, "grad_norm": 0.6754854917526245, "learning_rate": 7.2642210010396565e-06, "loss": 0.0754, "step": 18420 }, { "epoch": 2.7372642210010394, "grad_norm": 1.5296664237976074, "learning_rate": 7.262735778998961e-06, "loss": 0.0769, "step": 18430 }, { "epoch": 2.7387494430417347, "grad_norm": 0.5845615267753601, "learning_rate": 7.261250556958266e-06, "loss": 0.0528, "step": 18440 }, { "epoch": 2.74023466508243, "grad_norm": 0.5885583758354187, "learning_rate": 7.2597653349175715e-06, "loss": 0.0692, "step": 18450 }, { "epoch": 2.741719887123125, "grad_norm": 1.1398755311965942, "learning_rate": 7.258280112876875e-06, "loss": 0.0751, "step": 18460 }, { "epoch": 2.74320510916382, "grad_norm": 1.1153408288955688, "learning_rate": 7.25679489083618e-06, "loss": 0.0528, "step": 18470 }, { "epoch": 2.744690331204515, "grad_norm": 0.9327086210250854, "learning_rate": 7.255309668795486e-06, "loss": 0.0748, "step": 18480 }, { "epoch": 2.74617555324521, "grad_norm": 0.783569872379303, "learning_rate": 7.25382444675479e-06, "loss": 0.0803, "step": 18490 }, { "epoch": 2.7476607752859055, "grad_norm": 0.7508075833320618, "learning_rate": 7.252339224714095e-06, "loss": 0.052, "step": 18500 }, { "epoch": 2.7491459973266004, "grad_norm": 0.318877249956131, "learning_rate": 7.2508540026734e-06, "loss": 0.0644, "step": 18510 }, { "epoch": 2.7506312193672953, "grad_norm": 0.6378808617591858, "learning_rate": 7.249368780632705e-06, "loss": 0.0547, "step": 18520 }, { "epoch": 2.7521164414079906, "grad_norm": 1.1061114072799683, "learning_rate": 7.24788355859201e-06, "loss": 0.0833, "step": 18530 }, { "epoch": 2.7536016634486855, "grad_norm": 1.0416938066482544, "learning_rate": 7.246398336551315e-06, "loss": 0.0604, "step": 18540 }, { "epoch": 2.755086885489381, "grad_norm": 0.9235290884971619, "learning_rate": 7.24491311451062e-06, "loss": 0.0627, "step": 18550 }, { "epoch": 2.7565721075300758, "grad_norm": 0.8502004146575928, "learning_rate": 7.243427892469925e-06, "loss": 0.0575, "step": 18560 }, { "epoch": 2.7580573295707707, "grad_norm": 0.780972421169281, "learning_rate": 7.24194267042923e-06, "loss": 0.0748, "step": 18570 }, { "epoch": 2.759542551611466, "grad_norm": 0.6407886743545532, "learning_rate": 7.240457448388535e-06, "loss": 0.0777, "step": 18580 }, { "epoch": 2.761027773652161, "grad_norm": 0.5302232503890991, "learning_rate": 7.23897222634784e-06, "loss": 0.0513, "step": 18590 }, { "epoch": 2.7625129956928562, "grad_norm": 0.8282850980758667, "learning_rate": 7.237487004307145e-06, "loss": 0.0683, "step": 18600 }, { "epoch": 2.763998217733551, "grad_norm": 0.811703085899353, "learning_rate": 7.23600178226645e-06, "loss": 0.0624, "step": 18610 }, { "epoch": 2.765483439774246, "grad_norm": 0.7585626840591431, "learning_rate": 7.234516560225754e-06, "loss": 0.0698, "step": 18620 }, { "epoch": 2.7669686618149414, "grad_norm": 0.6847484707832336, "learning_rate": 7.233031338185059e-06, "loss": 0.0661, "step": 18630 }, { "epoch": 2.7684538838556363, "grad_norm": 0.8766204714775085, "learning_rate": 7.231546116144364e-06, "loss": 0.0804, "step": 18640 }, { "epoch": 2.7699391058963316, "grad_norm": 1.2006505727767944, "learning_rate": 7.230060894103669e-06, "loss": 0.0816, "step": 18650 }, { "epoch": 2.7714243279370265, "grad_norm": 1.2238951921463013, "learning_rate": 7.228575672062974e-06, "loss": 0.0765, "step": 18660 }, { "epoch": 2.7729095499777214, "grad_norm": 0.6546773910522461, "learning_rate": 7.227090450022279e-06, "loss": 0.0775, "step": 18670 }, { "epoch": 2.7743947720184168, "grad_norm": 0.8361360430717468, "learning_rate": 7.225605227981584e-06, "loss": 0.0624, "step": 18680 }, { "epoch": 2.7758799940591117, "grad_norm": 0.6821973323822021, "learning_rate": 7.224120005940889e-06, "loss": 0.0617, "step": 18690 }, { "epoch": 2.777365216099807, "grad_norm": 0.9379958510398865, "learning_rate": 7.222634783900193e-06, "loss": 0.0695, "step": 18700 }, { "epoch": 2.778850438140502, "grad_norm": 1.1127417087554932, "learning_rate": 7.221149561859499e-06, "loss": 0.0648, "step": 18710 }, { "epoch": 2.780335660181197, "grad_norm": 0.6109861135482788, "learning_rate": 7.219664339818804e-06, "loss": 0.0694, "step": 18720 }, { "epoch": 2.781820882221892, "grad_norm": 0.9905062317848206, "learning_rate": 7.218179117778108e-06, "loss": 0.0667, "step": 18730 }, { "epoch": 2.7833061042625875, "grad_norm": 0.9200990796089172, "learning_rate": 7.216693895737414e-06, "loss": 0.0533, "step": 18740 }, { "epoch": 2.7847913263032824, "grad_norm": 0.9881905317306519, "learning_rate": 7.215208673696719e-06, "loss": 0.0768, "step": 18750 }, { "epoch": 2.7862765483439773, "grad_norm": 0.7869969606399536, "learning_rate": 7.2137234516560225e-06, "loss": 0.0823, "step": 18760 }, { "epoch": 2.7877617703846727, "grad_norm": 0.6413224935531616, "learning_rate": 7.212238229615328e-06, "loss": 0.0477, "step": 18770 }, { "epoch": 2.7892469924253676, "grad_norm": 1.2393912076950073, "learning_rate": 7.210753007574634e-06, "loss": 0.0602, "step": 18780 }, { "epoch": 2.790732214466063, "grad_norm": 0.6673420667648315, "learning_rate": 7.2092677855339375e-06, "loss": 0.06, "step": 18790 }, { "epoch": 2.792217436506758, "grad_norm": 1.4284379482269287, "learning_rate": 7.207782563493243e-06, "loss": 0.0562, "step": 18800 }, { "epoch": 2.7937026585474527, "grad_norm": 0.855707585811615, "learning_rate": 7.206297341452547e-06, "loss": 0.0704, "step": 18810 }, { "epoch": 2.795187880588148, "grad_norm": 1.2716715335845947, "learning_rate": 7.2048121194118525e-06, "loss": 0.0804, "step": 18820 }, { "epoch": 2.796673102628843, "grad_norm": 1.4810649156570435, "learning_rate": 7.203326897371158e-06, "loss": 0.0635, "step": 18830 }, { "epoch": 2.7981583246695383, "grad_norm": 0.6314173340797424, "learning_rate": 7.201841675330462e-06, "loss": 0.0558, "step": 18840 }, { "epoch": 2.799643546710233, "grad_norm": 0.5451415181159973, "learning_rate": 7.2003564532897675e-06, "loss": 0.0534, "step": 18850 }, { "epoch": 2.801128768750928, "grad_norm": 1.1898857355117798, "learning_rate": 7.198871231249073e-06, "loss": 0.0718, "step": 18860 }, { "epoch": 2.8026139907916234, "grad_norm": 0.6038246750831604, "learning_rate": 7.197386009208377e-06, "loss": 0.0625, "step": 18870 }, { "epoch": 2.8040992128323183, "grad_norm": 1.0258615016937256, "learning_rate": 7.1959007871676825e-06, "loss": 0.0971, "step": 18880 }, { "epoch": 2.8055844348730137, "grad_norm": 0.45901933312416077, "learning_rate": 7.194415565126988e-06, "loss": 0.041, "step": 18890 }, { "epoch": 2.8070696569137086, "grad_norm": 0.6708415746688843, "learning_rate": 7.192930343086292e-06, "loss": 0.0704, "step": 18900 }, { "epoch": 2.8085548789544035, "grad_norm": 0.7538895010948181, "learning_rate": 7.1914451210455974e-06, "loss": 0.0905, "step": 18910 }, { "epoch": 2.810040100995099, "grad_norm": 1.0020769834518433, "learning_rate": 7.189959899004901e-06, "loss": 0.0682, "step": 18920 }, { "epoch": 2.8115253230357937, "grad_norm": 0.47720983624458313, "learning_rate": 7.188474676964206e-06, "loss": 0.0661, "step": 18930 }, { "epoch": 2.813010545076489, "grad_norm": 0.21680277585983276, "learning_rate": 7.186989454923512e-06, "loss": 0.0711, "step": 18940 }, { "epoch": 2.814495767117184, "grad_norm": 0.8336321711540222, "learning_rate": 7.185504232882816e-06, "loss": 0.055, "step": 18950 }, { "epoch": 2.815980989157879, "grad_norm": 0.4809732735157013, "learning_rate": 7.184019010842121e-06, "loss": 0.0636, "step": 18960 }, { "epoch": 2.817466211198574, "grad_norm": 0.7873929738998413, "learning_rate": 7.182533788801427e-06, "loss": 0.0749, "step": 18970 }, { "epoch": 2.818951433239269, "grad_norm": 1.0418553352355957, "learning_rate": 7.181048566760731e-06, "loss": 0.0742, "step": 18980 }, { "epoch": 2.8204366552799645, "grad_norm": 0.7662057280540466, "learning_rate": 7.179563344720036e-06, "loss": 0.0733, "step": 18990 }, { "epoch": 2.8219218773206594, "grad_norm": 1.117795705795288, "learning_rate": 7.178078122679342e-06, "loss": 0.068, "step": 19000 }, { "epoch": 2.8234070993613543, "grad_norm": 0.7875536680221558, "learning_rate": 7.176592900638646e-06, "loss": 0.0709, "step": 19010 }, { "epoch": 2.8248923214020496, "grad_norm": 0.7272695302963257, "learning_rate": 7.175107678597951e-06, "loss": 0.0655, "step": 19020 }, { "epoch": 2.826377543442745, "grad_norm": 0.6126848459243774, "learning_rate": 7.173622456557256e-06, "loss": 0.0745, "step": 19030 }, { "epoch": 2.82786276548344, "grad_norm": 0.6373000741004944, "learning_rate": 7.172137234516561e-06, "loss": 0.0835, "step": 19040 }, { "epoch": 2.8293479875241347, "grad_norm": 0.6726350784301758, "learning_rate": 7.170652012475866e-06, "loss": 0.0589, "step": 19050 }, { "epoch": 2.83083320956483, "grad_norm": 0.8001404404640198, "learning_rate": 7.16916679043517e-06, "loss": 0.0541, "step": 19060 }, { "epoch": 2.832318431605525, "grad_norm": 0.4326551556587219, "learning_rate": 7.167681568394476e-06, "loss": 0.05, "step": 19070 }, { "epoch": 2.8338036536462203, "grad_norm": 0.7540894150733948, "learning_rate": 7.166196346353781e-06, "loss": 0.0643, "step": 19080 }, { "epoch": 2.8352888756869152, "grad_norm": 1.2359886169433594, "learning_rate": 7.164711124313085e-06, "loss": 0.0773, "step": 19090 }, { "epoch": 2.83677409772761, "grad_norm": 0.692885160446167, "learning_rate": 7.16322590227239e-06, "loss": 0.0625, "step": 19100 }, { "epoch": 2.8382593197683055, "grad_norm": 1.0626580715179443, "learning_rate": 7.1617406802316954e-06, "loss": 0.05, "step": 19110 }, { "epoch": 2.8397445418090004, "grad_norm": 0.8324934840202332, "learning_rate": 7.160255458191e-06, "loss": 0.0746, "step": 19120 }, { "epoch": 2.8412297638496957, "grad_norm": 0.3550914525985718, "learning_rate": 7.158770236150305e-06, "loss": 0.0829, "step": 19130 }, { "epoch": 2.8427149858903906, "grad_norm": 0.6439893841743469, "learning_rate": 7.15728501410961e-06, "loss": 0.0676, "step": 19140 }, { "epoch": 2.8442002079310855, "grad_norm": 1.1629294157028198, "learning_rate": 7.155799792068915e-06, "loss": 0.0656, "step": 19150 }, { "epoch": 2.845685429971781, "grad_norm": 1.1239734888076782, "learning_rate": 7.15431457002822e-06, "loss": 0.0821, "step": 19160 }, { "epoch": 2.8471706520124758, "grad_norm": 0.692963719367981, "learning_rate": 7.1528293479875246e-06, "loss": 0.0694, "step": 19170 }, { "epoch": 2.848655874053171, "grad_norm": 0.7630389928817749, "learning_rate": 7.15134412594683e-06, "loss": 0.0664, "step": 19180 }, { "epoch": 2.850141096093866, "grad_norm": 0.7898661494255066, "learning_rate": 7.149858903906135e-06, "loss": 0.0778, "step": 19190 }, { "epoch": 2.851626318134561, "grad_norm": 1.2775942087173462, "learning_rate": 7.1483736818654396e-06, "loss": 0.0733, "step": 19200 }, { "epoch": 2.8531115401752563, "grad_norm": 1.9423375129699707, "learning_rate": 7.146888459824745e-06, "loss": 0.0681, "step": 19210 }, { "epoch": 2.854596762215951, "grad_norm": 0.9860016703605652, "learning_rate": 7.1454032377840484e-06, "loss": 0.0859, "step": 19220 }, { "epoch": 2.8560819842566465, "grad_norm": 0.6084530353546143, "learning_rate": 7.143918015743354e-06, "loss": 0.0599, "step": 19230 }, { "epoch": 2.8575672062973414, "grad_norm": 1.0293453931808472, "learning_rate": 7.142432793702659e-06, "loss": 0.0628, "step": 19240 }, { "epoch": 2.8590524283380363, "grad_norm": 0.9693927764892578, "learning_rate": 7.1409475716619634e-06, "loss": 0.0749, "step": 19250 }, { "epoch": 2.8605376503787316, "grad_norm": 0.5707103610038757, "learning_rate": 7.139462349621269e-06, "loss": 0.0656, "step": 19260 }, { "epoch": 2.8620228724194265, "grad_norm": 0.5898536443710327, "learning_rate": 7.137977127580574e-06, "loss": 0.0452, "step": 19270 }, { "epoch": 2.863508094460122, "grad_norm": 0.3954699635505676, "learning_rate": 7.136491905539878e-06, "loss": 0.0623, "step": 19280 }, { "epoch": 2.864993316500817, "grad_norm": 1.130014419555664, "learning_rate": 7.135006683499184e-06, "loss": 0.0673, "step": 19290 }, { "epoch": 2.8664785385415117, "grad_norm": 0.5030196905136108, "learning_rate": 7.133521461458489e-06, "loss": 0.0747, "step": 19300 }, { "epoch": 2.867963760582207, "grad_norm": 1.10323166847229, "learning_rate": 7.132036239417793e-06, "loss": 0.0867, "step": 19310 }, { "epoch": 2.8694489826229024, "grad_norm": 0.868181586265564, "learning_rate": 7.130551017377099e-06, "loss": 0.0642, "step": 19320 }, { "epoch": 2.8709342046635973, "grad_norm": 0.4541347324848175, "learning_rate": 7.129065795336403e-06, "loss": 0.0708, "step": 19330 }, { "epoch": 2.872419426704292, "grad_norm": 0.479602187871933, "learning_rate": 7.127580573295708e-06, "loss": 0.0578, "step": 19340 }, { "epoch": 2.8739046487449875, "grad_norm": 0.6411767601966858, "learning_rate": 7.126095351255014e-06, "loss": 0.0558, "step": 19350 }, { "epoch": 2.8753898707856824, "grad_norm": 0.45273324847221375, "learning_rate": 7.124610129214318e-06, "loss": 0.0778, "step": 19360 }, { "epoch": 2.8768750928263778, "grad_norm": 0.6530646085739136, "learning_rate": 7.123124907173623e-06, "loss": 0.0693, "step": 19370 }, { "epoch": 2.8783603148670727, "grad_norm": 0.6566230654716492, "learning_rate": 7.121639685132929e-06, "loss": 0.0648, "step": 19380 }, { "epoch": 2.8798455369077676, "grad_norm": 1.1032828092575073, "learning_rate": 7.120154463092232e-06, "loss": 0.0647, "step": 19390 }, { "epoch": 2.881330758948463, "grad_norm": 0.5805070400238037, "learning_rate": 7.1186692410515376e-06, "loss": 0.0836, "step": 19400 }, { "epoch": 2.882815980989158, "grad_norm": 0.7455107569694519, "learning_rate": 7.117184019010843e-06, "loss": 0.0589, "step": 19410 }, { "epoch": 2.884301203029853, "grad_norm": 0.6039009690284729, "learning_rate": 7.115698796970147e-06, "loss": 0.0623, "step": 19420 }, { "epoch": 2.885786425070548, "grad_norm": 0.39744484424591064, "learning_rate": 7.1142135749294525e-06, "loss": 0.0587, "step": 19430 }, { "epoch": 2.887271647111243, "grad_norm": 0.4915350377559662, "learning_rate": 7.112728352888757e-06, "loss": 0.0667, "step": 19440 }, { "epoch": 2.8887568691519383, "grad_norm": 2.344184637069702, "learning_rate": 7.111243130848062e-06, "loss": 0.0832, "step": 19450 }, { "epoch": 2.890242091192633, "grad_norm": 0.5884201526641846, "learning_rate": 7.1097579088073675e-06, "loss": 0.0669, "step": 19460 }, { "epoch": 2.8917273132333285, "grad_norm": 0.6957660913467407, "learning_rate": 7.108272686766672e-06, "loss": 0.0678, "step": 19470 }, { "epoch": 2.8932125352740234, "grad_norm": 0.752271294593811, "learning_rate": 7.106787464725977e-06, "loss": 0.0565, "step": 19480 }, { "epoch": 2.8946977573147183, "grad_norm": 0.8159077167510986, "learning_rate": 7.1053022426852825e-06, "loss": 0.0651, "step": 19490 }, { "epoch": 2.8961829793554137, "grad_norm": 0.9899120926856995, "learning_rate": 7.103817020644587e-06, "loss": 0.0457, "step": 19500 }, { "epoch": 2.8976682013961086, "grad_norm": 0.5578276515007019, "learning_rate": 7.102331798603892e-06, "loss": 0.0737, "step": 19510 }, { "epoch": 2.899153423436804, "grad_norm": 1.1280609369277954, "learning_rate": 7.1008465765631975e-06, "loss": 0.0853, "step": 19520 }, { "epoch": 2.900638645477499, "grad_norm": 1.308525562286377, "learning_rate": 7.099361354522501e-06, "loss": 0.0582, "step": 19530 }, { "epoch": 2.9021238675181937, "grad_norm": 0.870124340057373, "learning_rate": 7.097876132481807e-06, "loss": 0.062, "step": 19540 }, { "epoch": 2.903609089558889, "grad_norm": 0.7574545741081238, "learning_rate": 7.096390910441111e-06, "loss": 0.057, "step": 19550 }, { "epoch": 2.905094311599584, "grad_norm": 1.3428432941436768, "learning_rate": 7.094905688400416e-06, "loss": 0.0614, "step": 19560 }, { "epoch": 2.9065795336402793, "grad_norm": 0.890991747379303, "learning_rate": 7.093420466359721e-06, "loss": 0.0814, "step": 19570 }, { "epoch": 2.9080647556809742, "grad_norm": 1.0695637464523315, "learning_rate": 7.091935244319026e-06, "loss": 0.0692, "step": 19580 }, { "epoch": 2.909549977721669, "grad_norm": 0.8482929468154907, "learning_rate": 7.090450022278331e-06, "loss": 0.0635, "step": 19590 }, { "epoch": 2.9110351997623645, "grad_norm": 0.8700017333030701, "learning_rate": 7.088964800237636e-06, "loss": 0.075, "step": 19600 }, { "epoch": 2.91252042180306, "grad_norm": 1.818686842918396, "learning_rate": 7.087479578196941e-06, "loss": 0.0619, "step": 19610 }, { "epoch": 2.9140056438437547, "grad_norm": 1.458304762840271, "learning_rate": 7.085994356156246e-06, "loss": 0.0644, "step": 19620 }, { "epoch": 2.9154908658844496, "grad_norm": 0.509807825088501, "learning_rate": 7.0845091341155505e-06, "loss": 0.071, "step": 19630 }, { "epoch": 2.916976087925145, "grad_norm": 1.3976842164993286, "learning_rate": 7.083023912074856e-06, "loss": 0.0732, "step": 19640 }, { "epoch": 2.91846130996584, "grad_norm": 0.8412267565727234, "learning_rate": 7.081538690034161e-06, "loss": 0.051, "step": 19650 }, { "epoch": 2.919946532006535, "grad_norm": 0.7144739627838135, "learning_rate": 7.0800534679934655e-06, "loss": 0.0491, "step": 19660 }, { "epoch": 2.92143175404723, "grad_norm": 1.5335413217544556, "learning_rate": 7.078568245952771e-06, "loss": 0.0607, "step": 19670 }, { "epoch": 2.922916976087925, "grad_norm": 0.42868825793266296, "learning_rate": 7.077083023912076e-06, "loss": 0.0714, "step": 19680 }, { "epoch": 2.9244021981286203, "grad_norm": 0.5871951580047607, "learning_rate": 7.07559780187138e-06, "loss": 0.063, "step": 19690 }, { "epoch": 2.9258874201693152, "grad_norm": 1.0395044088363647, "learning_rate": 7.074112579830685e-06, "loss": 0.074, "step": 19700 }, { "epoch": 2.9273726422100106, "grad_norm": 0.9330030679702759, "learning_rate": 7.07262735778999e-06, "loss": 0.0591, "step": 19710 }, { "epoch": 2.9288578642507055, "grad_norm": 0.6595268845558167, "learning_rate": 7.071142135749295e-06, "loss": 0.059, "step": 19720 }, { "epoch": 2.9303430862914004, "grad_norm": 0.7226126194000244, "learning_rate": 7.0696569137086e-06, "loss": 0.0627, "step": 19730 }, { "epoch": 2.9318283083320957, "grad_norm": 0.6599835753440857, "learning_rate": 7.068171691667904e-06, "loss": 0.0657, "step": 19740 }, { "epoch": 2.9333135303727906, "grad_norm": 1.201311469078064, "learning_rate": 7.06668646962721e-06, "loss": 0.0639, "step": 19750 }, { "epoch": 2.934798752413486, "grad_norm": 0.5775285959243774, "learning_rate": 7.065201247586515e-06, "loss": 0.0832, "step": 19760 }, { "epoch": 2.936283974454181, "grad_norm": 0.5414377450942993, "learning_rate": 7.063716025545819e-06, "loss": 0.0557, "step": 19770 }, { "epoch": 2.937769196494876, "grad_norm": 0.9754282236099243, "learning_rate": 7.062230803505125e-06, "loss": 0.0642, "step": 19780 }, { "epoch": 2.939254418535571, "grad_norm": 0.7484234571456909, "learning_rate": 7.06074558146443e-06, "loss": 0.0688, "step": 19790 }, { "epoch": 2.940739640576266, "grad_norm": 0.8926592469215393, "learning_rate": 7.059260359423734e-06, "loss": 0.079, "step": 19800 }, { "epoch": 2.9422248626169614, "grad_norm": 0.9294021129608154, "learning_rate": 7.05777513738304e-06, "loss": 0.064, "step": 19810 }, { "epoch": 2.9437100846576563, "grad_norm": 0.4306425452232361, "learning_rate": 7.056289915342345e-06, "loss": 0.0875, "step": 19820 }, { "epoch": 2.945195306698351, "grad_norm": 0.8793667554855347, "learning_rate": 7.054804693301649e-06, "loss": 0.0745, "step": 19830 }, { "epoch": 2.9466805287390465, "grad_norm": 1.5072189569473267, "learning_rate": 7.053319471260955e-06, "loss": 0.0922, "step": 19840 }, { "epoch": 2.9481657507797414, "grad_norm": 0.5707129836082458, "learning_rate": 7.051834249220258e-06, "loss": 0.0642, "step": 19850 }, { "epoch": 2.9496509728204368, "grad_norm": 1.5201908349990845, "learning_rate": 7.0503490271795635e-06, "loss": 0.0679, "step": 19860 }, { "epoch": 2.9511361948611317, "grad_norm": 0.6788731217384338, "learning_rate": 7.048863805138869e-06, "loss": 0.0794, "step": 19870 }, { "epoch": 2.9526214169018266, "grad_norm": 0.6304814219474792, "learning_rate": 7.047378583098173e-06, "loss": 0.0579, "step": 19880 }, { "epoch": 2.954106638942522, "grad_norm": 0.5509234666824341, "learning_rate": 7.0458933610574785e-06, "loss": 0.0608, "step": 19890 }, { "epoch": 2.9555918609832172, "grad_norm": 0.6580905914306641, "learning_rate": 7.044408139016784e-06, "loss": 0.0758, "step": 19900 }, { "epoch": 2.957077083023912, "grad_norm": 1.1142622232437134, "learning_rate": 7.042922916976088e-06, "loss": 0.0556, "step": 19910 }, { "epoch": 2.958562305064607, "grad_norm": 0.4926334023475647, "learning_rate": 7.0414376949353935e-06, "loss": 0.0578, "step": 19920 }, { "epoch": 2.9600475271053024, "grad_norm": 0.603307843208313, "learning_rate": 7.039952472894699e-06, "loss": 0.0609, "step": 19930 }, { "epoch": 2.9615327491459973, "grad_norm": 1.4161272048950195, "learning_rate": 7.038467250854003e-06, "loss": 0.0848, "step": 19940 }, { "epoch": 2.9630179711866926, "grad_norm": 0.8850290179252625, "learning_rate": 7.0369820288133085e-06, "loss": 0.0722, "step": 19950 }, { "epoch": 2.9645031932273875, "grad_norm": 0.6632411479949951, "learning_rate": 7.035496806772613e-06, "loss": 0.0677, "step": 19960 }, { "epoch": 2.9659884152680824, "grad_norm": 0.4602072536945343, "learning_rate": 7.034011584731918e-06, "loss": 0.0488, "step": 19970 }, { "epoch": 2.967473637308778, "grad_norm": 0.9665570259094238, "learning_rate": 7.0325263626912235e-06, "loss": 0.0609, "step": 19980 }, { "epoch": 2.9689588593494727, "grad_norm": 0.6160033941268921, "learning_rate": 7.031041140650527e-06, "loss": 0.0588, "step": 19990 }, { "epoch": 2.970444081390168, "grad_norm": 0.8523101210594177, "learning_rate": 7.029555918609832e-06, "loss": 0.0786, "step": 20000 }, { "epoch": 2.971929303430863, "grad_norm": 0.8637480139732361, "learning_rate": 7.0280706965691385e-06, "loss": 0.0539, "step": 20010 }, { "epoch": 2.973414525471558, "grad_norm": 1.4097833633422852, "learning_rate": 7.026585474528442e-06, "loss": 0.0854, "step": 20020 }, { "epoch": 2.974899747512253, "grad_norm": 1.2111132144927979, "learning_rate": 7.025100252487747e-06, "loss": 0.0548, "step": 20030 }, { "epoch": 2.976384969552948, "grad_norm": 0.8269698023796082, "learning_rate": 7.023615030447053e-06, "loss": 0.0945, "step": 20040 }, { "epoch": 2.9778701915936434, "grad_norm": 0.7142363786697388, "learning_rate": 7.022129808406357e-06, "loss": 0.0738, "step": 20050 }, { "epoch": 2.9793554136343383, "grad_norm": 0.6995881199836731, "learning_rate": 7.020644586365662e-06, "loss": 0.0609, "step": 20060 }, { "epoch": 2.980840635675033, "grad_norm": 1.4468334913253784, "learning_rate": 7.019159364324967e-06, "loss": 0.0832, "step": 20070 }, { "epoch": 2.9823258577157286, "grad_norm": 0.9121166467666626, "learning_rate": 7.017674142284272e-06, "loss": 0.0681, "step": 20080 }, { "epoch": 2.9838110797564235, "grad_norm": 0.7170482873916626, "learning_rate": 7.016188920243577e-06, "loss": 0.0653, "step": 20090 }, { "epoch": 2.985296301797119, "grad_norm": 0.7121782302856445, "learning_rate": 7.014703698202882e-06, "loss": 0.0743, "step": 20100 }, { "epoch": 2.9867815238378137, "grad_norm": 0.46788862347602844, "learning_rate": 7.013218476162187e-06, "loss": 0.0587, "step": 20110 }, { "epoch": 2.9882667458785086, "grad_norm": 1.205629587173462, "learning_rate": 7.011733254121492e-06, "loss": 0.0547, "step": 20120 }, { "epoch": 2.989751967919204, "grad_norm": 0.6308774352073669, "learning_rate": 7.010248032080797e-06, "loss": 0.0729, "step": 20130 }, { "epoch": 2.991237189959899, "grad_norm": 1.0978872776031494, "learning_rate": 7.008762810040102e-06, "loss": 0.0767, "step": 20140 }, { "epoch": 2.992722412000594, "grad_norm": 0.6353699564933777, "learning_rate": 7.007277587999406e-06, "loss": 0.0645, "step": 20150 }, { "epoch": 2.994207634041289, "grad_norm": 0.7821415066719055, "learning_rate": 7.005792365958711e-06, "loss": 0.0642, "step": 20160 }, { "epoch": 2.995692856081984, "grad_norm": 0.493858277797699, "learning_rate": 7.004307143918016e-06, "loss": 0.0356, "step": 20170 }, { "epoch": 2.9971780781226793, "grad_norm": 0.9723030924797058, "learning_rate": 7.002821921877321e-06, "loss": 0.0715, "step": 20180 }, { "epoch": 2.9986633001633747, "grad_norm": 0.9995088577270508, "learning_rate": 7.001336699836626e-06, "loss": 0.0564, "step": 20190 }, { "epoch": 3.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.0620584562420845, "eval_runtime": 210.5838, "eval_samples_per_second": 180.541, "eval_steps_per_second": 5.646, "step": 20199 }, { "epoch": 3.0001485222040696, "grad_norm": 0.7721754908561707, "learning_rate": 6.999851477795931e-06, "loss": 0.0561, "step": 20200 }, { "epoch": 3.0016337442447645, "grad_norm": 0.5628637075424194, "learning_rate": 6.998366255755236e-06, "loss": 0.0575, "step": 20210 }, { "epoch": 3.00311896628546, "grad_norm": 0.7381501793861389, "learning_rate": 6.996881033714541e-06, "loss": 0.0614, "step": 20220 }, { "epoch": 3.0046041883261547, "grad_norm": 0.8572022318840027, "learning_rate": 6.995395811673846e-06, "loss": 0.0706, "step": 20230 }, { "epoch": 3.0060894103668496, "grad_norm": 0.702950656414032, "learning_rate": 6.993910589633151e-06, "loss": 0.063, "step": 20240 }, { "epoch": 3.007574632407545, "grad_norm": 0.7224368453025818, "learning_rate": 6.992425367592456e-06, "loss": 0.078, "step": 20250 }, { "epoch": 3.00905985444824, "grad_norm": 0.8272649049758911, "learning_rate": 6.99094014555176e-06, "loss": 0.0787, "step": 20260 }, { "epoch": 3.010545076488935, "grad_norm": 1.782045841217041, "learning_rate": 6.989454923511066e-06, "loss": 0.0621, "step": 20270 }, { "epoch": 3.01203029852963, "grad_norm": 0.8865758180618286, "learning_rate": 6.987969701470371e-06, "loss": 0.0582, "step": 20280 }, { "epoch": 3.0135155205703255, "grad_norm": 1.336371898651123, "learning_rate": 6.9864844794296745e-06, "loss": 0.0694, "step": 20290 }, { "epoch": 3.0150007426110204, "grad_norm": 0.6026767492294312, "learning_rate": 6.984999257388981e-06, "loss": 0.0563, "step": 20300 }, { "epoch": 3.0164859646517153, "grad_norm": 0.6530321836471558, "learning_rate": 6.983514035348286e-06, "loss": 0.0826, "step": 20310 }, { "epoch": 3.0179711866924106, "grad_norm": 1.187461256980896, "learning_rate": 6.9820288133075895e-06, "loss": 0.0576, "step": 20320 }, { "epoch": 3.0194564087331055, "grad_norm": 0.42498815059661865, "learning_rate": 6.980543591266895e-06, "loss": 0.0541, "step": 20330 }, { "epoch": 3.020941630773801, "grad_norm": 1.0660574436187744, "learning_rate": 6.9790583692262e-06, "loss": 0.0542, "step": 20340 }, { "epoch": 3.0224268528144957, "grad_norm": 0.7157576084136963, "learning_rate": 6.9775731471855045e-06, "loss": 0.0531, "step": 20350 }, { "epoch": 3.0239120748551906, "grad_norm": 0.721301257610321, "learning_rate": 6.97608792514481e-06, "loss": 0.0606, "step": 20360 }, { "epoch": 3.025397296895886, "grad_norm": 0.23004470765590668, "learning_rate": 6.974602703104114e-06, "loss": 0.0517, "step": 20370 }, { "epoch": 3.026882518936581, "grad_norm": 1.1255507469177246, "learning_rate": 6.9731174810634195e-06, "loss": 0.0836, "step": 20380 }, { "epoch": 3.0283677409772762, "grad_norm": 0.5415315628051758, "learning_rate": 6.971632259022725e-06, "loss": 0.0604, "step": 20390 }, { "epoch": 3.029852963017971, "grad_norm": 1.0092153549194336, "learning_rate": 6.970147036982029e-06, "loss": 0.0625, "step": 20400 }, { "epoch": 3.0313381850586665, "grad_norm": 1.8862570524215698, "learning_rate": 6.9686618149413345e-06, "loss": 0.063, "step": 20410 }, { "epoch": 3.0328234070993614, "grad_norm": 1.144673466682434, "learning_rate": 6.96717659290064e-06, "loss": 0.0678, "step": 20420 }, { "epoch": 3.0343086291400563, "grad_norm": 0.7662794589996338, "learning_rate": 6.965691370859944e-06, "loss": 0.0744, "step": 20430 }, { "epoch": 3.0357938511807516, "grad_norm": 0.6166210770606995, "learning_rate": 6.9642061488192494e-06, "loss": 0.0679, "step": 20440 }, { "epoch": 3.0372790732214465, "grad_norm": 0.8427115082740784, "learning_rate": 6.962720926778555e-06, "loss": 0.0656, "step": 20450 }, { "epoch": 3.038764295262142, "grad_norm": 0.5689003467559814, "learning_rate": 6.961235704737858e-06, "loss": 0.0518, "step": 20460 }, { "epoch": 3.0402495173028368, "grad_norm": 1.7824311256408691, "learning_rate": 6.9597504826971644e-06, "loss": 0.0769, "step": 20470 }, { "epoch": 3.0417347393435317, "grad_norm": 1.074300765991211, "learning_rate": 6.958265260656468e-06, "loss": 0.0741, "step": 20480 }, { "epoch": 3.043219961384227, "grad_norm": 0.41286715865135193, "learning_rate": 6.956780038615773e-06, "loss": 0.0683, "step": 20490 }, { "epoch": 3.044705183424922, "grad_norm": 0.6059936881065369, "learning_rate": 6.955294816575079e-06, "loss": 0.046, "step": 20500 }, { "epoch": 3.0461904054656173, "grad_norm": 0.7452453374862671, "learning_rate": 6.953809594534383e-06, "loss": 0.0655, "step": 20510 }, { "epoch": 3.047675627506312, "grad_norm": 0.7540690302848816, "learning_rate": 6.952324372493688e-06, "loss": 0.0612, "step": 20520 }, { "epoch": 3.0491608495470075, "grad_norm": 0.7747832536697388, "learning_rate": 6.950839150452994e-06, "loss": 0.0667, "step": 20530 }, { "epoch": 3.0506460715877024, "grad_norm": 0.6237127184867859, "learning_rate": 6.949353928412298e-06, "loss": 0.056, "step": 20540 }, { "epoch": 3.0521312936283973, "grad_norm": 0.6594868898391724, "learning_rate": 6.947868706371603e-06, "loss": 0.0704, "step": 20550 }, { "epoch": 3.0536165156690926, "grad_norm": 0.7020158767700195, "learning_rate": 6.9463834843309086e-06, "loss": 0.0649, "step": 20560 }, { "epoch": 3.0551017377097875, "grad_norm": 0.36921775341033936, "learning_rate": 6.944898262290213e-06, "loss": 0.0774, "step": 20570 }, { "epoch": 3.056586959750483, "grad_norm": 0.8545680046081543, "learning_rate": 6.943413040249518e-06, "loss": 0.0592, "step": 20580 }, { "epoch": 3.058072181791178, "grad_norm": 1.0698968172073364, "learning_rate": 6.941927818208823e-06, "loss": 0.0846, "step": 20590 }, { "epoch": 3.0595574038318727, "grad_norm": 1.441307783126831, "learning_rate": 6.940442596168128e-06, "loss": 0.0766, "step": 20600 }, { "epoch": 3.061042625872568, "grad_norm": 0.8100225329399109, "learning_rate": 6.938957374127433e-06, "loss": 0.063, "step": 20610 }, { "epoch": 3.062527847913263, "grad_norm": 0.7782602906227112, "learning_rate": 6.937472152086737e-06, "loss": 0.084, "step": 20620 }, { "epoch": 3.0640130699539583, "grad_norm": 1.3601396083831787, "learning_rate": 6.935986930046042e-06, "loss": 0.09, "step": 20630 }, { "epoch": 3.065498291994653, "grad_norm": 0.49343305826187134, "learning_rate": 6.9345017080053474e-06, "loss": 0.0694, "step": 20640 }, { "epoch": 3.066983514035348, "grad_norm": 0.6675896048545837, "learning_rate": 6.933016485964652e-06, "loss": 0.0515, "step": 20650 }, { "epoch": 3.0684687360760434, "grad_norm": 1.5816264152526855, "learning_rate": 6.931531263923957e-06, "loss": 0.0867, "step": 20660 }, { "epoch": 3.0699539581167383, "grad_norm": 0.8865456581115723, "learning_rate": 6.930046041883262e-06, "loss": 0.068, "step": 20670 }, { "epoch": 3.0714391801574337, "grad_norm": 0.9473548531532288, "learning_rate": 6.928560819842567e-06, "loss": 0.0694, "step": 20680 }, { "epoch": 3.0729244021981286, "grad_norm": 0.991002082824707, "learning_rate": 6.927075597801872e-06, "loss": 0.0642, "step": 20690 }, { "epoch": 3.074409624238824, "grad_norm": 0.24137896299362183, "learning_rate": 6.9255903757611766e-06, "loss": 0.0697, "step": 20700 }, { "epoch": 3.075894846279519, "grad_norm": 1.1634925603866577, "learning_rate": 6.924105153720482e-06, "loss": 0.073, "step": 20710 }, { "epoch": 3.0773800683202137, "grad_norm": 0.7352584004402161, "learning_rate": 6.922619931679787e-06, "loss": 0.0552, "step": 20720 }, { "epoch": 3.078865290360909, "grad_norm": 0.663188636302948, "learning_rate": 6.9211347096390916e-06, "loss": 0.0549, "step": 20730 }, { "epoch": 3.080350512401604, "grad_norm": 0.4469948709011078, "learning_rate": 6.919649487598397e-06, "loss": 0.0448, "step": 20740 }, { "epoch": 3.0818357344422993, "grad_norm": 0.750174880027771, "learning_rate": 6.918164265557702e-06, "loss": 0.0807, "step": 20750 }, { "epoch": 3.083320956482994, "grad_norm": 0.8470863699913025, "learning_rate": 6.9166790435170066e-06, "loss": 0.0693, "step": 20760 }, { "epoch": 3.084806178523689, "grad_norm": 0.5609787702560425, "learning_rate": 6.915193821476312e-06, "loss": 0.0694, "step": 20770 }, { "epoch": 3.0862914005643844, "grad_norm": 0.9973790645599365, "learning_rate": 6.9137085994356154e-06, "loss": 0.0679, "step": 20780 }, { "epoch": 3.0877766226050793, "grad_norm": 0.9720730781555176, "learning_rate": 6.912223377394921e-06, "loss": 0.0527, "step": 20790 }, { "epoch": 3.0892618446457747, "grad_norm": 1.0503332614898682, "learning_rate": 6.910738155354226e-06, "loss": 0.0687, "step": 20800 }, { "epoch": 3.0907470666864696, "grad_norm": 1.0064181089401245, "learning_rate": 6.90925293331353e-06, "loss": 0.0805, "step": 20810 }, { "epoch": 3.092232288727165, "grad_norm": 0.841968834400177, "learning_rate": 6.907767711272836e-06, "loss": 0.0781, "step": 20820 }, { "epoch": 3.09371751076786, "grad_norm": 0.9688336849212646, "learning_rate": 6.906282489232141e-06, "loss": 0.0701, "step": 20830 }, { "epoch": 3.0952027328085547, "grad_norm": 0.6418406367301941, "learning_rate": 6.904797267191445e-06, "loss": 0.0568, "step": 20840 }, { "epoch": 3.09668795484925, "grad_norm": 1.104293704032898, "learning_rate": 6.903312045150751e-06, "loss": 0.0787, "step": 20850 }, { "epoch": 3.098173176889945, "grad_norm": 0.5715312957763672, "learning_rate": 6.901826823110056e-06, "loss": 0.0802, "step": 20860 }, { "epoch": 3.0996583989306403, "grad_norm": 0.5578724145889282, "learning_rate": 6.90034160106936e-06, "loss": 0.0707, "step": 20870 }, { "epoch": 3.1011436209713352, "grad_norm": 0.7479428052902222, "learning_rate": 6.898856379028666e-06, "loss": 0.0599, "step": 20880 }, { "epoch": 3.10262884301203, "grad_norm": 0.7864044904708862, "learning_rate": 6.89737115698797e-06, "loss": 0.0726, "step": 20890 }, { "epoch": 3.1041140650527255, "grad_norm": 1.1238430738449097, "learning_rate": 6.895885934947275e-06, "loss": 0.0693, "step": 20900 }, { "epoch": 3.1055992870934204, "grad_norm": 0.8007240295410156, "learning_rate": 6.894400712906581e-06, "loss": 0.0705, "step": 20910 }, { "epoch": 3.1070845091341157, "grad_norm": 0.8057619333267212, "learning_rate": 6.892915490865884e-06, "loss": 0.0757, "step": 20920 }, { "epoch": 3.1085697311748106, "grad_norm": 1.511337399482727, "learning_rate": 6.8914302688251896e-06, "loss": 0.0735, "step": 20930 }, { "epoch": 3.1100549532155055, "grad_norm": 0.6716585755348206, "learning_rate": 6.889945046784496e-06, "loss": 0.0592, "step": 20940 }, { "epoch": 3.111540175256201, "grad_norm": 1.2774038314819336, "learning_rate": 6.888459824743799e-06, "loss": 0.0791, "step": 20950 }, { "epoch": 3.1130253972968958, "grad_norm": 1.4515156745910645, "learning_rate": 6.8869746027031045e-06, "loss": 0.0599, "step": 20960 }, { "epoch": 3.114510619337591, "grad_norm": 0.8734662532806396, "learning_rate": 6.88548938066241e-06, "loss": 0.0537, "step": 20970 }, { "epoch": 3.115995841378286, "grad_norm": 1.2333000898361206, "learning_rate": 6.884004158621714e-06, "loss": 0.0668, "step": 20980 }, { "epoch": 3.1174810634189813, "grad_norm": 1.3227999210357666, "learning_rate": 6.8825189365810195e-06, "loss": 0.067, "step": 20990 }, { "epoch": 3.1189662854596762, "grad_norm": 0.49217718839645386, "learning_rate": 6.881033714540324e-06, "loss": 0.0727, "step": 21000 }, { "epoch": 3.120451507500371, "grad_norm": 1.2024414539337158, "learning_rate": 6.879548492499629e-06, "loss": 0.0791, "step": 21010 }, { "epoch": 3.1219367295410665, "grad_norm": 1.4273792505264282, "learning_rate": 6.8780632704589345e-06, "loss": 0.0819, "step": 21020 }, { "epoch": 3.1234219515817614, "grad_norm": 0.9906319975852966, "learning_rate": 6.876578048418239e-06, "loss": 0.0628, "step": 21030 }, { "epoch": 3.1249071736224567, "grad_norm": 0.7808650135993958, "learning_rate": 6.875092826377544e-06, "loss": 0.0563, "step": 21040 }, { "epoch": 3.1263923956631516, "grad_norm": 0.8338149189949036, "learning_rate": 6.8736076043368495e-06, "loss": 0.066, "step": 21050 }, { "epoch": 3.1278776177038465, "grad_norm": 0.4437405467033386, "learning_rate": 6.872122382296154e-06, "loss": 0.0722, "step": 21060 }, { "epoch": 3.129362839744542, "grad_norm": 0.47249501943588257, "learning_rate": 6.870637160255459e-06, "loss": 0.0819, "step": 21070 }, { "epoch": 3.130848061785237, "grad_norm": 0.8643490076065063, "learning_rate": 6.869151938214763e-06, "loss": 0.0589, "step": 21080 }, { "epoch": 3.132333283825932, "grad_norm": 0.6852704882621765, "learning_rate": 6.867666716174068e-06, "loss": 0.077, "step": 21090 }, { "epoch": 3.133818505866627, "grad_norm": 0.37101641297340393, "learning_rate": 6.866181494133373e-06, "loss": 0.0799, "step": 21100 }, { "epoch": 3.1353037279073224, "grad_norm": 0.873170793056488, "learning_rate": 6.864696272092678e-06, "loss": 0.0502, "step": 21110 }, { "epoch": 3.1367889499480173, "grad_norm": 1.023847222328186, "learning_rate": 6.863211050051983e-06, "loss": 0.0494, "step": 21120 }, { "epoch": 3.138274171988712, "grad_norm": 0.7707538604736328, "learning_rate": 6.861725828011288e-06, "loss": 0.06, "step": 21130 }, { "epoch": 3.1397593940294075, "grad_norm": 0.9110133647918701, "learning_rate": 6.860240605970593e-06, "loss": 0.0495, "step": 21140 }, { "epoch": 3.1412446160701024, "grad_norm": 0.9806025624275208, "learning_rate": 6.858755383929898e-06, "loss": 0.0693, "step": 21150 }, { "epoch": 3.1427298381107978, "grad_norm": 0.6785071492195129, "learning_rate": 6.857270161889203e-06, "loss": 0.0631, "step": 21160 }, { "epoch": 3.1442150601514927, "grad_norm": 1.0490822792053223, "learning_rate": 6.855784939848508e-06, "loss": 0.0658, "step": 21170 }, { "epoch": 3.1457002821921876, "grad_norm": 0.9065208435058594, "learning_rate": 6.854299717807813e-06, "loss": 0.0626, "step": 21180 }, { "epoch": 3.147185504232883, "grad_norm": 0.8384219408035278, "learning_rate": 6.8528144957671175e-06, "loss": 0.0498, "step": 21190 }, { "epoch": 3.148670726273578, "grad_norm": 1.3046029806137085, "learning_rate": 6.851329273726423e-06, "loss": 0.0716, "step": 21200 }, { "epoch": 3.150155948314273, "grad_norm": 1.7878382205963135, "learning_rate": 6.849844051685728e-06, "loss": 0.0644, "step": 21210 }, { "epoch": 3.151641170354968, "grad_norm": 1.844111442565918, "learning_rate": 6.848358829645032e-06, "loss": 0.0655, "step": 21220 }, { "epoch": 3.153126392395663, "grad_norm": 0.8533865809440613, "learning_rate": 6.846873607604338e-06, "loss": 0.0531, "step": 21230 }, { "epoch": 3.1546116144363583, "grad_norm": 1.1432991027832031, "learning_rate": 6.845388385563643e-06, "loss": 0.0805, "step": 21240 }, { "epoch": 3.156096836477053, "grad_norm": 0.9231775999069214, "learning_rate": 6.843903163522947e-06, "loss": 0.0618, "step": 21250 }, { "epoch": 3.1575820585177485, "grad_norm": 1.0056854486465454, "learning_rate": 6.842417941482252e-06, "loss": 0.0775, "step": 21260 }, { "epoch": 3.1590672805584434, "grad_norm": 0.24411170184612274, "learning_rate": 6.840932719441557e-06, "loss": 0.0648, "step": 21270 }, { "epoch": 3.1605525025991383, "grad_norm": 1.0229218006134033, "learning_rate": 6.839447497400862e-06, "loss": 0.0763, "step": 21280 }, { "epoch": 3.1620377246398337, "grad_norm": 0.6935153007507324, "learning_rate": 6.837962275360167e-06, "loss": 0.0518, "step": 21290 }, { "epoch": 3.1635229466805286, "grad_norm": 1.5722551345825195, "learning_rate": 6.836477053319471e-06, "loss": 0.0602, "step": 21300 }, { "epoch": 3.165008168721224, "grad_norm": 1.5089282989501953, "learning_rate": 6.834991831278777e-06, "loss": 0.0584, "step": 21310 }, { "epoch": 3.166493390761919, "grad_norm": 0.6778764128684998, "learning_rate": 6.833506609238082e-06, "loss": 0.0563, "step": 21320 }, { "epoch": 3.167978612802614, "grad_norm": 0.6647319793701172, "learning_rate": 6.832021387197386e-06, "loss": 0.067, "step": 21330 }, { "epoch": 3.169463834843309, "grad_norm": 0.9124835133552551, "learning_rate": 6.830536165156692e-06, "loss": 0.0702, "step": 21340 }, { "epoch": 3.170949056884004, "grad_norm": 0.538489818572998, "learning_rate": 6.829050943115997e-06, "loss": 0.0722, "step": 21350 }, { "epoch": 3.1724342789246993, "grad_norm": 0.7582644820213318, "learning_rate": 6.827565721075301e-06, "loss": 0.0611, "step": 21360 }, { "epoch": 3.173919500965394, "grad_norm": 1.9703272581100464, "learning_rate": 6.826080499034607e-06, "loss": 0.075, "step": 21370 }, { "epoch": 3.1754047230060896, "grad_norm": 0.6227496862411499, "learning_rate": 6.824595276993912e-06, "loss": 0.0663, "step": 21380 }, { "epoch": 3.1768899450467845, "grad_norm": 1.0263561010360718, "learning_rate": 6.8231100549532155e-06, "loss": 0.0663, "step": 21390 }, { "epoch": 3.17837516708748, "grad_norm": 0.9529784917831421, "learning_rate": 6.821624832912521e-06, "loss": 0.0672, "step": 21400 }, { "epoch": 3.1798603891281747, "grad_norm": 0.8865375518798828, "learning_rate": 6.820139610871825e-06, "loss": 0.0604, "step": 21410 }, { "epoch": 3.1813456111688696, "grad_norm": 0.518654465675354, "learning_rate": 6.8186543888311305e-06, "loss": 0.0463, "step": 21420 }, { "epoch": 3.182830833209565, "grad_norm": 1.2982616424560547, "learning_rate": 6.817169166790436e-06, "loss": 0.0803, "step": 21430 }, { "epoch": 3.18431605525026, "grad_norm": 1.0875815153121948, "learning_rate": 6.81568394474974e-06, "loss": 0.0565, "step": 21440 }, { "epoch": 3.185801277290955, "grad_norm": 0.6639821529388428, "learning_rate": 6.8141987227090455e-06, "loss": 0.0526, "step": 21450 }, { "epoch": 3.18728649933165, "grad_norm": 0.9514920711517334, "learning_rate": 6.812713500668351e-06, "loss": 0.0559, "step": 21460 }, { "epoch": 3.188771721372345, "grad_norm": 1.0590705871582031, "learning_rate": 6.811228278627655e-06, "loss": 0.0871, "step": 21470 }, { "epoch": 3.1902569434130403, "grad_norm": 1.098676323890686, "learning_rate": 6.8097430565869605e-06, "loss": 0.0616, "step": 21480 }, { "epoch": 3.1917421654537352, "grad_norm": 0.8639383912086487, "learning_rate": 6.808257834546266e-06, "loss": 0.0494, "step": 21490 }, { "epoch": 3.1932273874944306, "grad_norm": 1.6292216777801514, "learning_rate": 6.80677261250557e-06, "loss": 0.0588, "step": 21500 }, { "epoch": 3.1947126095351255, "grad_norm": 0.9250429272651672, "learning_rate": 6.8052873904648755e-06, "loss": 0.0608, "step": 21510 }, { "epoch": 3.1961978315758204, "grad_norm": 0.8409736752510071, "learning_rate": 6.80380216842418e-06, "loss": 0.0742, "step": 21520 }, { "epoch": 3.1976830536165157, "grad_norm": 1.2568039894104004, "learning_rate": 6.802316946383485e-06, "loss": 0.073, "step": 21530 }, { "epoch": 3.1991682756572106, "grad_norm": 0.3838708698749542, "learning_rate": 6.8008317243427905e-06, "loss": 0.0668, "step": 21540 }, { "epoch": 3.200653497697906, "grad_norm": 0.5494322180747986, "learning_rate": 6.799346502302094e-06, "loss": 0.0736, "step": 21550 }, { "epoch": 3.202138719738601, "grad_norm": 0.5036866664886475, "learning_rate": 6.797861280261399e-06, "loss": 0.0657, "step": 21560 }, { "epoch": 3.2036239417792958, "grad_norm": 0.9766075015068054, "learning_rate": 6.796376058220705e-06, "loss": 0.0615, "step": 21570 }, { "epoch": 3.205109163819991, "grad_norm": 0.7940554618835449, "learning_rate": 6.794890836180009e-06, "loss": 0.0751, "step": 21580 }, { "epoch": 3.206594385860686, "grad_norm": 0.4779742956161499, "learning_rate": 6.793405614139314e-06, "loss": 0.0621, "step": 21590 }, { "epoch": 3.2080796079013814, "grad_norm": 1.009729027748108, "learning_rate": 6.791920392098619e-06, "loss": 0.0938, "step": 21600 }, { "epoch": 3.2095648299420763, "grad_norm": 0.7350361943244934, "learning_rate": 6.790435170057924e-06, "loss": 0.0586, "step": 21610 }, { "epoch": 3.2110500519827716, "grad_norm": 0.8858749866485596, "learning_rate": 6.788949948017229e-06, "loss": 0.064, "step": 21620 }, { "epoch": 3.2125352740234665, "grad_norm": 0.4429284930229187, "learning_rate": 6.787464725976534e-06, "loss": 0.0734, "step": 21630 }, { "epoch": 3.2140204960641614, "grad_norm": 0.8132091164588928, "learning_rate": 6.785979503935839e-06, "loss": 0.0817, "step": 21640 }, { "epoch": 3.2155057181048567, "grad_norm": 0.9240391254425049, "learning_rate": 6.784494281895144e-06, "loss": 0.074, "step": 21650 }, { "epoch": 3.2169909401455516, "grad_norm": 1.1660693883895874, "learning_rate": 6.783009059854449e-06, "loss": 0.0687, "step": 21660 }, { "epoch": 3.218476162186247, "grad_norm": 0.4858531355857849, "learning_rate": 6.781523837813754e-06, "loss": 0.0525, "step": 21670 }, { "epoch": 3.219961384226942, "grad_norm": 0.8546695113182068, "learning_rate": 6.780038615773059e-06, "loss": 0.0747, "step": 21680 }, { "epoch": 3.2214466062676372, "grad_norm": 1.3556727170944214, "learning_rate": 6.778553393732363e-06, "loss": 0.0839, "step": 21690 }, { "epoch": 3.222931828308332, "grad_norm": 0.890733003616333, "learning_rate": 6.777068171691669e-06, "loss": 0.0659, "step": 21700 }, { "epoch": 3.224417050349027, "grad_norm": 0.9196014404296875, "learning_rate": 6.775582949650973e-06, "loss": 0.0622, "step": 21710 }, { "epoch": 3.2259022723897224, "grad_norm": 0.6223394870758057, "learning_rate": 6.774097727610278e-06, "loss": 0.0638, "step": 21720 }, { "epoch": 3.2273874944304173, "grad_norm": 0.6774187684059143, "learning_rate": 6.772612505569583e-06, "loss": 0.0636, "step": 21730 }, { "epoch": 3.2288727164711126, "grad_norm": 1.0828312635421753, "learning_rate": 6.771127283528888e-06, "loss": 0.0967, "step": 21740 }, { "epoch": 3.2303579385118075, "grad_norm": 0.2529934346675873, "learning_rate": 6.769642061488193e-06, "loss": 0.0432, "step": 21750 }, { "epoch": 3.2318431605525024, "grad_norm": 0.820793867111206, "learning_rate": 6.768156839447498e-06, "loss": 0.0566, "step": 21760 }, { "epoch": 3.2333283825931978, "grad_norm": 1.6257591247558594, "learning_rate": 6.766671617406803e-06, "loss": 0.0643, "step": 21770 }, { "epoch": 3.2348136046338927, "grad_norm": 0.9866087436676025, "learning_rate": 6.765186395366108e-06, "loss": 0.0694, "step": 21780 }, { "epoch": 3.236298826674588, "grad_norm": 1.2419787645339966, "learning_rate": 6.763701173325413e-06, "loss": 0.0691, "step": 21790 }, { "epoch": 3.237784048715283, "grad_norm": 0.8262988328933716, "learning_rate": 6.762215951284718e-06, "loss": 0.0702, "step": 21800 }, { "epoch": 3.239269270755978, "grad_norm": 0.3723410367965698, "learning_rate": 6.760730729244023e-06, "loss": 0.0657, "step": 21810 }, { "epoch": 3.240754492796673, "grad_norm": 0.7780771851539612, "learning_rate": 6.759245507203327e-06, "loss": 0.0508, "step": 21820 }, { "epoch": 3.242239714837368, "grad_norm": 0.6054874062538147, "learning_rate": 6.757760285162633e-06, "loss": 0.0664, "step": 21830 }, { "epoch": 3.2437249368780634, "grad_norm": 0.6230700612068176, "learning_rate": 6.756275063121938e-06, "loss": 0.0737, "step": 21840 }, { "epoch": 3.2452101589187583, "grad_norm": 0.7779073715209961, "learning_rate": 6.7547898410812415e-06, "loss": 0.0451, "step": 21850 }, { "epoch": 3.246695380959453, "grad_norm": 1.2229901552200317, "learning_rate": 6.753304619040547e-06, "loss": 0.0666, "step": 21860 }, { "epoch": 3.2481806030001485, "grad_norm": 0.6388977766036987, "learning_rate": 6.751819396999852e-06, "loss": 0.0563, "step": 21870 }, { "epoch": 3.2496658250408434, "grad_norm": 0.4443511962890625, "learning_rate": 6.7503341749591565e-06, "loss": 0.0483, "step": 21880 }, { "epoch": 3.251151047081539, "grad_norm": 1.2210910320281982, "learning_rate": 6.748848952918462e-06, "loss": 0.0581, "step": 21890 }, { "epoch": 3.2526362691222337, "grad_norm": 1.0396207571029663, "learning_rate": 6.747363730877767e-06, "loss": 0.0766, "step": 21900 }, { "epoch": 3.254121491162929, "grad_norm": 1.3031349182128906, "learning_rate": 6.7458785088370715e-06, "loss": 0.0791, "step": 21910 }, { "epoch": 3.255606713203624, "grad_norm": 0.9325276613235474, "learning_rate": 6.744393286796377e-06, "loss": 0.0728, "step": 21920 }, { "epoch": 3.257091935244319, "grad_norm": 0.5997186899185181, "learning_rate": 6.742908064755681e-06, "loss": 0.0606, "step": 21930 }, { "epoch": 3.258577157285014, "grad_norm": 0.7627629041671753, "learning_rate": 6.7414228427149865e-06, "loss": 0.0741, "step": 21940 }, { "epoch": 3.260062379325709, "grad_norm": 0.27523115277290344, "learning_rate": 6.739937620674292e-06, "loss": 0.0632, "step": 21950 }, { "epoch": 3.2615476013664044, "grad_norm": 1.0456265211105347, "learning_rate": 6.738452398633596e-06, "loss": 0.06, "step": 21960 }, { "epoch": 3.2630328234070993, "grad_norm": 1.1044933795928955, "learning_rate": 6.7369671765929014e-06, "loss": 0.09, "step": 21970 }, { "epoch": 3.2645180454477947, "grad_norm": 1.1576274633407593, "learning_rate": 6.735481954552207e-06, "loss": 0.0607, "step": 21980 }, { "epoch": 3.2660032674884896, "grad_norm": 1.0158040523529053, "learning_rate": 6.733996732511511e-06, "loss": 0.0657, "step": 21990 }, { "epoch": 3.2674884895291845, "grad_norm": 0.6219592094421387, "learning_rate": 6.7325115104708164e-06, "loss": 0.052, "step": 22000 }, { "epoch": 3.26897371156988, "grad_norm": 1.1890630722045898, "learning_rate": 6.731026288430122e-06, "loss": 0.0553, "step": 22010 }, { "epoch": 3.2704589336105747, "grad_norm": 1.2006758451461792, "learning_rate": 6.729541066389425e-06, "loss": 0.0468, "step": 22020 }, { "epoch": 3.27194415565127, "grad_norm": 0.7339828610420227, "learning_rate": 6.728055844348731e-06, "loss": 0.0567, "step": 22030 }, { "epoch": 3.273429377691965, "grad_norm": 0.8527355790138245, "learning_rate": 6.726570622308035e-06, "loss": 0.058, "step": 22040 }, { "epoch": 3.27491459973266, "grad_norm": 0.9126393795013428, "learning_rate": 6.72508540026734e-06, "loss": 0.0603, "step": 22050 }, { "epoch": 3.276399821773355, "grad_norm": 0.3882531523704529, "learning_rate": 6.723600178226646e-06, "loss": 0.0692, "step": 22060 }, { "epoch": 3.27788504381405, "grad_norm": 0.45046180486679077, "learning_rate": 6.72211495618595e-06, "loss": 0.0676, "step": 22070 }, { "epoch": 3.2793702658547454, "grad_norm": 1.058706283569336, "learning_rate": 6.720629734145255e-06, "loss": 0.0747, "step": 22080 }, { "epoch": 3.2808554878954403, "grad_norm": 1.137387990951538, "learning_rate": 6.7191445121045606e-06, "loss": 0.0847, "step": 22090 }, { "epoch": 3.2823407099361352, "grad_norm": 1.0290052890777588, "learning_rate": 6.717659290063865e-06, "loss": 0.0638, "step": 22100 }, { "epoch": 3.2838259319768306, "grad_norm": 0.8567383885383606, "learning_rate": 6.71617406802317e-06, "loss": 0.0608, "step": 22110 }, { "epoch": 3.2853111540175255, "grad_norm": 0.39815622568130493, "learning_rate": 6.714688845982475e-06, "loss": 0.0598, "step": 22120 }, { "epoch": 3.286796376058221, "grad_norm": 0.8857595920562744, "learning_rate": 6.71320362394178e-06, "loss": 0.0543, "step": 22130 }, { "epoch": 3.2882815980989157, "grad_norm": 0.857346773147583, "learning_rate": 6.711718401901085e-06, "loss": 0.059, "step": 22140 }, { "epoch": 3.2897668201396106, "grad_norm": 0.6323465704917908, "learning_rate": 6.710233179860389e-06, "loss": 0.0602, "step": 22150 }, { "epoch": 3.291252042180306, "grad_norm": 0.7687992453575134, "learning_rate": 6.708747957819694e-06, "loss": 0.0593, "step": 22160 }, { "epoch": 3.292737264221001, "grad_norm": 1.292677879333496, "learning_rate": 6.707262735779e-06, "loss": 0.0614, "step": 22170 }, { "epoch": 3.2942224862616962, "grad_norm": 1.0926405191421509, "learning_rate": 6.705777513738304e-06, "loss": 0.0496, "step": 22180 }, { "epoch": 3.295707708302391, "grad_norm": 1.2428966760635376, "learning_rate": 6.704292291697609e-06, "loss": 0.0719, "step": 22190 }, { "epoch": 3.2971929303430865, "grad_norm": 0.4689485430717468, "learning_rate": 6.7028070696569144e-06, "loss": 0.0553, "step": 22200 }, { "epoch": 3.2986781523837814, "grad_norm": 0.30755752325057983, "learning_rate": 6.701321847616219e-06, "loss": 0.0652, "step": 22210 }, { "epoch": 3.3001633744244763, "grad_norm": 1.099134087562561, "learning_rate": 6.699836625575524e-06, "loss": 0.0828, "step": 22220 }, { "epoch": 3.3016485964651716, "grad_norm": 0.6286141872406006, "learning_rate": 6.6983514035348286e-06, "loss": 0.0654, "step": 22230 }, { "epoch": 3.3031338185058665, "grad_norm": 1.0803701877593994, "learning_rate": 6.696866181494134e-06, "loss": 0.0606, "step": 22240 }, { "epoch": 3.304619040546562, "grad_norm": 1.0683226585388184, "learning_rate": 6.695380959453439e-06, "loss": 0.0687, "step": 22250 }, { "epoch": 3.3061042625872568, "grad_norm": 0.9039521813392639, "learning_rate": 6.6938957374127436e-06, "loss": 0.0662, "step": 22260 }, { "epoch": 3.307589484627952, "grad_norm": 1.0247334241867065, "learning_rate": 6.692410515372049e-06, "loss": 0.0781, "step": 22270 }, { "epoch": 3.309074706668647, "grad_norm": 0.925523042678833, "learning_rate": 6.690925293331354e-06, "loss": 0.051, "step": 22280 }, { "epoch": 3.310559928709342, "grad_norm": 1.3613653182983398, "learning_rate": 6.6894400712906586e-06, "loss": 0.0791, "step": 22290 }, { "epoch": 3.3120451507500372, "grad_norm": 1.0534887313842773, "learning_rate": 6.687954849249964e-06, "loss": 0.0755, "step": 22300 }, { "epoch": 3.313530372790732, "grad_norm": 1.1597150564193726, "learning_rate": 6.686469627209269e-06, "loss": 0.0752, "step": 22310 }, { "epoch": 3.3150155948314275, "grad_norm": 0.4005168676376343, "learning_rate": 6.684984405168573e-06, "loss": 0.0749, "step": 22320 }, { "epoch": 3.3165008168721224, "grad_norm": 0.8335085511207581, "learning_rate": 6.683499183127878e-06, "loss": 0.0635, "step": 22330 }, { "epoch": 3.3179860389128173, "grad_norm": 1.0074352025985718, "learning_rate": 6.682013961087182e-06, "loss": 0.0525, "step": 22340 }, { "epoch": 3.3194712609535126, "grad_norm": 1.2929524183273315, "learning_rate": 6.680528739046488e-06, "loss": 0.0658, "step": 22350 }, { "epoch": 3.3209564829942075, "grad_norm": 1.0960065126419067, "learning_rate": 6.679043517005793e-06, "loss": 0.0688, "step": 22360 }, { "epoch": 3.322441705034903, "grad_norm": 0.9017845392227173, "learning_rate": 6.677558294965097e-06, "loss": 0.063, "step": 22370 }, { "epoch": 3.323926927075598, "grad_norm": 0.459738165140152, "learning_rate": 6.676073072924403e-06, "loss": 0.0863, "step": 22380 }, { "epoch": 3.3254121491162927, "grad_norm": 1.2496856451034546, "learning_rate": 6.674587850883708e-06, "loss": 0.0712, "step": 22390 }, { "epoch": 3.326897371156988, "grad_norm": 1.0878329277038574, "learning_rate": 6.673102628843012e-06, "loss": 0.0709, "step": 22400 }, { "epoch": 3.328382593197683, "grad_norm": 0.8148536086082458, "learning_rate": 6.671617406802318e-06, "loss": 0.0757, "step": 22410 }, { "epoch": 3.3298678152383783, "grad_norm": 1.1004198789596558, "learning_rate": 6.670132184761623e-06, "loss": 0.0645, "step": 22420 }, { "epoch": 3.331353037279073, "grad_norm": 0.932357132434845, "learning_rate": 6.668646962720927e-06, "loss": 0.0554, "step": 22430 }, { "epoch": 3.332838259319768, "grad_norm": 0.9160596132278442, "learning_rate": 6.667161740680233e-06, "loss": 0.0582, "step": 22440 }, { "epoch": 3.3343234813604634, "grad_norm": 0.7866541147232056, "learning_rate": 6.665676518639536e-06, "loss": 0.077, "step": 22450 }, { "epoch": 3.3358087034011583, "grad_norm": 0.6644066572189331, "learning_rate": 6.664191296598842e-06, "loss": 0.0705, "step": 22460 }, { "epoch": 3.3372939254418537, "grad_norm": 0.7443733811378479, "learning_rate": 6.662706074558148e-06, "loss": 0.0603, "step": 22470 }, { "epoch": 3.3387791474825486, "grad_norm": 0.8200944662094116, "learning_rate": 6.661220852517451e-06, "loss": 0.0663, "step": 22480 }, { "epoch": 3.340264369523244, "grad_norm": 0.42802894115448, "learning_rate": 6.6597356304767565e-06, "loss": 0.0498, "step": 22490 }, { "epoch": 3.341749591563939, "grad_norm": 0.8416561484336853, "learning_rate": 6.658250408436062e-06, "loss": 0.0595, "step": 22500 }, { "epoch": 3.3432348136046337, "grad_norm": 0.5720903277397156, "learning_rate": 6.656765186395366e-06, "loss": 0.0639, "step": 22510 }, { "epoch": 3.344720035645329, "grad_norm": 0.6999571919441223, "learning_rate": 6.6552799643546715e-06, "loss": 0.0479, "step": 22520 }, { "epoch": 3.346205257686024, "grad_norm": 1.437461018562317, "learning_rate": 6.653794742313976e-06, "loss": 0.0571, "step": 22530 }, { "epoch": 3.3476904797267193, "grad_norm": 0.9364057779312134, "learning_rate": 6.652309520273281e-06, "loss": 0.066, "step": 22540 }, { "epoch": 3.349175701767414, "grad_norm": 0.8423236608505249, "learning_rate": 6.6508242982325865e-06, "loss": 0.0669, "step": 22550 }, { "epoch": 3.3506609238081095, "grad_norm": 0.8278792500495911, "learning_rate": 6.649339076191891e-06, "loss": 0.0747, "step": 22560 }, { "epoch": 3.3521461458488044, "grad_norm": 0.6401219367980957, "learning_rate": 6.647853854151196e-06, "loss": 0.0752, "step": 22570 }, { "epoch": 3.3536313678894993, "grad_norm": 0.652182400226593, "learning_rate": 6.6463686321105015e-06, "loss": 0.0773, "step": 22580 }, { "epoch": 3.3551165899301947, "grad_norm": 0.4512597322463989, "learning_rate": 6.644883410069806e-06, "loss": 0.0624, "step": 22590 }, { "epoch": 3.3566018119708896, "grad_norm": 0.7210500836372375, "learning_rate": 6.643398188029111e-06, "loss": 0.0687, "step": 22600 }, { "epoch": 3.358087034011585, "grad_norm": 0.9175443649291992, "learning_rate": 6.6419129659884165e-06, "loss": 0.0716, "step": 22610 }, { "epoch": 3.35957225605228, "grad_norm": 0.7475764155387878, "learning_rate": 6.64042774394772e-06, "loss": 0.0603, "step": 22620 }, { "epoch": 3.3610574780929747, "grad_norm": 1.225459337234497, "learning_rate": 6.638942521907026e-06, "loss": 0.0497, "step": 22630 }, { "epoch": 3.36254270013367, "grad_norm": 0.9790806174278259, "learning_rate": 6.63745729986633e-06, "loss": 0.0666, "step": 22640 }, { "epoch": 3.364027922174365, "grad_norm": 1.1390143632888794, "learning_rate": 6.635972077825635e-06, "loss": 0.0543, "step": 22650 }, { "epoch": 3.3655131442150603, "grad_norm": 1.215271234512329, "learning_rate": 6.63448685578494e-06, "loss": 0.0598, "step": 22660 }, { "epoch": 3.366998366255755, "grad_norm": 0.884739875793457, "learning_rate": 6.633001633744245e-06, "loss": 0.075, "step": 22670 }, { "epoch": 3.36848358829645, "grad_norm": 0.5074040293693542, "learning_rate": 6.63151641170355e-06, "loss": 0.0495, "step": 22680 }, { "epoch": 3.3699688103371455, "grad_norm": 1.5649079084396362, "learning_rate": 6.630031189662855e-06, "loss": 0.0591, "step": 22690 }, { "epoch": 3.3714540323778404, "grad_norm": 1.051287293434143, "learning_rate": 6.62854596762216e-06, "loss": 0.072, "step": 22700 }, { "epoch": 3.3729392544185357, "grad_norm": 0.6803810000419617, "learning_rate": 6.627060745581465e-06, "loss": 0.0678, "step": 22710 }, { "epoch": 3.3744244764592306, "grad_norm": 0.5742778778076172, "learning_rate": 6.62557552354077e-06, "loss": 0.0443, "step": 22720 }, { "epoch": 3.3759096984999255, "grad_norm": 0.5578451752662659, "learning_rate": 6.624090301500075e-06, "loss": 0.0693, "step": 22730 }, { "epoch": 3.377394920540621, "grad_norm": 0.9065197706222534, "learning_rate": 6.62260507945938e-06, "loss": 0.0615, "step": 22740 }, { "epoch": 3.3788801425813157, "grad_norm": 0.6993559002876282, "learning_rate": 6.6211198574186845e-06, "loss": 0.0793, "step": 22750 }, { "epoch": 3.380365364622011, "grad_norm": 1.2062517404556274, "learning_rate": 6.61963463537799e-06, "loss": 0.0769, "step": 22760 }, { "epoch": 3.381850586662706, "grad_norm": 0.5047938823699951, "learning_rate": 6.618149413337295e-06, "loss": 0.0767, "step": 22770 }, { "epoch": 3.3833358087034013, "grad_norm": 0.7543712854385376, "learning_rate": 6.616664191296599e-06, "loss": 0.0595, "step": 22780 }, { "epoch": 3.3848210307440962, "grad_norm": 1.2753219604492188, "learning_rate": 6.615178969255904e-06, "loss": 0.0673, "step": 22790 }, { "epoch": 3.386306252784791, "grad_norm": 1.725051760673523, "learning_rate": 6.613693747215209e-06, "loss": 0.0711, "step": 22800 }, { "epoch": 3.3877914748254865, "grad_norm": 0.7960348129272461, "learning_rate": 6.612208525174514e-06, "loss": 0.0603, "step": 22810 }, { "epoch": 3.3892766968661814, "grad_norm": 0.9733383655548096, "learning_rate": 6.610723303133819e-06, "loss": 0.0635, "step": 22820 }, { "epoch": 3.3907619189068767, "grad_norm": 0.8336113095283508, "learning_rate": 6.609238081093124e-06, "loss": 0.053, "step": 22830 }, { "epoch": 3.3922471409475716, "grad_norm": 0.8100163340568542, "learning_rate": 6.607752859052429e-06, "loss": 0.0821, "step": 22840 }, { "epoch": 3.393732362988267, "grad_norm": 0.5547587275505066, "learning_rate": 6.606267637011734e-06, "loss": 0.0632, "step": 22850 }, { "epoch": 3.395217585028962, "grad_norm": 0.4523991048336029, "learning_rate": 6.604782414971038e-06, "loss": 0.0573, "step": 22860 }, { "epoch": 3.3967028070696568, "grad_norm": 1.2900429964065552, "learning_rate": 6.603297192930344e-06, "loss": 0.0624, "step": 22870 }, { "epoch": 3.398188029110352, "grad_norm": 0.8760151267051697, "learning_rate": 6.601811970889649e-06, "loss": 0.0708, "step": 22880 }, { "epoch": 3.399673251151047, "grad_norm": 1.0388017892837524, "learning_rate": 6.600326748848953e-06, "loss": 0.0669, "step": 22890 }, { "epoch": 3.4011584731917424, "grad_norm": 1.0040520429611206, "learning_rate": 6.598841526808259e-06, "loss": 0.0545, "step": 22900 }, { "epoch": 3.4026436952324373, "grad_norm": 0.8709238767623901, "learning_rate": 6.597356304767564e-06, "loss": 0.0546, "step": 22910 }, { "epoch": 3.404128917273132, "grad_norm": 0.9482215642929077, "learning_rate": 6.595871082726868e-06, "loss": 0.0597, "step": 22920 }, { "epoch": 3.4056141393138275, "grad_norm": 0.9480280876159668, "learning_rate": 6.594385860686174e-06, "loss": 0.0628, "step": 22930 }, { "epoch": 3.4070993613545224, "grad_norm": 0.3669753074645996, "learning_rate": 6.592900638645479e-06, "loss": 0.0636, "step": 22940 }, { "epoch": 3.4085845833952177, "grad_norm": 0.4074847400188446, "learning_rate": 6.5914154166047825e-06, "loss": 0.0455, "step": 22950 }, { "epoch": 3.4100698054359126, "grad_norm": 0.976457417011261, "learning_rate": 6.589930194564088e-06, "loss": 0.0708, "step": 22960 }, { "epoch": 3.4115550274766075, "grad_norm": 0.8218967318534851, "learning_rate": 6.588444972523392e-06, "loss": 0.0435, "step": 22970 }, { "epoch": 3.413040249517303, "grad_norm": 0.7639546990394592, "learning_rate": 6.5869597504826975e-06, "loss": 0.0596, "step": 22980 }, { "epoch": 3.414525471557998, "grad_norm": 0.5619062781333923, "learning_rate": 6.585474528442003e-06, "loss": 0.0872, "step": 22990 }, { "epoch": 3.416010693598693, "grad_norm": 1.0580952167510986, "learning_rate": 6.583989306401307e-06, "loss": 0.0723, "step": 23000 }, { "epoch": 3.417495915639388, "grad_norm": 0.6083091497421265, "learning_rate": 6.5825040843606125e-06, "loss": 0.058, "step": 23010 }, { "epoch": 3.418981137680083, "grad_norm": 0.5163406133651733, "learning_rate": 6.581018862319918e-06, "loss": 0.0649, "step": 23020 }, { "epoch": 3.4204663597207783, "grad_norm": 1.1454685926437378, "learning_rate": 6.579533640279222e-06, "loss": 0.0502, "step": 23030 }, { "epoch": 3.421951581761473, "grad_norm": 0.774257481098175, "learning_rate": 6.5780484182385275e-06, "loss": 0.0803, "step": 23040 }, { "epoch": 3.4234368038021685, "grad_norm": 0.5684376955032349, "learning_rate": 6.576563196197832e-06, "loss": 0.0679, "step": 23050 }, { "epoch": 3.4249220258428634, "grad_norm": 1.2006176710128784, "learning_rate": 6.575077974157137e-06, "loss": 0.0824, "step": 23060 }, { "epoch": 3.4264072478835588, "grad_norm": 0.6941164135932922, "learning_rate": 6.5735927521164425e-06, "loss": 0.0684, "step": 23070 }, { "epoch": 3.4278924699242537, "grad_norm": 0.8499606847763062, "learning_rate": 6.572107530075746e-06, "loss": 0.069, "step": 23080 }, { "epoch": 3.4293776919649486, "grad_norm": 0.8829616904258728, "learning_rate": 6.570622308035051e-06, "loss": 0.0552, "step": 23090 }, { "epoch": 3.430862914005644, "grad_norm": 0.6436765789985657, "learning_rate": 6.5691370859943575e-06, "loss": 0.0682, "step": 23100 }, { "epoch": 3.432348136046339, "grad_norm": 0.8526592254638672, "learning_rate": 6.567651863953661e-06, "loss": 0.0473, "step": 23110 }, { "epoch": 3.433833358087034, "grad_norm": 0.9360619187355042, "learning_rate": 6.566166641912966e-06, "loss": 0.0771, "step": 23120 }, { "epoch": 3.435318580127729, "grad_norm": 1.5167564153671265, "learning_rate": 6.564681419872272e-06, "loss": 0.0585, "step": 23130 }, { "epoch": 3.4368038021684244, "grad_norm": 0.6640611886978149, "learning_rate": 6.563196197831576e-06, "loss": 0.0646, "step": 23140 }, { "epoch": 3.4382890242091193, "grad_norm": 0.8414837121963501, "learning_rate": 6.561710975790881e-06, "loss": 0.0584, "step": 23150 }, { "epoch": 3.439774246249814, "grad_norm": 0.364753395318985, "learning_rate": 6.560225753750186e-06, "loss": 0.054, "step": 23160 }, { "epoch": 3.4412594682905095, "grad_norm": 0.7285709977149963, "learning_rate": 6.558740531709491e-06, "loss": 0.0586, "step": 23170 }, { "epoch": 3.4427446903312044, "grad_norm": 0.9097562432289124, "learning_rate": 6.557255309668796e-06, "loss": 0.0407, "step": 23180 }, { "epoch": 3.4442299123719, "grad_norm": 0.6811128258705139, "learning_rate": 6.555770087628101e-06, "loss": 0.0655, "step": 23190 }, { "epoch": 3.4457151344125947, "grad_norm": 0.30362212657928467, "learning_rate": 6.554284865587406e-06, "loss": 0.0798, "step": 23200 }, { "epoch": 3.4472003564532896, "grad_norm": 0.7332984209060669, "learning_rate": 6.552799643546711e-06, "loss": 0.0654, "step": 23210 }, { "epoch": 3.448685578493985, "grad_norm": 1.3603813648223877, "learning_rate": 6.551314421506016e-06, "loss": 0.0624, "step": 23220 }, { "epoch": 3.45017080053468, "grad_norm": 0.3083489239215851, "learning_rate": 6.549829199465321e-06, "loss": 0.0615, "step": 23230 }, { "epoch": 3.451656022575375, "grad_norm": 1.117027759552002, "learning_rate": 6.548343977424626e-06, "loss": 0.0636, "step": 23240 }, { "epoch": 3.45314124461607, "grad_norm": 0.9651148319244385, "learning_rate": 6.54685875538393e-06, "loss": 0.0591, "step": 23250 }, { "epoch": 3.454626466656765, "grad_norm": 1.4626349210739136, "learning_rate": 6.545373533343235e-06, "loss": 0.0519, "step": 23260 }, { "epoch": 3.4561116886974603, "grad_norm": 0.6410413384437561, "learning_rate": 6.54388831130254e-06, "loss": 0.0643, "step": 23270 }, { "epoch": 3.4575969107381552, "grad_norm": 1.3073314428329468, "learning_rate": 6.542403089261845e-06, "loss": 0.0811, "step": 23280 }, { "epoch": 3.4590821327788506, "grad_norm": 0.36089351773262024, "learning_rate": 6.54091786722115e-06, "loss": 0.0532, "step": 23290 }, { "epoch": 3.4605673548195455, "grad_norm": 1.0440473556518555, "learning_rate": 6.539432645180455e-06, "loss": 0.0503, "step": 23300 }, { "epoch": 3.4620525768602404, "grad_norm": 0.4468558728694916, "learning_rate": 6.53794742313976e-06, "loss": 0.0672, "step": 23310 }, { "epoch": 3.4635377989009357, "grad_norm": 1.0561190843582153, "learning_rate": 6.536462201099065e-06, "loss": 0.065, "step": 23320 }, { "epoch": 3.4650230209416306, "grad_norm": 1.0147300958633423, "learning_rate": 6.53497697905837e-06, "loss": 0.0623, "step": 23330 }, { "epoch": 3.466508242982326, "grad_norm": 1.0339702367782593, "learning_rate": 6.533491757017675e-06, "loss": 0.0625, "step": 23340 }, { "epoch": 3.467993465023021, "grad_norm": 1.0387835502624512, "learning_rate": 6.53200653497698e-06, "loss": 0.0713, "step": 23350 }, { "epoch": 3.469478687063716, "grad_norm": 0.9874463081359863, "learning_rate": 6.530521312936285e-06, "loss": 0.0698, "step": 23360 }, { "epoch": 3.470963909104411, "grad_norm": 0.5879045724868774, "learning_rate": 6.52903609089559e-06, "loss": 0.053, "step": 23370 }, { "epoch": 3.472449131145106, "grad_norm": 0.4100988805294037, "learning_rate": 6.5275508688548935e-06, "loss": 0.0655, "step": 23380 }, { "epoch": 3.4739343531858013, "grad_norm": 1.068902611732483, "learning_rate": 6.5260656468142e-06, "loss": 0.0691, "step": 23390 }, { "epoch": 3.4754195752264962, "grad_norm": 1.1891417503356934, "learning_rate": 6.524580424773505e-06, "loss": 0.0615, "step": 23400 }, { "epoch": 3.4769047972671916, "grad_norm": 0.6840189695358276, "learning_rate": 6.5230952027328085e-06, "loss": 0.0698, "step": 23410 }, { "epoch": 3.4783900193078865, "grad_norm": 0.9377675652503967, "learning_rate": 6.521609980692114e-06, "loss": 0.0745, "step": 23420 }, { "epoch": 3.479875241348582, "grad_norm": 1.2862908840179443, "learning_rate": 6.520124758651419e-06, "loss": 0.0667, "step": 23430 }, { "epoch": 3.4813604633892767, "grad_norm": 0.8998900055885315, "learning_rate": 6.5186395366107235e-06, "loss": 0.0741, "step": 23440 }, { "epoch": 3.4828456854299716, "grad_norm": 1.0825127363204956, "learning_rate": 6.517154314570029e-06, "loss": 0.0732, "step": 23450 }, { "epoch": 3.484330907470667, "grad_norm": 0.8313888311386108, "learning_rate": 6.515669092529334e-06, "loss": 0.0722, "step": 23460 }, { "epoch": 3.485816129511362, "grad_norm": 0.6264531016349792, "learning_rate": 6.5141838704886384e-06, "loss": 0.0719, "step": 23470 }, { "epoch": 3.4873013515520572, "grad_norm": 0.6491361856460571, "learning_rate": 6.512698648447944e-06, "loss": 0.0572, "step": 23480 }, { "epoch": 3.488786573592752, "grad_norm": 0.5687041878700256, "learning_rate": 6.511213426407248e-06, "loss": 0.0584, "step": 23490 }, { "epoch": 3.490271795633447, "grad_norm": 0.7308996319770813, "learning_rate": 6.5097282043665534e-06, "loss": 0.064, "step": 23500 }, { "epoch": 3.4917570176741424, "grad_norm": 0.7591047883033752, "learning_rate": 6.508242982325859e-06, "loss": 0.0808, "step": 23510 }, { "epoch": 3.4932422397148373, "grad_norm": 1.3734276294708252, "learning_rate": 6.506757760285163e-06, "loss": 0.0553, "step": 23520 }, { "epoch": 3.4947274617555326, "grad_norm": 0.7951934337615967, "learning_rate": 6.5052725382444684e-06, "loss": 0.0522, "step": 23530 }, { "epoch": 3.4962126837962275, "grad_norm": 1.0081889629364014, "learning_rate": 6.503787316203774e-06, "loss": 0.0606, "step": 23540 }, { "epoch": 3.4976979058369224, "grad_norm": 0.7433433532714844, "learning_rate": 6.502302094163077e-06, "loss": 0.0499, "step": 23550 }, { "epoch": 3.4991831278776178, "grad_norm": 0.96690434217453, "learning_rate": 6.500816872122383e-06, "loss": 0.0702, "step": 23560 }, { "epoch": 3.5006683499183127, "grad_norm": 0.7072582244873047, "learning_rate": 6.499331650081687e-06, "loss": 0.0641, "step": 23570 }, { "epoch": 3.502153571959008, "grad_norm": 0.7534715533256531, "learning_rate": 6.497846428040992e-06, "loss": 0.0663, "step": 23580 }, { "epoch": 3.503638793999703, "grad_norm": 0.7846993207931519, "learning_rate": 6.496361206000298e-06, "loss": 0.0465, "step": 23590 }, { "epoch": 3.505124016040398, "grad_norm": 0.8195766806602478, "learning_rate": 6.494875983959602e-06, "loss": 0.0579, "step": 23600 }, { "epoch": 3.506609238081093, "grad_norm": 0.5940980911254883, "learning_rate": 6.493390761918907e-06, "loss": 0.054, "step": 23610 }, { "epoch": 3.508094460121788, "grad_norm": 0.36276596784591675, "learning_rate": 6.4919055398782126e-06, "loss": 0.0522, "step": 23620 }, { "epoch": 3.5095796821624834, "grad_norm": 0.5541412234306335, "learning_rate": 6.490420317837517e-06, "loss": 0.0748, "step": 23630 }, { "epoch": 3.5110649042031783, "grad_norm": 1.0632083415985107, "learning_rate": 6.488935095796822e-06, "loss": 0.0667, "step": 23640 }, { "epoch": 3.512550126243873, "grad_norm": 0.9306591749191284, "learning_rate": 6.4874498737561276e-06, "loss": 0.0906, "step": 23650 }, { "epoch": 3.5140353482845685, "grad_norm": 0.5102431774139404, "learning_rate": 6.485964651715432e-06, "loss": 0.0801, "step": 23660 }, { "epoch": 3.515520570325264, "grad_norm": 1.087111234664917, "learning_rate": 6.484479429674737e-06, "loss": 0.0702, "step": 23670 }, { "epoch": 3.517005792365959, "grad_norm": 0.9563620686531067, "learning_rate": 6.482994207634042e-06, "loss": 0.0557, "step": 23680 }, { "epoch": 3.5184910144066537, "grad_norm": 0.8023037314414978, "learning_rate": 6.481508985593347e-06, "loss": 0.0729, "step": 23690 }, { "epoch": 3.519976236447349, "grad_norm": 2.2834434509277344, "learning_rate": 6.480023763552652e-06, "loss": 0.0626, "step": 23700 }, { "epoch": 3.521461458488044, "grad_norm": 0.9204877018928528, "learning_rate": 6.478538541511956e-06, "loss": 0.0727, "step": 23710 }, { "epoch": 3.5229466805287393, "grad_norm": 0.6565443277359009, "learning_rate": 6.477053319471261e-06, "loss": 0.057, "step": 23720 }, { "epoch": 3.524431902569434, "grad_norm": 1.235333800315857, "learning_rate": 6.475568097430566e-06, "loss": 0.0834, "step": 23730 }, { "epoch": 3.525917124610129, "grad_norm": 0.5262104868888855, "learning_rate": 6.474082875389871e-06, "loss": 0.0619, "step": 23740 }, { "epoch": 3.5274023466508244, "grad_norm": 0.6873980164527893, "learning_rate": 6.472597653349176e-06, "loss": 0.0568, "step": 23750 }, { "epoch": 3.5288875686915193, "grad_norm": 0.794663667678833, "learning_rate": 6.471112431308481e-06, "loss": 0.0679, "step": 23760 }, { "epoch": 3.5303727907322147, "grad_norm": 0.4290243983268738, "learning_rate": 6.469627209267786e-06, "loss": 0.0682, "step": 23770 }, { "epoch": 3.5318580127729096, "grad_norm": 0.8954272866249084, "learning_rate": 6.468141987227091e-06, "loss": 0.0676, "step": 23780 }, { "epoch": 3.5333432348136045, "grad_norm": 1.1637274026870728, "learning_rate": 6.4666567651863956e-06, "loss": 0.0587, "step": 23790 }, { "epoch": 3.5348284568543, "grad_norm": 1.2074244022369385, "learning_rate": 6.465171543145701e-06, "loss": 0.0631, "step": 23800 }, { "epoch": 3.5363136788949947, "grad_norm": 1.0475409030914307, "learning_rate": 6.463686321105006e-06, "loss": 0.0657, "step": 23810 }, { "epoch": 3.53779890093569, "grad_norm": 0.9346685409545898, "learning_rate": 6.4622010990643106e-06, "loss": 0.059, "step": 23820 }, { "epoch": 3.539284122976385, "grad_norm": 1.0918734073638916, "learning_rate": 6.460715877023616e-06, "loss": 0.0723, "step": 23830 }, { "epoch": 3.54076934501708, "grad_norm": 0.8081991672515869, "learning_rate": 6.459230654982921e-06, "loss": 0.0414, "step": 23840 }, { "epoch": 3.542254567057775, "grad_norm": 0.7394802570343018, "learning_rate": 6.457745432942225e-06, "loss": 0.0583, "step": 23850 }, { "epoch": 3.54373978909847, "grad_norm": 0.40993183851242065, "learning_rate": 6.456260210901531e-06, "loss": 0.0516, "step": 23860 }, { "epoch": 3.5452250111391654, "grad_norm": 1.0917972326278687, "learning_rate": 6.454774988860836e-06, "loss": 0.0727, "step": 23870 }, { "epoch": 3.5467102331798603, "grad_norm": 0.5183740854263306, "learning_rate": 6.45328976682014e-06, "loss": 0.0638, "step": 23880 }, { "epoch": 3.5481954552205552, "grad_norm": 0.5838718414306641, "learning_rate": 6.451804544779445e-06, "loss": 0.0729, "step": 23890 }, { "epoch": 3.5496806772612506, "grad_norm": 1.3712800741195679, "learning_rate": 6.450319322738749e-06, "loss": 0.0893, "step": 23900 }, { "epoch": 3.5511658993019455, "grad_norm": 1.15109384059906, "learning_rate": 6.448834100698055e-06, "loss": 0.0761, "step": 23910 }, { "epoch": 3.552651121342641, "grad_norm": 0.6892343163490295, "learning_rate": 6.44734887865736e-06, "loss": 0.058, "step": 23920 }, { "epoch": 3.5541363433833357, "grad_norm": 0.5948600172996521, "learning_rate": 6.445863656616664e-06, "loss": 0.0765, "step": 23930 }, { "epoch": 3.5556215654240306, "grad_norm": 1.3563827276229858, "learning_rate": 6.44437843457597e-06, "loss": 0.063, "step": 23940 }, { "epoch": 3.557106787464726, "grad_norm": 1.584236741065979, "learning_rate": 6.442893212535275e-06, "loss": 0.0547, "step": 23950 }, { "epoch": 3.5585920095054213, "grad_norm": 0.9074768424034119, "learning_rate": 6.441407990494579e-06, "loss": 0.0747, "step": 23960 }, { "epoch": 3.560077231546116, "grad_norm": 0.729647159576416, "learning_rate": 6.439922768453885e-06, "loss": 0.068, "step": 23970 }, { "epoch": 3.561562453586811, "grad_norm": 0.7314901351928711, "learning_rate": 6.438437546413189e-06, "loss": 0.0637, "step": 23980 }, { "epoch": 3.5630476756275065, "grad_norm": 0.5402231812477112, "learning_rate": 6.436952324372494e-06, "loss": 0.0805, "step": 23990 }, { "epoch": 3.5645328976682014, "grad_norm": 0.49268588423728943, "learning_rate": 6.4354671023318e-06, "loss": 0.0752, "step": 24000 }, { "epoch": 3.5660181197088967, "grad_norm": 0.5215333700180054, "learning_rate": 6.433981880291103e-06, "loss": 0.0638, "step": 24010 }, { "epoch": 3.5675033417495916, "grad_norm": 0.7803683280944824, "learning_rate": 6.4324966582504085e-06, "loss": 0.0471, "step": 24020 }, { "epoch": 3.5689885637902865, "grad_norm": 0.8906722068786621, "learning_rate": 6.431011436209714e-06, "loss": 0.0708, "step": 24030 }, { "epoch": 3.570473785830982, "grad_norm": 1.0567264556884766, "learning_rate": 6.429526214169018e-06, "loss": 0.0658, "step": 24040 }, { "epoch": 3.5719590078716768, "grad_norm": 0.6956542134284973, "learning_rate": 6.4280409921283235e-06, "loss": 0.0808, "step": 24050 }, { "epoch": 3.573444229912372, "grad_norm": 0.4545905590057373, "learning_rate": 6.426555770087629e-06, "loss": 0.0548, "step": 24060 }, { "epoch": 3.574929451953067, "grad_norm": 0.5176657438278198, "learning_rate": 6.425070548046933e-06, "loss": 0.0424, "step": 24070 }, { "epoch": 3.576414673993762, "grad_norm": 0.5687969326972961, "learning_rate": 6.4235853260062385e-06, "loss": 0.0619, "step": 24080 }, { "epoch": 3.5778998960344572, "grad_norm": 0.5443336367607117, "learning_rate": 6.422100103965543e-06, "loss": 0.06, "step": 24090 }, { "epoch": 3.579385118075152, "grad_norm": 0.5380187034606934, "learning_rate": 6.420614881924848e-06, "loss": 0.0522, "step": 24100 }, { "epoch": 3.5808703401158475, "grad_norm": 1.4146056175231934, "learning_rate": 6.4191296598841535e-06, "loss": 0.0713, "step": 24110 }, { "epoch": 3.5823555621565424, "grad_norm": 1.016329288482666, "learning_rate": 6.417644437843458e-06, "loss": 0.0527, "step": 24120 }, { "epoch": 3.5838407841972373, "grad_norm": 0.656102180480957, "learning_rate": 6.416159215802763e-06, "loss": 0.0422, "step": 24130 }, { "epoch": 3.5853260062379326, "grad_norm": 0.3345576822757721, "learning_rate": 6.4146739937620685e-06, "loss": 0.0593, "step": 24140 }, { "epoch": 3.5868112282786275, "grad_norm": 0.9102980494499207, "learning_rate": 6.413188771721373e-06, "loss": 0.0616, "step": 24150 }, { "epoch": 3.588296450319323, "grad_norm": 0.9360125660896301, "learning_rate": 6.411703549680678e-06, "loss": 0.0526, "step": 24160 }, { "epoch": 3.5897816723600178, "grad_norm": 1.1276384592056274, "learning_rate": 6.4102183276399835e-06, "loss": 0.066, "step": 24170 }, { "epoch": 3.5912668944007127, "grad_norm": 0.8606460690498352, "learning_rate": 6.408733105599287e-06, "loss": 0.075, "step": 24180 }, { "epoch": 3.592752116441408, "grad_norm": 0.9443903565406799, "learning_rate": 6.407247883558592e-06, "loss": 0.0618, "step": 24190 }, { "epoch": 3.594237338482103, "grad_norm": 0.37735462188720703, "learning_rate": 6.405762661517897e-06, "loss": 0.0596, "step": 24200 }, { "epoch": 3.5957225605227983, "grad_norm": 0.828709602355957, "learning_rate": 6.404277439477202e-06, "loss": 0.051, "step": 24210 }, { "epoch": 3.597207782563493, "grad_norm": 0.9393916130065918, "learning_rate": 6.402792217436507e-06, "loss": 0.0636, "step": 24220 }, { "epoch": 3.598693004604188, "grad_norm": 1.130899429321289, "learning_rate": 6.401306995395812e-06, "loss": 0.0716, "step": 24230 }, { "epoch": 3.6001782266448834, "grad_norm": 0.730121374130249, "learning_rate": 6.399821773355117e-06, "loss": 0.0686, "step": 24240 }, { "epoch": 3.6016634486855788, "grad_norm": 1.095799446105957, "learning_rate": 6.398336551314422e-06, "loss": 0.0568, "step": 24250 }, { "epoch": 3.6031486707262737, "grad_norm": 1.1297565698623657, "learning_rate": 6.396851329273727e-06, "loss": 0.0611, "step": 24260 }, { "epoch": 3.6046338927669686, "grad_norm": 1.272932529449463, "learning_rate": 6.395366107233032e-06, "loss": 0.0617, "step": 24270 }, { "epoch": 3.606119114807664, "grad_norm": 1.3354920148849487, "learning_rate": 6.393880885192337e-06, "loss": 0.0724, "step": 24280 }, { "epoch": 3.607604336848359, "grad_norm": 0.8653399348258972, "learning_rate": 6.392395663151642e-06, "loss": 0.072, "step": 24290 }, { "epoch": 3.609089558889054, "grad_norm": 0.9461974501609802, "learning_rate": 6.390910441110947e-06, "loss": 0.0675, "step": 24300 }, { "epoch": 3.610574780929749, "grad_norm": 0.8618866205215454, "learning_rate": 6.389425219070251e-06, "loss": 0.0591, "step": 24310 }, { "epoch": 3.612060002970444, "grad_norm": 0.4727649390697479, "learning_rate": 6.387939997029556e-06, "loss": 0.0763, "step": 24320 }, { "epoch": 3.6135452250111393, "grad_norm": 1.1720751523971558, "learning_rate": 6.386454774988862e-06, "loss": 0.0532, "step": 24330 }, { "epoch": 3.615030447051834, "grad_norm": 0.5803553462028503, "learning_rate": 6.384969552948166e-06, "loss": 0.0632, "step": 24340 }, { "epoch": 3.6165156690925295, "grad_norm": 0.7322667241096497, "learning_rate": 6.383484330907471e-06, "loss": 0.0493, "step": 24350 }, { "epoch": 3.6180008911332244, "grad_norm": 0.9475997090339661, "learning_rate": 6.381999108866776e-06, "loss": 0.0617, "step": 24360 }, { "epoch": 3.6194861131739193, "grad_norm": 0.8482145667076111, "learning_rate": 6.380513886826081e-06, "loss": 0.0771, "step": 24370 }, { "epoch": 3.6209713352146147, "grad_norm": 0.8984361290931702, "learning_rate": 6.379028664785386e-06, "loss": 0.0663, "step": 24380 }, { "epoch": 3.6224565572553096, "grad_norm": 0.8019669651985168, "learning_rate": 6.377543442744691e-06, "loss": 0.0582, "step": 24390 }, { "epoch": 3.623941779296005, "grad_norm": 0.3070809543132782, "learning_rate": 6.376058220703996e-06, "loss": 0.0809, "step": 24400 }, { "epoch": 3.6254270013367, "grad_norm": 0.7207964062690735, "learning_rate": 6.374572998663301e-06, "loss": 0.0469, "step": 24410 }, { "epoch": 3.6269122233773947, "grad_norm": 0.755179226398468, "learning_rate": 6.373087776622605e-06, "loss": 0.0554, "step": 24420 }, { "epoch": 3.62839744541809, "grad_norm": 0.5037167072296143, "learning_rate": 6.371602554581911e-06, "loss": 0.0734, "step": 24430 }, { "epoch": 3.629882667458785, "grad_norm": 0.5806566476821899, "learning_rate": 6.370117332541216e-06, "loss": 0.0608, "step": 24440 }, { "epoch": 3.6313678894994803, "grad_norm": 1.1839208602905273, "learning_rate": 6.36863211050052e-06, "loss": 0.0766, "step": 24450 }, { "epoch": 3.632853111540175, "grad_norm": 0.4069625437259674, "learning_rate": 6.367146888459826e-06, "loss": 0.0609, "step": 24460 }, { "epoch": 3.63433833358087, "grad_norm": 0.9564564824104309, "learning_rate": 6.365661666419131e-06, "loss": 0.0662, "step": 24470 }, { "epoch": 3.6358235556215655, "grad_norm": 0.5861666202545166, "learning_rate": 6.3641764443784345e-06, "loss": 0.0597, "step": 24480 }, { "epoch": 3.6373087776622604, "grad_norm": 1.0000677108764648, "learning_rate": 6.36269122233774e-06, "loss": 0.0735, "step": 24490 }, { "epoch": 3.6387939997029557, "grad_norm": 0.9394515156745911, "learning_rate": 6.361206000297044e-06, "loss": 0.0565, "step": 24500 }, { "epoch": 3.6402792217436506, "grad_norm": 0.7486881017684937, "learning_rate": 6.3597207782563495e-06, "loss": 0.068, "step": 24510 }, { "epoch": 3.6417644437843455, "grad_norm": 0.7040836215019226, "learning_rate": 6.358235556215655e-06, "loss": 0.0592, "step": 24520 }, { "epoch": 3.643249665825041, "grad_norm": 1.6331576108932495, "learning_rate": 6.356750334174959e-06, "loss": 0.0806, "step": 24530 }, { "epoch": 3.644734887865736, "grad_norm": 0.8690558075904846, "learning_rate": 6.3552651121342645e-06, "loss": 0.0699, "step": 24540 }, { "epoch": 3.646220109906431, "grad_norm": 0.4748252332210541, "learning_rate": 6.35377989009357e-06, "loss": 0.0452, "step": 24550 }, { "epoch": 3.647705331947126, "grad_norm": 0.7027367949485779, "learning_rate": 6.352294668052874e-06, "loss": 0.0549, "step": 24560 }, { "epoch": 3.6491905539878213, "grad_norm": 0.8721972703933716, "learning_rate": 6.3508094460121795e-06, "loss": 0.0716, "step": 24570 }, { "epoch": 3.6506757760285162, "grad_norm": 0.5169384479522705, "learning_rate": 6.349324223971485e-06, "loss": 0.0529, "step": 24580 }, { "epoch": 3.6521609980692116, "grad_norm": 0.9389125108718872, "learning_rate": 6.347839001930789e-06, "loss": 0.0567, "step": 24590 }, { "epoch": 3.6536462201099065, "grad_norm": 0.5857188701629639, "learning_rate": 6.3463537798900945e-06, "loss": 0.0657, "step": 24600 }, { "epoch": 3.6551314421506014, "grad_norm": 1.0817548036575317, "learning_rate": 6.344868557849398e-06, "loss": 0.0675, "step": 24610 }, { "epoch": 3.6566166641912967, "grad_norm": 0.20750167965888977, "learning_rate": 6.343383335808704e-06, "loss": 0.0729, "step": 24620 }, { "epoch": 3.6581018862319916, "grad_norm": 0.8091875910758972, "learning_rate": 6.3418981137680095e-06, "loss": 0.077, "step": 24630 }, { "epoch": 3.659587108272687, "grad_norm": 1.0680159330368042, "learning_rate": 6.340412891727313e-06, "loss": 0.0823, "step": 24640 }, { "epoch": 3.661072330313382, "grad_norm": 0.6819460988044739, "learning_rate": 6.338927669686618e-06, "loss": 0.0765, "step": 24650 }, { "epoch": 3.6625575523540768, "grad_norm": 0.5348358750343323, "learning_rate": 6.337442447645924e-06, "loss": 0.0616, "step": 24660 }, { "epoch": 3.664042774394772, "grad_norm": 0.5871132016181946, "learning_rate": 6.335957225605228e-06, "loss": 0.0572, "step": 24670 }, { "epoch": 3.665527996435467, "grad_norm": 1.407729983329773, "learning_rate": 6.334472003564533e-06, "loss": 0.0809, "step": 24680 }, { "epoch": 3.6670132184761624, "grad_norm": 1.2429784536361694, "learning_rate": 6.332986781523839e-06, "loss": 0.077, "step": 24690 }, { "epoch": 3.6684984405168573, "grad_norm": 0.5828059911727905, "learning_rate": 6.331501559483143e-06, "loss": 0.0546, "step": 24700 }, { "epoch": 3.669983662557552, "grad_norm": 0.8209594488143921, "learning_rate": 6.330016337442448e-06, "loss": 0.0577, "step": 24710 }, { "epoch": 3.6714688845982475, "grad_norm": 0.982406497001648, "learning_rate": 6.328531115401753e-06, "loss": 0.048, "step": 24720 }, { "epoch": 3.6729541066389424, "grad_norm": 0.6951733827590942, "learning_rate": 6.327045893361058e-06, "loss": 0.0392, "step": 24730 }, { "epoch": 3.6744393286796377, "grad_norm": 0.7873642444610596, "learning_rate": 6.325560671320363e-06, "loss": 0.0755, "step": 24740 }, { "epoch": 3.6759245507203326, "grad_norm": 0.5620073080062866, "learning_rate": 6.324075449279668e-06, "loss": 0.0514, "step": 24750 }, { "epoch": 3.6774097727610275, "grad_norm": 0.9963449835777283, "learning_rate": 6.322590227238973e-06, "loss": 0.0903, "step": 24760 }, { "epoch": 3.678894994801723, "grad_norm": 0.5815417766571045, "learning_rate": 6.321105005198278e-06, "loss": 0.0651, "step": 24770 }, { "epoch": 3.680380216842418, "grad_norm": 0.6440846920013428, "learning_rate": 6.319619783157582e-06, "loss": 0.0617, "step": 24780 }, { "epoch": 3.681865438883113, "grad_norm": 1.67220139503479, "learning_rate": 6.318134561116888e-06, "loss": 0.0821, "step": 24790 }, { "epoch": 3.683350660923808, "grad_norm": 0.5515984296798706, "learning_rate": 6.316649339076193e-06, "loss": 0.0559, "step": 24800 }, { "epoch": 3.684835882964503, "grad_norm": 0.6719434857368469, "learning_rate": 6.315164117035497e-06, "loss": 0.048, "step": 24810 }, { "epoch": 3.6863211050051983, "grad_norm": 0.9868667721748352, "learning_rate": 6.313678894994802e-06, "loss": 0.0769, "step": 24820 }, { "epoch": 3.6878063270458936, "grad_norm": 0.6065626740455627, "learning_rate": 6.312193672954107e-06, "loss": 0.0749, "step": 24830 }, { "epoch": 3.6892915490865885, "grad_norm": 0.5721997618675232, "learning_rate": 6.310708450913412e-06, "loss": 0.0541, "step": 24840 }, { "epoch": 3.6907767711272834, "grad_norm": 0.5374056100845337, "learning_rate": 6.309223228872717e-06, "loss": 0.058, "step": 24850 }, { "epoch": 3.6922619931679788, "grad_norm": 0.7645929455757141, "learning_rate": 6.307738006832022e-06, "loss": 0.0908, "step": 24860 }, { "epoch": 3.6937472152086737, "grad_norm": 0.4349495470523834, "learning_rate": 6.306252784791327e-06, "loss": 0.0519, "step": 24870 }, { "epoch": 3.695232437249369, "grad_norm": 0.8713021278381348, "learning_rate": 6.304767562750632e-06, "loss": 0.0636, "step": 24880 }, { "epoch": 3.696717659290064, "grad_norm": 1.642775535583496, "learning_rate": 6.303282340709937e-06, "loss": 0.0604, "step": 24890 }, { "epoch": 3.698202881330759, "grad_norm": 1.0799657106399536, "learning_rate": 6.301797118669242e-06, "loss": 0.0684, "step": 24900 }, { "epoch": 3.699688103371454, "grad_norm": 1.1717464923858643, "learning_rate": 6.300311896628547e-06, "loss": 0.0724, "step": 24910 }, { "epoch": 3.701173325412149, "grad_norm": 0.6183547973632812, "learning_rate": 6.298826674587852e-06, "loss": 0.0655, "step": 24920 }, { "epoch": 3.7026585474528444, "grad_norm": 1.1880210638046265, "learning_rate": 6.297341452547157e-06, "loss": 0.0579, "step": 24930 }, { "epoch": 3.7041437694935393, "grad_norm": 0.7819348573684692, "learning_rate": 6.2958562305064605e-06, "loss": 0.0859, "step": 24940 }, { "epoch": 3.705628991534234, "grad_norm": 0.2400822639465332, "learning_rate": 6.294371008465766e-06, "loss": 0.0696, "step": 24950 }, { "epoch": 3.7071142135749295, "grad_norm": 0.9348524212837219, "learning_rate": 6.292885786425071e-06, "loss": 0.0821, "step": 24960 }, { "epoch": 3.7085994356156244, "grad_norm": 0.6501826047897339, "learning_rate": 6.2914005643843755e-06, "loss": 0.0694, "step": 24970 }, { "epoch": 3.71008465765632, "grad_norm": 0.9616579413414001, "learning_rate": 6.289915342343681e-06, "loss": 0.0566, "step": 24980 }, { "epoch": 3.7115698796970147, "grad_norm": 0.6568595170974731, "learning_rate": 6.288430120302986e-06, "loss": 0.0726, "step": 24990 }, { "epoch": 3.7130551017377096, "grad_norm": 0.4263269305229187, "learning_rate": 6.2869448982622904e-06, "loss": 0.0647, "step": 25000 }, { "epoch": 3.714540323778405, "grad_norm": 0.9449558258056641, "learning_rate": 6.285459676221596e-06, "loss": 0.0471, "step": 25010 }, { "epoch": 3.7160255458191, "grad_norm": 0.7658859491348267, "learning_rate": 6.2839744541809e-06, "loss": 0.0626, "step": 25020 }, { "epoch": 3.717510767859795, "grad_norm": 1.5539830923080444, "learning_rate": 6.2824892321402054e-06, "loss": 0.0663, "step": 25030 }, { "epoch": 3.71899598990049, "grad_norm": 1.5915319919586182, "learning_rate": 6.281004010099511e-06, "loss": 0.0729, "step": 25040 }, { "epoch": 3.720481211941185, "grad_norm": 0.37841907143592834, "learning_rate": 6.279518788058815e-06, "loss": 0.0725, "step": 25050 }, { "epoch": 3.7219664339818803, "grad_norm": 1.779516339302063, "learning_rate": 6.2780335660181204e-06, "loss": 0.0609, "step": 25060 }, { "epoch": 3.723451656022575, "grad_norm": 0.7953817844390869, "learning_rate": 6.276548343977426e-06, "loss": 0.0695, "step": 25070 }, { "epoch": 3.7249368780632706, "grad_norm": 0.8810813426971436, "learning_rate": 6.27506312193673e-06, "loss": 0.0714, "step": 25080 }, { "epoch": 3.7264221001039655, "grad_norm": 1.2131332159042358, "learning_rate": 6.2735778998960354e-06, "loss": 0.0642, "step": 25090 }, { "epoch": 3.7279073221446604, "grad_norm": 1.101547360420227, "learning_rate": 6.272092677855341e-06, "loss": 0.0596, "step": 25100 }, { "epoch": 3.7293925441853557, "grad_norm": 0.7129486203193665, "learning_rate": 6.270607455814644e-06, "loss": 0.0804, "step": 25110 }, { "epoch": 3.730877766226051, "grad_norm": 0.33758345246315, "learning_rate": 6.26912223377395e-06, "loss": 0.0565, "step": 25120 }, { "epoch": 3.732362988266746, "grad_norm": 0.6880179047584534, "learning_rate": 6.267637011733254e-06, "loss": 0.0703, "step": 25130 }, { "epoch": 3.733848210307441, "grad_norm": 0.6616323590278625, "learning_rate": 6.266151789692559e-06, "loss": 0.0585, "step": 25140 }, { "epoch": 3.735333432348136, "grad_norm": 0.4032650887966156, "learning_rate": 6.2646665676518646e-06, "loss": 0.0564, "step": 25150 }, { "epoch": 3.736818654388831, "grad_norm": 0.6478084325790405, "learning_rate": 6.263181345611169e-06, "loss": 0.0631, "step": 25160 }, { "epoch": 3.7383038764295264, "grad_norm": 0.5424610376358032, "learning_rate": 6.261696123570474e-06, "loss": 0.0668, "step": 25170 }, { "epoch": 3.7397890984702213, "grad_norm": 0.5494372844696045, "learning_rate": 6.2602109015297796e-06, "loss": 0.075, "step": 25180 }, { "epoch": 3.7412743205109162, "grad_norm": 0.7170408964157104, "learning_rate": 6.258725679489084e-06, "loss": 0.0684, "step": 25190 }, { "epoch": 3.7427595425516116, "grad_norm": 1.0184485912322998, "learning_rate": 6.257240457448389e-06, "loss": 0.0612, "step": 25200 }, { "epoch": 3.7442447645923065, "grad_norm": 1.0246021747589111, "learning_rate": 6.2557552354076946e-06, "loss": 0.0739, "step": 25210 }, { "epoch": 3.745729986633002, "grad_norm": 0.57315993309021, "learning_rate": 6.254270013366999e-06, "loss": 0.056, "step": 25220 }, { "epoch": 3.7472152086736967, "grad_norm": 1.1567462682724, "learning_rate": 6.252784791326304e-06, "loss": 0.0506, "step": 25230 }, { "epoch": 3.7487004307143916, "grad_norm": 0.8292871117591858, "learning_rate": 6.251299569285608e-06, "loss": 0.074, "step": 25240 }, { "epoch": 3.750185652755087, "grad_norm": 0.37577736377716064, "learning_rate": 6.249814347244913e-06, "loss": 0.0611, "step": 25250 }, { "epoch": 3.751670874795782, "grad_norm": 0.8664168119430542, "learning_rate": 6.248329125204219e-06, "loss": 0.0701, "step": 25260 }, { "epoch": 3.753156096836477, "grad_norm": 0.5670032501220703, "learning_rate": 6.246843903163523e-06, "loss": 0.0905, "step": 25270 }, { "epoch": 3.754641318877172, "grad_norm": 1.0470136404037476, "learning_rate": 6.245358681122828e-06, "loss": 0.0677, "step": 25280 }, { "epoch": 3.756126540917867, "grad_norm": 0.6110045313835144, "learning_rate": 6.243873459082133e-06, "loss": 0.0588, "step": 25290 }, { "epoch": 3.7576117629585624, "grad_norm": 0.8389524221420288, "learning_rate": 6.242388237041438e-06, "loss": 0.0741, "step": 25300 }, { "epoch": 3.7590969849992573, "grad_norm": 1.4117416143417358, "learning_rate": 6.240903015000743e-06, "loss": 0.0738, "step": 25310 }, { "epoch": 3.7605822070399526, "grad_norm": 0.7682881355285645, "learning_rate": 6.239417792960048e-06, "loss": 0.0679, "step": 25320 }, { "epoch": 3.7620674290806475, "grad_norm": 0.460290789604187, "learning_rate": 6.237932570919353e-06, "loss": 0.0719, "step": 25330 }, { "epoch": 3.7635526511213424, "grad_norm": 0.7808231711387634, "learning_rate": 6.236447348878658e-06, "loss": 0.072, "step": 25340 }, { "epoch": 3.7650378731620378, "grad_norm": 0.7019503712654114, "learning_rate": 6.2349621268379626e-06, "loss": 0.0669, "step": 25350 }, { "epoch": 3.7665230952027327, "grad_norm": 0.4001297354698181, "learning_rate": 6.233476904797268e-06, "loss": 0.0469, "step": 25360 }, { "epoch": 3.768008317243428, "grad_norm": 0.43783921003341675, "learning_rate": 6.231991682756573e-06, "loss": 0.0635, "step": 25370 }, { "epoch": 3.769493539284123, "grad_norm": 2.0623133182525635, "learning_rate": 6.2305064607158776e-06, "loss": 0.0614, "step": 25380 }, { "epoch": 3.770978761324818, "grad_norm": 0.8735755681991577, "learning_rate": 6.229021238675183e-06, "loss": 0.0732, "step": 25390 }, { "epoch": 3.772463983365513, "grad_norm": 0.5950528979301453, "learning_rate": 6.227536016634488e-06, "loss": 0.0434, "step": 25400 }, { "epoch": 3.7739492054062085, "grad_norm": 1.145756483078003, "learning_rate": 6.226050794593792e-06, "loss": 0.0663, "step": 25410 }, { "epoch": 3.7754344274469034, "grad_norm": 0.9675944447517395, "learning_rate": 6.224565572553097e-06, "loss": 0.051, "step": 25420 }, { "epoch": 3.7769196494875983, "grad_norm": 0.4051510989665985, "learning_rate": 6.223080350512402e-06, "loss": 0.0643, "step": 25430 }, { "epoch": 3.7784048715282936, "grad_norm": 1.5336406230926514, "learning_rate": 6.221595128471707e-06, "loss": 0.0639, "step": 25440 }, { "epoch": 3.7798900935689885, "grad_norm": 1.1730071306228638, "learning_rate": 6.220109906431012e-06, "loss": 0.0547, "step": 25450 }, { "epoch": 3.781375315609684, "grad_norm": 0.4078196585178375, "learning_rate": 6.218624684390316e-06, "loss": 0.0613, "step": 25460 }, { "epoch": 3.7828605376503788, "grad_norm": 0.5529675483703613, "learning_rate": 6.217139462349622e-06, "loss": 0.0591, "step": 25470 }, { "epoch": 3.7843457596910737, "grad_norm": 0.8793727159500122, "learning_rate": 6.215654240308927e-06, "loss": 0.0599, "step": 25480 }, { "epoch": 3.785830981731769, "grad_norm": 0.46623972058296204, "learning_rate": 6.214169018268231e-06, "loss": 0.0668, "step": 25490 }, { "epoch": 3.787316203772464, "grad_norm": 0.8581152558326721, "learning_rate": 6.212683796227537e-06, "loss": 0.0548, "step": 25500 }, { "epoch": 3.7888014258131593, "grad_norm": 1.0149903297424316, "learning_rate": 6.211198574186842e-06, "loss": 0.0669, "step": 25510 }, { "epoch": 3.790286647853854, "grad_norm": 0.6500771045684814, "learning_rate": 6.209713352146146e-06, "loss": 0.0825, "step": 25520 }, { "epoch": 3.791771869894549, "grad_norm": 0.389143705368042, "learning_rate": 6.208228130105452e-06, "loss": 0.0635, "step": 25530 }, { "epoch": 3.7932570919352444, "grad_norm": 0.9267625212669373, "learning_rate": 6.206742908064755e-06, "loss": 0.0611, "step": 25540 }, { "epoch": 3.7947423139759393, "grad_norm": 0.9734875559806824, "learning_rate": 6.205257686024061e-06, "loss": 0.0596, "step": 25550 }, { "epoch": 3.7962275360166347, "grad_norm": 1.2023221254348755, "learning_rate": 6.203772463983367e-06, "loss": 0.0666, "step": 25560 }, { "epoch": 3.7977127580573296, "grad_norm": 0.41107192635536194, "learning_rate": 6.20228724194267e-06, "loss": 0.0587, "step": 25570 }, { "epoch": 3.7991979800980245, "grad_norm": 0.35149720311164856, "learning_rate": 6.2008020199019755e-06, "loss": 0.0672, "step": 25580 }, { "epoch": 3.80068320213872, "grad_norm": 0.6217876076698303, "learning_rate": 6.199316797861281e-06, "loss": 0.0551, "step": 25590 }, { "epoch": 3.8021684241794147, "grad_norm": 0.991555392742157, "learning_rate": 6.197831575820585e-06, "loss": 0.0549, "step": 25600 }, { "epoch": 3.80365364622011, "grad_norm": 0.6766214370727539, "learning_rate": 6.1963463537798905e-06, "loss": 0.0671, "step": 25610 }, { "epoch": 3.805138868260805, "grad_norm": 0.981841504573822, "learning_rate": 6.194861131739196e-06, "loss": 0.0826, "step": 25620 }, { "epoch": 3.8066240903015, "grad_norm": 0.48015373945236206, "learning_rate": 6.1933759096985e-06, "loss": 0.0626, "step": 25630 }, { "epoch": 3.808109312342195, "grad_norm": 1.0952637195587158, "learning_rate": 6.1918906876578055e-06, "loss": 0.0672, "step": 25640 }, { "epoch": 3.80959453438289, "grad_norm": 0.9464254379272461, "learning_rate": 6.19040546561711e-06, "loss": 0.0747, "step": 25650 }, { "epoch": 3.8110797564235854, "grad_norm": 0.8909411430358887, "learning_rate": 6.188920243576415e-06, "loss": 0.0625, "step": 25660 }, { "epoch": 3.8125649784642803, "grad_norm": 0.5844570994377136, "learning_rate": 6.1874350215357205e-06, "loss": 0.0595, "step": 25670 }, { "epoch": 3.8140502005049752, "grad_norm": 0.5538705587387085, "learning_rate": 6.185949799495025e-06, "loss": 0.0721, "step": 25680 }, { "epoch": 3.8155354225456706, "grad_norm": 1.0891708135604858, "learning_rate": 6.18446457745433e-06, "loss": 0.0582, "step": 25690 }, { "epoch": 3.817020644586366, "grad_norm": 1.3899599313735962, "learning_rate": 6.1829793554136355e-06, "loss": 0.066, "step": 25700 }, { "epoch": 3.818505866627061, "grad_norm": 1.2233537435531616, "learning_rate": 6.181494133372939e-06, "loss": 0.0601, "step": 25710 }, { "epoch": 3.8199910886677557, "grad_norm": 1.2534723281860352, "learning_rate": 6.180008911332244e-06, "loss": 0.0591, "step": 25720 }, { "epoch": 3.821476310708451, "grad_norm": 0.7622737288475037, "learning_rate": 6.1785236892915505e-06, "loss": 0.0481, "step": 25730 }, { "epoch": 3.822961532749146, "grad_norm": 0.5585414171218872, "learning_rate": 6.177038467250854e-06, "loss": 0.0593, "step": 25740 }, { "epoch": 3.8244467547898413, "grad_norm": 0.9674488306045532, "learning_rate": 6.175553245210159e-06, "loss": 0.0751, "step": 25750 }, { "epoch": 3.825931976830536, "grad_norm": 0.8868407607078552, "learning_rate": 6.174068023169464e-06, "loss": 0.0672, "step": 25760 }, { "epoch": 3.827417198871231, "grad_norm": 0.5498301386833191, "learning_rate": 6.172582801128769e-06, "loss": 0.0674, "step": 25770 }, { "epoch": 3.8289024209119265, "grad_norm": 0.7588393092155457, "learning_rate": 6.171097579088074e-06, "loss": 0.0926, "step": 25780 }, { "epoch": 3.8303876429526214, "grad_norm": 0.8187506794929504, "learning_rate": 6.169612357047379e-06, "loss": 0.0668, "step": 25790 }, { "epoch": 3.8318728649933167, "grad_norm": 1.0678049325942993, "learning_rate": 6.168127135006684e-06, "loss": 0.0522, "step": 25800 }, { "epoch": 3.8333580870340116, "grad_norm": 0.4664575755596161, "learning_rate": 6.166641912965989e-06, "loss": 0.0593, "step": 25810 }, { "epoch": 3.8348433090747065, "grad_norm": 0.5488105416297913, "learning_rate": 6.165156690925294e-06, "loss": 0.0727, "step": 25820 }, { "epoch": 3.836328531115402, "grad_norm": 1.337288737297058, "learning_rate": 6.163671468884599e-06, "loss": 0.0738, "step": 25830 }, { "epoch": 3.8378137531560967, "grad_norm": 0.7804925441741943, "learning_rate": 6.162186246843904e-06, "loss": 0.0574, "step": 25840 }, { "epoch": 3.839298975196792, "grad_norm": 0.784286379814148, "learning_rate": 6.160701024803209e-06, "loss": 0.0615, "step": 25850 }, { "epoch": 3.840784197237487, "grad_norm": 0.9319252371788025, "learning_rate": 6.159215802762514e-06, "loss": 0.0705, "step": 25860 }, { "epoch": 3.842269419278182, "grad_norm": 1.4271937608718872, "learning_rate": 6.157730580721818e-06, "loss": 0.065, "step": 25870 }, { "epoch": 3.8437546413188772, "grad_norm": 1.0302149057388306, "learning_rate": 6.156245358681123e-06, "loss": 0.0713, "step": 25880 }, { "epoch": 3.845239863359572, "grad_norm": 0.7671197056770325, "learning_rate": 6.154760136640428e-06, "loss": 0.0585, "step": 25890 }, { "epoch": 3.8467250854002675, "grad_norm": 0.9330741763114929, "learning_rate": 6.153274914599733e-06, "loss": 0.0688, "step": 25900 }, { "epoch": 3.8482103074409624, "grad_norm": 1.258764624595642, "learning_rate": 6.151789692559038e-06, "loss": 0.0862, "step": 25910 }, { "epoch": 3.8496955294816573, "grad_norm": 0.9332870841026306, "learning_rate": 6.150304470518343e-06, "loss": 0.0856, "step": 25920 }, { "epoch": 3.8511807515223526, "grad_norm": 0.8132265210151672, "learning_rate": 6.148819248477648e-06, "loss": 0.0626, "step": 25930 }, { "epoch": 3.8526659735630475, "grad_norm": 0.781076192855835, "learning_rate": 6.147334026436953e-06, "loss": 0.0516, "step": 25940 }, { "epoch": 3.854151195603743, "grad_norm": 0.8629059791564941, "learning_rate": 6.145848804396257e-06, "loss": 0.0451, "step": 25950 }, { "epoch": 3.8556364176444378, "grad_norm": 0.7136142253875732, "learning_rate": 6.144363582355563e-06, "loss": 0.0697, "step": 25960 }, { "epoch": 3.8571216396851327, "grad_norm": 1.0320565700531006, "learning_rate": 6.142878360314868e-06, "loss": 0.0697, "step": 25970 }, { "epoch": 3.858606861725828, "grad_norm": 0.40204280614852905, "learning_rate": 6.141393138274172e-06, "loss": 0.0508, "step": 25980 }, { "epoch": 3.8600920837665234, "grad_norm": 0.9110078811645508, "learning_rate": 6.139907916233478e-06, "loss": 0.0751, "step": 25990 }, { "epoch": 3.8615773058072183, "grad_norm": 1.3291436433792114, "learning_rate": 6.138422694192783e-06, "loss": 0.0579, "step": 26000 }, { "epoch": 3.863062527847913, "grad_norm": 1.4911785125732422, "learning_rate": 6.1369374721520865e-06, "loss": 0.0796, "step": 26010 }, { "epoch": 3.8645477498886085, "grad_norm": 0.4904058873653412, "learning_rate": 6.135452250111393e-06, "loss": 0.0502, "step": 26020 }, { "epoch": 3.8660329719293034, "grad_norm": 0.5274474024772644, "learning_rate": 6.133967028070698e-06, "loss": 0.0784, "step": 26030 }, { "epoch": 3.8675181939699987, "grad_norm": 0.5127277970314026, "learning_rate": 6.1324818060300015e-06, "loss": 0.0738, "step": 26040 }, { "epoch": 3.8690034160106936, "grad_norm": 1.0338975191116333, "learning_rate": 6.130996583989307e-06, "loss": 0.0605, "step": 26050 }, { "epoch": 3.8704886380513885, "grad_norm": 0.818281888961792, "learning_rate": 6.129511361948611e-06, "loss": 0.0635, "step": 26060 }, { "epoch": 3.871973860092084, "grad_norm": 0.5960169434547424, "learning_rate": 6.1280261399079165e-06, "loss": 0.0827, "step": 26070 }, { "epoch": 3.873459082132779, "grad_norm": 1.0408084392547607, "learning_rate": 6.126540917867222e-06, "loss": 0.0479, "step": 26080 }, { "epoch": 3.874944304173474, "grad_norm": 1.4356499910354614, "learning_rate": 6.125055695826526e-06, "loss": 0.07, "step": 26090 }, { "epoch": 3.876429526214169, "grad_norm": 0.6846652626991272, "learning_rate": 6.1235704737858315e-06, "loss": 0.0552, "step": 26100 }, { "epoch": 3.877914748254864, "grad_norm": 0.6793112754821777, "learning_rate": 6.122085251745137e-06, "loss": 0.0655, "step": 26110 }, { "epoch": 3.8793999702955593, "grad_norm": 0.874123215675354, "learning_rate": 6.120600029704441e-06, "loss": 0.0608, "step": 26120 }, { "epoch": 3.880885192336254, "grad_norm": 1.1491456031799316, "learning_rate": 6.1191148076637465e-06, "loss": 0.0554, "step": 26130 }, { "epoch": 3.8823704143769495, "grad_norm": 0.9040345549583435, "learning_rate": 6.117629585623052e-06, "loss": 0.0579, "step": 26140 }, { "epoch": 3.8838556364176444, "grad_norm": 0.6160182952880859, "learning_rate": 6.116144363582356e-06, "loss": 0.0575, "step": 26150 }, { "epoch": 3.8853408584583393, "grad_norm": 1.2270127534866333, "learning_rate": 6.1146591415416615e-06, "loss": 0.0522, "step": 26160 }, { "epoch": 3.8868260804990347, "grad_norm": 0.9320999979972839, "learning_rate": 6.113173919500965e-06, "loss": 0.0774, "step": 26170 }, { "epoch": 3.8883113025397296, "grad_norm": 0.8286004066467285, "learning_rate": 6.11168869746027e-06, "loss": 0.0503, "step": 26180 }, { "epoch": 3.889796524580425, "grad_norm": 0.9038022756576538, "learning_rate": 6.110203475419576e-06, "loss": 0.0749, "step": 26190 }, { "epoch": 3.89128174662112, "grad_norm": 0.6100136041641235, "learning_rate": 6.10871825337888e-06, "loss": 0.0528, "step": 26200 }, { "epoch": 3.8927669686618147, "grad_norm": 1.1859416961669922, "learning_rate": 6.107233031338185e-06, "loss": 0.0621, "step": 26210 }, { "epoch": 3.89425219070251, "grad_norm": 0.968455970287323, "learning_rate": 6.105747809297491e-06, "loss": 0.0569, "step": 26220 }, { "epoch": 3.895737412743205, "grad_norm": 1.1066442728042603, "learning_rate": 6.104262587256795e-06, "loss": 0.0631, "step": 26230 }, { "epoch": 3.8972226347839003, "grad_norm": 0.8778436779975891, "learning_rate": 6.1027773652161e-06, "loss": 0.0619, "step": 26240 }, { "epoch": 3.898707856824595, "grad_norm": 0.9055481553077698, "learning_rate": 6.101292143175406e-06, "loss": 0.0501, "step": 26250 }, { "epoch": 3.90019307886529, "grad_norm": 0.7718532681465149, "learning_rate": 6.09980692113471e-06, "loss": 0.0688, "step": 26260 }, { "epoch": 3.9016783009059854, "grad_norm": 0.8786125183105469, "learning_rate": 6.098321699094015e-06, "loss": 0.0654, "step": 26270 }, { "epoch": 3.903163522946681, "grad_norm": 0.44965681433677673, "learning_rate": 6.09683647705332e-06, "loss": 0.0662, "step": 26280 }, { "epoch": 3.9046487449873757, "grad_norm": 0.6755138039588928, "learning_rate": 6.095351255012625e-06, "loss": 0.0705, "step": 26290 }, { "epoch": 3.9061339670280706, "grad_norm": 1.2151280641555786, "learning_rate": 6.09386603297193e-06, "loss": 0.0818, "step": 26300 }, { "epoch": 3.907619189068766, "grad_norm": 0.6397557258605957, "learning_rate": 6.092380810931235e-06, "loss": 0.0674, "step": 26310 }, { "epoch": 3.909104411109461, "grad_norm": 1.2610584497451782, "learning_rate": 6.09089558889054e-06, "loss": 0.0693, "step": 26320 }, { "epoch": 3.910589633150156, "grad_norm": 0.6538786292076111, "learning_rate": 6.089410366849845e-06, "loss": 0.0702, "step": 26330 }, { "epoch": 3.912074855190851, "grad_norm": 0.6008743643760681, "learning_rate": 6.087925144809149e-06, "loss": 0.0579, "step": 26340 }, { "epoch": 3.913560077231546, "grad_norm": 0.9000145792961121, "learning_rate": 6.086439922768454e-06, "loss": 0.0758, "step": 26350 }, { "epoch": 3.9150452992722413, "grad_norm": 0.43842408061027527, "learning_rate": 6.0849547007277595e-06, "loss": 0.0753, "step": 26360 }, { "epoch": 3.916530521312936, "grad_norm": 0.8281410932540894, "learning_rate": 6.083469478687064e-06, "loss": 0.0577, "step": 26370 }, { "epoch": 3.9180157433536316, "grad_norm": 0.7675429582595825, "learning_rate": 6.081984256646369e-06, "loss": 0.0612, "step": 26380 }, { "epoch": 3.9195009653943265, "grad_norm": 0.4206182658672333, "learning_rate": 6.080499034605674e-06, "loss": 0.0548, "step": 26390 }, { "epoch": 3.9209861874350214, "grad_norm": 0.7839239239692688, "learning_rate": 6.079013812564979e-06, "loss": 0.0602, "step": 26400 }, { "epoch": 3.9224714094757167, "grad_norm": 0.8620349168777466, "learning_rate": 6.077528590524284e-06, "loss": 0.0631, "step": 26410 }, { "epoch": 3.9239566315164116, "grad_norm": 0.6361775994300842, "learning_rate": 6.076043368483589e-06, "loss": 0.055, "step": 26420 }, { "epoch": 3.925441853557107, "grad_norm": 0.4495631158351898, "learning_rate": 6.074558146442894e-06, "loss": 0.053, "step": 26430 }, { "epoch": 3.926927075597802, "grad_norm": 0.644170343875885, "learning_rate": 6.073072924402199e-06, "loss": 0.0694, "step": 26440 }, { "epoch": 3.9284122976384968, "grad_norm": 0.987910270690918, "learning_rate": 6.071587702361504e-06, "loss": 0.0593, "step": 26450 }, { "epoch": 3.929897519679192, "grad_norm": 1.5340524911880493, "learning_rate": 6.070102480320809e-06, "loss": 0.0793, "step": 26460 }, { "epoch": 3.931382741719887, "grad_norm": 0.858149528503418, "learning_rate": 6.0686172582801125e-06, "loss": 0.0695, "step": 26470 }, { "epoch": 3.9328679637605823, "grad_norm": 0.4782363474369049, "learning_rate": 6.067132036239418e-06, "loss": 0.0689, "step": 26480 }, { "epoch": 3.9343531858012772, "grad_norm": 1.1806187629699707, "learning_rate": 6.065646814198724e-06, "loss": 0.0556, "step": 26490 }, { "epoch": 3.935838407841972, "grad_norm": 0.8682441711425781, "learning_rate": 6.0641615921580275e-06, "loss": 0.064, "step": 26500 }, { "epoch": 3.9373236298826675, "grad_norm": 0.6038737297058105, "learning_rate": 6.062676370117333e-06, "loss": 0.0628, "step": 26510 }, { "epoch": 3.9388088519233624, "grad_norm": 0.9265310764312744, "learning_rate": 6.061191148076638e-06, "loss": 0.0579, "step": 26520 }, { "epoch": 3.9402940739640577, "grad_norm": 1.1472411155700684, "learning_rate": 6.0597059260359424e-06, "loss": 0.0435, "step": 26530 }, { "epoch": 3.9417792960047526, "grad_norm": 0.5956252217292786, "learning_rate": 6.058220703995248e-06, "loss": 0.0651, "step": 26540 }, { "epoch": 3.9432645180454475, "grad_norm": 1.012588381767273, "learning_rate": 6.056735481954553e-06, "loss": 0.0659, "step": 26550 }, { "epoch": 3.944749740086143, "grad_norm": 0.8449077606201172, "learning_rate": 6.0552502599138574e-06, "loss": 0.0476, "step": 26560 }, { "epoch": 3.946234962126838, "grad_norm": 1.4247989654541016, "learning_rate": 6.053765037873163e-06, "loss": 0.0605, "step": 26570 }, { "epoch": 3.947720184167533, "grad_norm": 0.7127845883369446, "learning_rate": 6.052279815832467e-06, "loss": 0.0606, "step": 26580 }, { "epoch": 3.949205406208228, "grad_norm": 0.822958767414093, "learning_rate": 6.0507945937917724e-06, "loss": 0.0768, "step": 26590 }, { "epoch": 3.9506906282489234, "grad_norm": 1.0008773803710938, "learning_rate": 6.049309371751078e-06, "loss": 0.066, "step": 26600 }, { "epoch": 3.9521758502896183, "grad_norm": 0.5226492881774902, "learning_rate": 6.047824149710382e-06, "loss": 0.0507, "step": 26610 }, { "epoch": 3.9536610723303136, "grad_norm": 0.5270611047744751, "learning_rate": 6.0463389276696874e-06, "loss": 0.0596, "step": 26620 }, { "epoch": 3.9551462943710085, "grad_norm": 0.9116948843002319, "learning_rate": 6.044853705628993e-06, "loss": 0.0562, "step": 26630 }, { "epoch": 3.9566315164117034, "grad_norm": 0.8059374690055847, "learning_rate": 6.043368483588296e-06, "loss": 0.0457, "step": 26640 }, { "epoch": 3.9581167384523988, "grad_norm": 0.9144610166549683, "learning_rate": 6.041883261547602e-06, "loss": 0.0628, "step": 26650 }, { "epoch": 3.9596019604930937, "grad_norm": 0.5333276987075806, "learning_rate": 6.040398039506908e-06, "loss": 0.0497, "step": 26660 }, { "epoch": 3.961087182533789, "grad_norm": 1.1043226718902588, "learning_rate": 6.038912817466211e-06, "loss": 0.0534, "step": 26670 }, { "epoch": 3.962572404574484, "grad_norm": 0.7918885350227356, "learning_rate": 6.0374275954255166e-06, "loss": 0.062, "step": 26680 }, { "epoch": 3.964057626615179, "grad_norm": 1.52443528175354, "learning_rate": 6.035942373384821e-06, "loss": 0.0582, "step": 26690 }, { "epoch": 3.965542848655874, "grad_norm": 0.8226548433303833, "learning_rate": 6.034457151344126e-06, "loss": 0.0812, "step": 26700 }, { "epoch": 3.967028070696569, "grad_norm": 0.8545466661453247, "learning_rate": 6.0329719293034316e-06, "loss": 0.0554, "step": 26710 }, { "epoch": 3.9685132927372644, "grad_norm": 0.7938420176506042, "learning_rate": 6.031486707262736e-06, "loss": 0.0591, "step": 26720 }, { "epoch": 3.9699985147779593, "grad_norm": 1.5926915407180786, "learning_rate": 6.030001485222041e-06, "loss": 0.0677, "step": 26730 }, { "epoch": 3.971483736818654, "grad_norm": 0.5737379789352417, "learning_rate": 6.0285162631813466e-06, "loss": 0.0417, "step": 26740 }, { "epoch": 3.9729689588593495, "grad_norm": 0.7223270535469055, "learning_rate": 6.027031041140651e-06, "loss": 0.0727, "step": 26750 }, { "epoch": 3.9744541809000444, "grad_norm": 0.7630795836448669, "learning_rate": 6.025545819099956e-06, "loss": 0.0925, "step": 26760 }, { "epoch": 3.9759394029407398, "grad_norm": 1.0278931856155396, "learning_rate": 6.0240605970592616e-06, "loss": 0.0719, "step": 26770 }, { "epoch": 3.9774246249814347, "grad_norm": 1.7548854351043701, "learning_rate": 6.022575375018566e-06, "loss": 0.074, "step": 26780 }, { "epoch": 3.9789098470221296, "grad_norm": 0.629660427570343, "learning_rate": 6.021090152977871e-06, "loss": 0.0706, "step": 26790 }, { "epoch": 3.980395069062825, "grad_norm": 0.7200632095336914, "learning_rate": 6.019604930937175e-06, "loss": 0.0597, "step": 26800 }, { "epoch": 3.98188029110352, "grad_norm": 0.6580965518951416, "learning_rate": 6.01811970889648e-06, "loss": 0.055, "step": 26810 }, { "epoch": 3.983365513144215, "grad_norm": 0.9431211352348328, "learning_rate": 6.016634486855785e-06, "loss": 0.0658, "step": 26820 }, { "epoch": 3.98485073518491, "grad_norm": 1.132131576538086, "learning_rate": 6.01514926481509e-06, "loss": 0.0537, "step": 26830 }, { "epoch": 3.986335957225605, "grad_norm": 1.0278289318084717, "learning_rate": 6.013664042774395e-06, "loss": 0.0734, "step": 26840 }, { "epoch": 3.9878211792663003, "grad_norm": 0.5455945134162903, "learning_rate": 6.0121788207337e-06, "loss": 0.0656, "step": 26850 }, { "epoch": 3.9893064013069957, "grad_norm": 0.41397568583488464, "learning_rate": 6.010693598693005e-06, "loss": 0.0749, "step": 26860 }, { "epoch": 3.9907916233476906, "grad_norm": 1.662298560142517, "learning_rate": 6.00920837665231e-06, "loss": 0.0708, "step": 26870 }, { "epoch": 3.9922768453883855, "grad_norm": 0.9174128174781799, "learning_rate": 6.007723154611615e-06, "loss": 0.0524, "step": 26880 }, { "epoch": 3.993762067429081, "grad_norm": 1.349379539489746, "learning_rate": 6.00623793257092e-06, "loss": 0.0649, "step": 26890 }, { "epoch": 3.9952472894697757, "grad_norm": 0.9090511798858643, "learning_rate": 6.004752710530225e-06, "loss": 0.082, "step": 26900 }, { "epoch": 3.996732511510471, "grad_norm": 1.657594084739685, "learning_rate": 6.0032674884895296e-06, "loss": 0.0625, "step": 26910 }, { "epoch": 3.998217733551166, "grad_norm": 0.5109291672706604, "learning_rate": 6.001782266448835e-06, "loss": 0.0517, "step": 26920 }, { "epoch": 3.999702955591861, "grad_norm": 0.8310036659240723, "learning_rate": 6.00029704440814e-06, "loss": 0.0534, "step": 26930 }, { "epoch": 4.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.05716124549508095, "eval_runtime": 211.3669, "eval_samples_per_second": 179.872, "eval_steps_per_second": 5.625, "step": 26932 }, { "epoch": 4.001188177632556, "grad_norm": 0.7958173751831055, "learning_rate": 5.998811822367444e-06, "loss": 0.0648, "step": 26940 }, { "epoch": 4.0026733996732515, "grad_norm": 1.0557328462600708, "learning_rate": 5.99732660032675e-06, "loss": 0.0706, "step": 26950 }, { "epoch": 4.004158621713946, "grad_norm": 0.9724434018135071, "learning_rate": 5.995841378286055e-06, "loss": 0.0643, "step": 26960 }, { "epoch": 4.005643843754641, "grad_norm": 0.3466469645500183, "learning_rate": 5.994356156245359e-06, "loss": 0.0644, "step": 26970 }, { "epoch": 4.007129065795336, "grad_norm": 1.2057231664657593, "learning_rate": 5.992870934204664e-06, "loss": 0.0583, "step": 26980 }, { "epoch": 4.008614287836031, "grad_norm": 0.6493140459060669, "learning_rate": 5.991385712163968e-06, "loss": 0.054, "step": 26990 }, { "epoch": 4.010099509876727, "grad_norm": 1.1638193130493164, "learning_rate": 5.989900490123274e-06, "loss": 0.0723, "step": 27000 }, { "epoch": 4.011584731917422, "grad_norm": 1.2872439622879028, "learning_rate": 5.988415268082579e-06, "loss": 0.062, "step": 27010 }, { "epoch": 4.013069953958117, "grad_norm": 0.550545871257782, "learning_rate": 5.986930046041883e-06, "loss": 0.0556, "step": 27020 }, { "epoch": 4.014555175998812, "grad_norm": 0.6696489453315735, "learning_rate": 5.985444824001189e-06, "loss": 0.0567, "step": 27030 }, { "epoch": 4.0160403980395065, "grad_norm": 0.6009332537651062, "learning_rate": 5.983959601960494e-06, "loss": 0.065, "step": 27040 }, { "epoch": 4.017525620080202, "grad_norm": 0.652777373790741, "learning_rate": 5.982474379919798e-06, "loss": 0.0667, "step": 27050 }, { "epoch": 4.019010842120897, "grad_norm": 1.0050688982009888, "learning_rate": 5.980989157879104e-06, "loss": 0.0524, "step": 27060 }, { "epoch": 4.020496064161592, "grad_norm": 0.7190169095993042, "learning_rate": 5.979503935838409e-06, "loss": 0.0644, "step": 27070 }, { "epoch": 4.021981286202287, "grad_norm": 0.6970319151878357, "learning_rate": 5.978018713797713e-06, "loss": 0.0602, "step": 27080 }, { "epoch": 4.023466508242982, "grad_norm": 0.42795783281326294, "learning_rate": 5.976533491757019e-06, "loss": 0.0629, "step": 27090 }, { "epoch": 4.024951730283678, "grad_norm": 0.6158421635627747, "learning_rate": 5.975048269716322e-06, "loss": 0.0759, "step": 27100 }, { "epoch": 4.026436952324373, "grad_norm": 1.4070188999176025, "learning_rate": 5.9735630476756275e-06, "loss": 0.0618, "step": 27110 }, { "epoch": 4.0279221743650675, "grad_norm": 0.845020055770874, "learning_rate": 5.972077825634933e-06, "loss": 0.0675, "step": 27120 }, { "epoch": 4.029407396405762, "grad_norm": 0.945316731929779, "learning_rate": 5.970592603594237e-06, "loss": 0.0632, "step": 27130 }, { "epoch": 4.030892618446458, "grad_norm": 0.66912442445755, "learning_rate": 5.9691073815535425e-06, "loss": 0.0911, "step": 27140 }, { "epoch": 4.032377840487153, "grad_norm": 0.8626250624656677, "learning_rate": 5.967622159512848e-06, "loss": 0.0583, "step": 27150 }, { "epoch": 4.033863062527848, "grad_norm": 0.6361261606216431, "learning_rate": 5.966136937472152e-06, "loss": 0.0744, "step": 27160 }, { "epoch": 4.035348284568543, "grad_norm": 0.5740983486175537, "learning_rate": 5.9646517154314575e-06, "loss": 0.0663, "step": 27170 }, { "epoch": 4.036833506609238, "grad_norm": 1.0047610998153687, "learning_rate": 5.963166493390763e-06, "loss": 0.0693, "step": 27180 }, { "epoch": 4.038318728649934, "grad_norm": 0.9337833523750305, "learning_rate": 5.961681271350067e-06, "loss": 0.0608, "step": 27190 }, { "epoch": 4.0398039506906285, "grad_norm": 0.8169094324111938, "learning_rate": 5.9601960493093725e-06, "loss": 0.0591, "step": 27200 }, { "epoch": 4.041289172731323, "grad_norm": 0.7001012563705444, "learning_rate": 5.958710827268677e-06, "loss": 0.0627, "step": 27210 }, { "epoch": 4.042774394772018, "grad_norm": 0.5708398222923279, "learning_rate": 5.957225605227982e-06, "loss": 0.0448, "step": 27220 }, { "epoch": 4.044259616812713, "grad_norm": 0.5236473083496094, "learning_rate": 5.9557403831872875e-06, "loss": 0.0596, "step": 27230 }, { "epoch": 4.045744838853409, "grad_norm": 0.7374882698059082, "learning_rate": 5.954255161146591e-06, "loss": 0.0631, "step": 27240 }, { "epoch": 4.047230060894104, "grad_norm": 0.6501604318618774, "learning_rate": 5.952769939105897e-06, "loss": 0.0814, "step": 27250 }, { "epoch": 4.048715282934799, "grad_norm": 0.6356256604194641, "learning_rate": 5.9512847170652025e-06, "loss": 0.0734, "step": 27260 }, { "epoch": 4.050200504975494, "grad_norm": 1.1191198825836182, "learning_rate": 5.949799495024506e-06, "loss": 0.0536, "step": 27270 }, { "epoch": 4.051685727016189, "grad_norm": 1.2817082405090332, "learning_rate": 5.948314272983811e-06, "loss": 0.0651, "step": 27280 }, { "epoch": 4.053170949056884, "grad_norm": 0.793324887752533, "learning_rate": 5.946829050943117e-06, "loss": 0.0629, "step": 27290 }, { "epoch": 4.054656171097579, "grad_norm": 0.8034148216247559, "learning_rate": 5.945343828902421e-06, "loss": 0.0558, "step": 27300 }, { "epoch": 4.056141393138274, "grad_norm": 0.8231066465377808, "learning_rate": 5.943858606861726e-06, "loss": 0.0643, "step": 27310 }, { "epoch": 4.057626615178969, "grad_norm": 0.7635292410850525, "learning_rate": 5.942373384821031e-06, "loss": 0.076, "step": 27320 }, { "epoch": 4.059111837219664, "grad_norm": 0.9660631418228149, "learning_rate": 5.940888162780336e-06, "loss": 0.0491, "step": 27330 }, { "epoch": 4.06059705926036, "grad_norm": 1.2343653440475464, "learning_rate": 5.939402940739641e-06, "loss": 0.089, "step": 27340 }, { "epoch": 4.062082281301055, "grad_norm": 1.1457990407943726, "learning_rate": 5.937917718698946e-06, "loss": 0.0544, "step": 27350 }, { "epoch": 4.0635675033417495, "grad_norm": 0.5646716356277466, "learning_rate": 5.936432496658251e-06, "loss": 0.0587, "step": 27360 }, { "epoch": 4.065052725382444, "grad_norm": 0.5194732546806335, "learning_rate": 5.934947274617556e-06, "loss": 0.0533, "step": 27370 }, { "epoch": 4.066537947423139, "grad_norm": 1.2401492595672607, "learning_rate": 5.933462052576861e-06, "loss": 0.0585, "step": 27380 }, { "epoch": 4.068023169463835, "grad_norm": 0.6900237202644348, "learning_rate": 5.931976830536166e-06, "loss": 0.0612, "step": 27390 }, { "epoch": 4.06950839150453, "grad_norm": 1.1152747869491577, "learning_rate": 5.93049160849547e-06, "loss": 0.0562, "step": 27400 }, { "epoch": 4.070993613545225, "grad_norm": 1.0916218757629395, "learning_rate": 5.929006386454775e-06, "loss": 0.0718, "step": 27410 }, { "epoch": 4.07247883558592, "grad_norm": 0.3414757549762726, "learning_rate": 5.927521164414081e-06, "loss": 0.0556, "step": 27420 }, { "epoch": 4.073964057626615, "grad_norm": 0.3224826753139496, "learning_rate": 5.926035942373385e-06, "loss": 0.0722, "step": 27430 }, { "epoch": 4.0754492796673105, "grad_norm": 0.6068642139434814, "learning_rate": 5.92455072033269e-06, "loss": 0.0645, "step": 27440 }, { "epoch": 4.076934501708005, "grad_norm": 0.46677640080451965, "learning_rate": 5.923065498291995e-06, "loss": 0.0769, "step": 27450 }, { "epoch": 4.0784197237487, "grad_norm": 0.8329014778137207, "learning_rate": 5.9215802762513e-06, "loss": 0.0581, "step": 27460 }, { "epoch": 4.079904945789395, "grad_norm": 0.8308506011962891, "learning_rate": 5.920095054210605e-06, "loss": 0.0844, "step": 27470 }, { "epoch": 4.081390167830091, "grad_norm": 0.7580461502075195, "learning_rate": 5.91860983216991e-06, "loss": 0.0577, "step": 27480 }, { "epoch": 4.082875389870786, "grad_norm": 0.7593045234680176, "learning_rate": 5.917124610129215e-06, "loss": 0.0696, "step": 27490 }, { "epoch": 4.084360611911481, "grad_norm": 1.2238883972167969, "learning_rate": 5.91563938808852e-06, "loss": 0.0808, "step": 27500 }, { "epoch": 4.085845833952176, "grad_norm": 0.7654632329940796, "learning_rate": 5.914154166047824e-06, "loss": 0.0598, "step": 27510 }, { "epoch": 4.087331055992871, "grad_norm": 1.2114931344985962, "learning_rate": 5.91266894400713e-06, "loss": 0.0651, "step": 27520 }, { "epoch": 4.088816278033566, "grad_norm": 0.4927174746990204, "learning_rate": 5.911183721966435e-06, "loss": 0.0679, "step": 27530 }, { "epoch": 4.090301500074261, "grad_norm": 1.2465150356292725, "learning_rate": 5.909698499925739e-06, "loss": 0.0568, "step": 27540 }, { "epoch": 4.091786722114956, "grad_norm": 0.47399216890335083, "learning_rate": 5.908213277885045e-06, "loss": 0.0677, "step": 27550 }, { "epoch": 4.093271944155651, "grad_norm": 0.7320268750190735, "learning_rate": 5.90672805584435e-06, "loss": 0.0872, "step": 27560 }, { "epoch": 4.094757166196346, "grad_norm": 0.4289158582687378, "learning_rate": 5.9052428338036535e-06, "loss": 0.0718, "step": 27570 }, { "epoch": 4.096242388237042, "grad_norm": 0.8013712763786316, "learning_rate": 5.903757611762959e-06, "loss": 0.0584, "step": 27580 }, { "epoch": 4.097727610277737, "grad_norm": 0.7357133030891418, "learning_rate": 5.902272389722264e-06, "loss": 0.0568, "step": 27590 }, { "epoch": 4.099212832318432, "grad_norm": 0.7965511083602905, "learning_rate": 5.9007871676815685e-06, "loss": 0.0616, "step": 27600 }, { "epoch": 4.1006980543591265, "grad_norm": 0.9441985487937927, "learning_rate": 5.899301945640874e-06, "loss": 0.0695, "step": 27610 }, { "epoch": 4.102183276399821, "grad_norm": 0.781085193157196, "learning_rate": 5.897816723600178e-06, "loss": 0.0538, "step": 27620 }, { "epoch": 4.103668498440517, "grad_norm": 0.7707614302635193, "learning_rate": 5.8963315015594835e-06, "loss": 0.0568, "step": 27630 }, { "epoch": 4.105153720481212, "grad_norm": 0.6148474216461182, "learning_rate": 5.894846279518789e-06, "loss": 0.0533, "step": 27640 }, { "epoch": 4.106638942521907, "grad_norm": 0.46605074405670166, "learning_rate": 5.893361057478093e-06, "loss": 0.0554, "step": 27650 }, { "epoch": 4.108124164562602, "grad_norm": 1.4958701133728027, "learning_rate": 5.8918758354373985e-06, "loss": 0.0696, "step": 27660 }, { "epoch": 4.109609386603297, "grad_norm": 0.6936403512954712, "learning_rate": 5.890390613396704e-06, "loss": 0.0623, "step": 27670 }, { "epoch": 4.111094608643993, "grad_norm": 1.3368117809295654, "learning_rate": 5.888905391356008e-06, "loss": 0.0742, "step": 27680 }, { "epoch": 4.1125798306846875, "grad_norm": 1.0717170238494873, "learning_rate": 5.8874201693153135e-06, "loss": 0.0521, "step": 27690 }, { "epoch": 4.114065052725382, "grad_norm": 1.2059624195098877, "learning_rate": 5.885934947274619e-06, "loss": 0.0775, "step": 27700 }, { "epoch": 4.115550274766077, "grad_norm": 0.5689356923103333, "learning_rate": 5.884449725233923e-06, "loss": 0.0597, "step": 27710 }, { "epoch": 4.117035496806773, "grad_norm": 1.1349416971206665, "learning_rate": 5.8829645031932285e-06, "loss": 0.0588, "step": 27720 }, { "epoch": 4.118520718847468, "grad_norm": 0.9291647672653198, "learning_rate": 5.881479281152532e-06, "loss": 0.0506, "step": 27730 }, { "epoch": 4.120005940888163, "grad_norm": 0.9654160141944885, "learning_rate": 5.879994059111837e-06, "loss": 0.0616, "step": 27740 }, { "epoch": 4.121491162928858, "grad_norm": 0.6678839921951294, "learning_rate": 5.878508837071143e-06, "loss": 0.0505, "step": 27750 }, { "epoch": 4.122976384969553, "grad_norm": 0.7414665222167969, "learning_rate": 5.877023615030447e-06, "loss": 0.0543, "step": 27760 }, { "epoch": 4.124461607010248, "grad_norm": 1.0471031665802002, "learning_rate": 5.875538392989752e-06, "loss": 0.0598, "step": 27770 }, { "epoch": 4.125946829050943, "grad_norm": 0.4374360740184784, "learning_rate": 5.874053170949058e-06, "loss": 0.0503, "step": 27780 }, { "epoch": 4.127432051091638, "grad_norm": 1.4006041288375854, "learning_rate": 5.872567948908362e-06, "loss": 0.0502, "step": 27790 }, { "epoch": 4.128917273132333, "grad_norm": 0.6150054335594177, "learning_rate": 5.871082726867667e-06, "loss": 0.0639, "step": 27800 }, { "epoch": 4.130402495173028, "grad_norm": 0.6856205463409424, "learning_rate": 5.869597504826973e-06, "loss": 0.0635, "step": 27810 }, { "epoch": 4.131887717213724, "grad_norm": 0.5962873101234436, "learning_rate": 5.868112282786277e-06, "loss": 0.0538, "step": 27820 }, { "epoch": 4.133372939254419, "grad_norm": 1.0894391536712646, "learning_rate": 5.866627060745582e-06, "loss": 0.053, "step": 27830 }, { "epoch": 4.134858161295114, "grad_norm": 0.7894336581230164, "learning_rate": 5.865141838704887e-06, "loss": 0.0634, "step": 27840 }, { "epoch": 4.1363433833358085, "grad_norm": 0.5546037554740906, "learning_rate": 5.863656616664192e-06, "loss": 0.057, "step": 27850 }, { "epoch": 4.137828605376503, "grad_norm": 0.5174780488014221, "learning_rate": 5.862171394623497e-06, "loss": 0.0778, "step": 27860 }, { "epoch": 4.139313827417199, "grad_norm": 0.5153068900108337, "learning_rate": 5.860686172582801e-06, "loss": 0.0575, "step": 27870 }, { "epoch": 4.140799049457894, "grad_norm": 0.936219334602356, "learning_rate": 5.859200950542106e-06, "loss": 0.0567, "step": 27880 }, { "epoch": 4.142284271498589, "grad_norm": 0.8427369594573975, "learning_rate": 5.857715728501412e-06, "loss": 0.0647, "step": 27890 }, { "epoch": 4.143769493539284, "grad_norm": 0.8495630621910095, "learning_rate": 5.856230506460716e-06, "loss": 0.0648, "step": 27900 }, { "epoch": 4.145254715579979, "grad_norm": 1.1603466272354126, "learning_rate": 5.854745284420021e-06, "loss": 0.0584, "step": 27910 }, { "epoch": 4.146739937620675, "grad_norm": 0.9675669074058533, "learning_rate": 5.853260062379326e-06, "loss": 0.051, "step": 27920 }, { "epoch": 4.1482251596613695, "grad_norm": 0.7146968841552734, "learning_rate": 5.851774840338631e-06, "loss": 0.0536, "step": 27930 }, { "epoch": 4.149710381702064, "grad_norm": 0.6189327239990234, "learning_rate": 5.850289618297936e-06, "loss": 0.0807, "step": 27940 }, { "epoch": 4.151195603742759, "grad_norm": 1.1867696046829224, "learning_rate": 5.848804396257241e-06, "loss": 0.0652, "step": 27950 }, { "epoch": 4.152680825783454, "grad_norm": 1.0013492107391357, "learning_rate": 5.847319174216546e-06, "loss": 0.0767, "step": 27960 }, { "epoch": 4.15416604782415, "grad_norm": 0.9447083473205566, "learning_rate": 5.845833952175851e-06, "loss": 0.0527, "step": 27970 }, { "epoch": 4.155651269864845, "grad_norm": 0.6696498990058899, "learning_rate": 5.844348730135156e-06, "loss": 0.0426, "step": 27980 }, { "epoch": 4.15713649190554, "grad_norm": 0.5020086169242859, "learning_rate": 5.842863508094461e-06, "loss": 0.0635, "step": 27990 }, { "epoch": 4.158621713946235, "grad_norm": 0.3135513961315155, "learning_rate": 5.841378286053766e-06, "loss": 0.0669, "step": 28000 }, { "epoch": 4.16010693598693, "grad_norm": 0.532892107963562, "learning_rate": 5.839893064013071e-06, "loss": 0.0479, "step": 28010 }, { "epoch": 4.161592158027625, "grad_norm": 0.9354525804519653, "learning_rate": 5.838407841972376e-06, "loss": 0.0716, "step": 28020 }, { "epoch": 4.16307738006832, "grad_norm": 0.5970202684402466, "learning_rate": 5.8369226199316795e-06, "loss": 0.0754, "step": 28030 }, { "epoch": 4.164562602109015, "grad_norm": 0.3913607597351074, "learning_rate": 5.835437397890985e-06, "loss": 0.0508, "step": 28040 }, { "epoch": 4.16604782414971, "grad_norm": 0.6784725785255432, "learning_rate": 5.83395217585029e-06, "loss": 0.065, "step": 28050 }, { "epoch": 4.167533046190406, "grad_norm": 0.5708526968955994, "learning_rate": 5.8324669538095944e-06, "loss": 0.0527, "step": 28060 }, { "epoch": 4.169018268231101, "grad_norm": 0.23578904569149017, "learning_rate": 5.8309817317689e-06, "loss": 0.0572, "step": 28070 }, { "epoch": 4.170503490271796, "grad_norm": 0.5932421088218689, "learning_rate": 5.829496509728205e-06, "loss": 0.0754, "step": 28080 }, { "epoch": 4.171988712312491, "grad_norm": 0.3044351041316986, "learning_rate": 5.8280112876875094e-06, "loss": 0.0674, "step": 28090 }, { "epoch": 4.1734739343531855, "grad_norm": 0.8977261185646057, "learning_rate": 5.826526065646815e-06, "loss": 0.0568, "step": 28100 }, { "epoch": 4.174959156393881, "grad_norm": 0.9061443209648132, "learning_rate": 5.82504084360612e-06, "loss": 0.0864, "step": 28110 }, { "epoch": 4.176444378434576, "grad_norm": 0.8516085147857666, "learning_rate": 5.8235556215654244e-06, "loss": 0.0659, "step": 28120 }, { "epoch": 4.177929600475271, "grad_norm": 0.7346743941307068, "learning_rate": 5.82207039952473e-06, "loss": 0.0557, "step": 28130 }, { "epoch": 4.179414822515966, "grad_norm": 0.5565342903137207, "learning_rate": 5.820585177484034e-06, "loss": 0.0747, "step": 28140 }, { "epoch": 4.180900044556661, "grad_norm": 1.4357632398605347, "learning_rate": 5.8190999554433394e-06, "loss": 0.0723, "step": 28150 }, { "epoch": 4.182385266597357, "grad_norm": 0.7222462296485901, "learning_rate": 5.817614733402645e-06, "loss": 0.0552, "step": 28160 }, { "epoch": 4.1838704886380516, "grad_norm": 1.0323468446731567, "learning_rate": 5.816129511361948e-06, "loss": 0.0525, "step": 28170 }, { "epoch": 4.1853557106787465, "grad_norm": 0.4670025110244751, "learning_rate": 5.8146442893212544e-06, "loss": 0.0661, "step": 28180 }, { "epoch": 4.186840932719441, "grad_norm": 1.2583537101745605, "learning_rate": 5.81315906728056e-06, "loss": 0.0655, "step": 28190 }, { "epoch": 4.188326154760136, "grad_norm": 0.6650807857513428, "learning_rate": 5.811673845239863e-06, "loss": 0.0748, "step": 28200 }, { "epoch": 4.189811376800832, "grad_norm": 1.1885240077972412, "learning_rate": 5.8101886231991686e-06, "loss": 0.0543, "step": 28210 }, { "epoch": 4.191296598841527, "grad_norm": 0.7857943773269653, "learning_rate": 5.808703401158474e-06, "loss": 0.0827, "step": 28220 }, { "epoch": 4.192781820882222, "grad_norm": 0.4552346169948578, "learning_rate": 5.807218179117778e-06, "loss": 0.0584, "step": 28230 }, { "epoch": 4.194267042922917, "grad_norm": 0.7268344163894653, "learning_rate": 5.8057329570770836e-06, "loss": 0.0529, "step": 28240 }, { "epoch": 4.195752264963612, "grad_norm": 1.4268473386764526, "learning_rate": 5.804247735036388e-06, "loss": 0.0554, "step": 28250 }, { "epoch": 4.197237487004307, "grad_norm": 0.6411811709403992, "learning_rate": 5.802762512995693e-06, "loss": 0.0548, "step": 28260 }, { "epoch": 4.198722709045002, "grad_norm": 0.8503941297531128, "learning_rate": 5.8012772909549986e-06, "loss": 0.0649, "step": 28270 }, { "epoch": 4.200207931085697, "grad_norm": 0.8192022442817688, "learning_rate": 5.799792068914303e-06, "loss": 0.0422, "step": 28280 }, { "epoch": 4.201693153126392, "grad_norm": 0.9261953234672546, "learning_rate": 5.798306846873608e-06, "loss": 0.0596, "step": 28290 }, { "epoch": 4.203178375167088, "grad_norm": 0.4752441346645355, "learning_rate": 5.7968216248329136e-06, "loss": 0.0722, "step": 28300 }, { "epoch": 4.204663597207783, "grad_norm": 0.9527250528335571, "learning_rate": 5.795336402792218e-06, "loss": 0.0729, "step": 28310 }, { "epoch": 4.206148819248478, "grad_norm": 1.0327355861663818, "learning_rate": 5.793851180751523e-06, "loss": 0.0486, "step": 28320 }, { "epoch": 4.207634041289173, "grad_norm": 0.6200602054595947, "learning_rate": 5.7923659587108285e-06, "loss": 0.0771, "step": 28330 }, { "epoch": 4.2091192633298675, "grad_norm": 0.8519974946975708, "learning_rate": 5.790880736670132e-06, "loss": 0.0919, "step": 28340 }, { "epoch": 4.210604485370563, "grad_norm": 0.9221513271331787, "learning_rate": 5.789395514629437e-06, "loss": 0.0481, "step": 28350 }, { "epoch": 4.212089707411258, "grad_norm": 0.6180176734924316, "learning_rate": 5.787910292588742e-06, "loss": 0.0488, "step": 28360 }, { "epoch": 4.213574929451953, "grad_norm": 0.6846516728401184, "learning_rate": 5.786425070548047e-06, "loss": 0.0772, "step": 28370 }, { "epoch": 4.215060151492648, "grad_norm": 0.6172364950180054, "learning_rate": 5.784939848507352e-06, "loss": 0.06, "step": 28380 }, { "epoch": 4.216545373533343, "grad_norm": 0.6358994841575623, "learning_rate": 5.783454626466657e-06, "loss": 0.0586, "step": 28390 }, { "epoch": 4.218030595574039, "grad_norm": 1.1505266427993774, "learning_rate": 5.781969404425962e-06, "loss": 0.0718, "step": 28400 }, { "epoch": 4.219515817614734, "grad_norm": 1.0869107246398926, "learning_rate": 5.780484182385267e-06, "loss": 0.0634, "step": 28410 }, { "epoch": 4.2210010396554285, "grad_norm": 0.9144407510757446, "learning_rate": 5.778998960344572e-06, "loss": 0.0774, "step": 28420 }, { "epoch": 4.222486261696123, "grad_norm": 1.1511156558990479, "learning_rate": 5.777513738303877e-06, "loss": 0.0768, "step": 28430 }, { "epoch": 4.223971483736818, "grad_norm": 0.756784200668335, "learning_rate": 5.7760285162631816e-06, "loss": 0.0727, "step": 28440 }, { "epoch": 4.225456705777514, "grad_norm": 0.6658867001533508, "learning_rate": 5.774543294222487e-06, "loss": 0.0757, "step": 28450 }, { "epoch": 4.226941927818209, "grad_norm": 1.061873197555542, "learning_rate": 5.773058072181792e-06, "loss": 0.0628, "step": 28460 }, { "epoch": 4.228427149858904, "grad_norm": 1.0419481992721558, "learning_rate": 5.7715728501410965e-06, "loss": 0.0654, "step": 28470 }, { "epoch": 4.229912371899599, "grad_norm": 0.7116315364837646, "learning_rate": 5.770087628100402e-06, "loss": 0.0696, "step": 28480 }, { "epoch": 4.231397593940294, "grad_norm": 0.6918877959251404, "learning_rate": 5.768602406059707e-06, "loss": 0.0511, "step": 28490 }, { "epoch": 4.2328828159809895, "grad_norm": 0.6803731322288513, "learning_rate": 5.767117184019011e-06, "loss": 0.0639, "step": 28500 }, { "epoch": 4.234368038021684, "grad_norm": 1.3853286504745483, "learning_rate": 5.765631961978316e-06, "loss": 0.0749, "step": 28510 }, { "epoch": 4.235853260062379, "grad_norm": 0.3931192457675934, "learning_rate": 5.764146739937621e-06, "loss": 0.0612, "step": 28520 }, { "epoch": 4.237338482103074, "grad_norm": 1.1293772459030151, "learning_rate": 5.762661517896926e-06, "loss": 0.0627, "step": 28530 }, { "epoch": 4.238823704143769, "grad_norm": 0.9891557693481445, "learning_rate": 5.761176295856231e-06, "loss": 0.07, "step": 28540 }, { "epoch": 4.240308926184465, "grad_norm": 0.9113637208938599, "learning_rate": 5.759691073815535e-06, "loss": 0.0579, "step": 28550 }, { "epoch": 4.24179414822516, "grad_norm": 0.3572257161140442, "learning_rate": 5.758205851774841e-06, "loss": 0.0655, "step": 28560 }, { "epoch": 4.243279370265855, "grad_norm": 0.4821026027202606, "learning_rate": 5.756720629734146e-06, "loss": 0.0341, "step": 28570 }, { "epoch": 4.24476459230655, "grad_norm": 0.6576929092407227, "learning_rate": 5.75523540769345e-06, "loss": 0.0759, "step": 28580 }, { "epoch": 4.2462498143472445, "grad_norm": 0.7048593759536743, "learning_rate": 5.753750185652756e-06, "loss": 0.0598, "step": 28590 }, { "epoch": 4.24773503638794, "grad_norm": 0.4831177592277527, "learning_rate": 5.752264963612061e-06, "loss": 0.0613, "step": 28600 }, { "epoch": 4.249220258428635, "grad_norm": 0.7408661842346191, "learning_rate": 5.750779741571365e-06, "loss": 0.0561, "step": 28610 }, { "epoch": 4.25070548046933, "grad_norm": 1.274364948272705, "learning_rate": 5.749294519530671e-06, "loss": 0.0842, "step": 28620 }, { "epoch": 4.252190702510025, "grad_norm": 0.9360983371734619, "learning_rate": 5.747809297489976e-06, "loss": 0.0676, "step": 28630 }, { "epoch": 4.253675924550721, "grad_norm": 1.290618896484375, "learning_rate": 5.7463240754492795e-06, "loss": 0.0531, "step": 28640 }, { "epoch": 4.255161146591416, "grad_norm": 0.827001690864563, "learning_rate": 5.744838853408586e-06, "loss": 0.0509, "step": 28650 }, { "epoch": 4.2566463686321105, "grad_norm": 0.7342594861984253, "learning_rate": 5.743353631367889e-06, "loss": 0.0664, "step": 28660 }, { "epoch": 4.2581315906728054, "grad_norm": 1.440019130706787, "learning_rate": 5.7418684093271945e-06, "loss": 0.0868, "step": 28670 }, { "epoch": 4.2596168127135, "grad_norm": 0.7774254679679871, "learning_rate": 5.7403831872865e-06, "loss": 0.0623, "step": 28680 }, { "epoch": 4.261102034754196, "grad_norm": 0.99187171459198, "learning_rate": 5.738897965245804e-06, "loss": 0.0704, "step": 28690 }, { "epoch": 4.262587256794891, "grad_norm": 1.1952239274978638, "learning_rate": 5.7374127432051095e-06, "loss": 0.0571, "step": 28700 }, { "epoch": 4.264072478835586, "grad_norm": 0.7059710621833801, "learning_rate": 5.735927521164415e-06, "loss": 0.0551, "step": 28710 }, { "epoch": 4.265557700876281, "grad_norm": 0.8650168180465698, "learning_rate": 5.734442299123719e-06, "loss": 0.061, "step": 28720 }, { "epoch": 4.267042922916976, "grad_norm": 0.374861478805542, "learning_rate": 5.7329570770830245e-06, "loss": 0.0592, "step": 28730 }, { "epoch": 4.2685281449576715, "grad_norm": 0.9369165897369385, "learning_rate": 5.73147185504233e-06, "loss": 0.0645, "step": 28740 }, { "epoch": 4.270013366998366, "grad_norm": 0.5820393562316895, "learning_rate": 5.729986633001634e-06, "loss": 0.058, "step": 28750 }, { "epoch": 4.271498589039061, "grad_norm": 0.9628231525421143, "learning_rate": 5.7285014109609395e-06, "loss": 0.0548, "step": 28760 }, { "epoch": 4.272983811079756, "grad_norm": 0.6571143269538879, "learning_rate": 5.727016188920244e-06, "loss": 0.0568, "step": 28770 }, { "epoch": 4.274469033120451, "grad_norm": 1.2203211784362793, "learning_rate": 5.725530966879549e-06, "loss": 0.0647, "step": 28780 }, { "epoch": 4.275954255161147, "grad_norm": 1.1997301578521729, "learning_rate": 5.7240457448388545e-06, "loss": 0.0608, "step": 28790 }, { "epoch": 4.277439477201842, "grad_norm": 1.1112606525421143, "learning_rate": 5.722560522798158e-06, "loss": 0.0674, "step": 28800 }, { "epoch": 4.278924699242537, "grad_norm": 0.9697360992431641, "learning_rate": 5.721075300757463e-06, "loss": 0.0619, "step": 28810 }, { "epoch": 4.280409921283232, "grad_norm": 0.5399026274681091, "learning_rate": 5.7195900787167695e-06, "loss": 0.0707, "step": 28820 }, { "epoch": 4.2818951433239265, "grad_norm": 0.41528892517089844, "learning_rate": 5.718104856676073e-06, "loss": 0.0653, "step": 28830 }, { "epoch": 4.283380365364622, "grad_norm": 0.672845184803009, "learning_rate": 5.716619634635378e-06, "loss": 0.0751, "step": 28840 }, { "epoch": 4.284865587405317, "grad_norm": 0.8563491106033325, "learning_rate": 5.715134412594683e-06, "loss": 0.0424, "step": 28850 }, { "epoch": 4.286350809446012, "grad_norm": 0.6113738417625427, "learning_rate": 5.713649190553988e-06, "loss": 0.0584, "step": 28860 }, { "epoch": 4.287836031486707, "grad_norm": 0.9037973880767822, "learning_rate": 5.712163968513293e-06, "loss": 0.0495, "step": 28870 }, { "epoch": 4.289321253527403, "grad_norm": 1.3024934530258179, "learning_rate": 5.710678746472598e-06, "loss": 0.066, "step": 28880 }, { "epoch": 4.290806475568098, "grad_norm": 1.2819448709487915, "learning_rate": 5.709193524431903e-06, "loss": 0.0579, "step": 28890 }, { "epoch": 4.292291697608793, "grad_norm": 0.638242781162262, "learning_rate": 5.707708302391208e-06, "loss": 0.063, "step": 28900 }, { "epoch": 4.2937769196494875, "grad_norm": 0.6230556964874268, "learning_rate": 5.706223080350513e-06, "loss": 0.0522, "step": 28910 }, { "epoch": 4.295262141690182, "grad_norm": 1.8110722303390503, "learning_rate": 5.704737858309818e-06, "loss": 0.0818, "step": 28920 }, { "epoch": 4.296747363730878, "grad_norm": 0.7264518141746521, "learning_rate": 5.703252636269123e-06, "loss": 0.0551, "step": 28930 }, { "epoch": 4.298232585771573, "grad_norm": 0.7805657386779785, "learning_rate": 5.701767414228428e-06, "loss": 0.0676, "step": 28940 }, { "epoch": 4.299717807812268, "grad_norm": 0.32692262530326843, "learning_rate": 5.700282192187733e-06, "loss": 0.0511, "step": 28950 }, { "epoch": 4.301203029852963, "grad_norm": 0.7880625128746033, "learning_rate": 5.698796970147037e-06, "loss": 0.0785, "step": 28960 }, { "epoch": 4.302688251893658, "grad_norm": 0.8821731805801392, "learning_rate": 5.697311748106342e-06, "loss": 0.0541, "step": 28970 }, { "epoch": 4.304173473934354, "grad_norm": 1.0957586765289307, "learning_rate": 5.695826526065647e-06, "loss": 0.0664, "step": 28980 }, { "epoch": 4.3056586959750485, "grad_norm": 0.8482171297073364, "learning_rate": 5.694341304024952e-06, "loss": 0.0603, "step": 28990 }, { "epoch": 4.307143918015743, "grad_norm": 1.5131299495697021, "learning_rate": 5.692856081984257e-06, "loss": 0.0661, "step": 29000 }, { "epoch": 4.308629140056438, "grad_norm": 0.6365652680397034, "learning_rate": 5.691370859943562e-06, "loss": 0.0769, "step": 29010 }, { "epoch": 4.310114362097133, "grad_norm": 0.40474069118499756, "learning_rate": 5.689885637902867e-06, "loss": 0.0676, "step": 29020 }, { "epoch": 4.311599584137829, "grad_norm": 0.3460848927497864, "learning_rate": 5.688400415862172e-06, "loss": 0.0846, "step": 29030 }, { "epoch": 4.313084806178524, "grad_norm": 0.7716390490531921, "learning_rate": 5.686915193821477e-06, "loss": 0.0727, "step": 29040 }, { "epoch": 4.314570028219219, "grad_norm": 0.7166088223457336, "learning_rate": 5.685429971780782e-06, "loss": 0.0497, "step": 29050 }, { "epoch": 4.316055250259914, "grad_norm": 0.3296228349208832, "learning_rate": 5.683944749740087e-06, "loss": 0.073, "step": 29060 }, { "epoch": 4.317540472300609, "grad_norm": 0.9404149651527405, "learning_rate": 5.682459527699391e-06, "loss": 0.0506, "step": 29070 }, { "epoch": 4.319025694341304, "grad_norm": 1.397255301475525, "learning_rate": 5.680974305658697e-06, "loss": 0.0499, "step": 29080 }, { "epoch": 4.320510916381999, "grad_norm": 1.0144386291503906, "learning_rate": 5.679489083618002e-06, "loss": 0.0639, "step": 29090 }, { "epoch": 4.321996138422694, "grad_norm": 0.328948974609375, "learning_rate": 5.6780038615773055e-06, "loss": 0.0429, "step": 29100 }, { "epoch": 4.323481360463389, "grad_norm": 0.8655416965484619, "learning_rate": 5.676518639536611e-06, "loss": 0.0617, "step": 29110 }, { "epoch": 4.324966582504084, "grad_norm": 1.1616047620773315, "learning_rate": 5.675033417495917e-06, "loss": 0.0643, "step": 29120 }, { "epoch": 4.32645180454478, "grad_norm": 1.1419157981872559, "learning_rate": 5.6735481954552205e-06, "loss": 0.0594, "step": 29130 }, { "epoch": 4.327937026585475, "grad_norm": 0.5438464879989624, "learning_rate": 5.672062973414526e-06, "loss": 0.069, "step": 29140 }, { "epoch": 4.3294222486261695, "grad_norm": 0.4473876357078552, "learning_rate": 5.670577751373831e-06, "loss": 0.0613, "step": 29150 }, { "epoch": 4.330907470666864, "grad_norm": 0.7418670654296875, "learning_rate": 5.6690925293331355e-06, "loss": 0.0521, "step": 29160 }, { "epoch": 4.332392692707559, "grad_norm": 1.3394016027450562, "learning_rate": 5.667607307292441e-06, "loss": 0.0818, "step": 29170 }, { "epoch": 4.333877914748255, "grad_norm": 1.4490028619766235, "learning_rate": 5.666122085251745e-06, "loss": 0.0641, "step": 29180 }, { "epoch": 4.33536313678895, "grad_norm": 0.673684298992157, "learning_rate": 5.6646368632110505e-06, "loss": 0.0593, "step": 29190 }, { "epoch": 4.336848358829645, "grad_norm": 0.585928201675415, "learning_rate": 5.663151641170356e-06, "loss": 0.0466, "step": 29200 }, { "epoch": 4.33833358087034, "grad_norm": 0.650763213634491, "learning_rate": 5.66166641912966e-06, "loss": 0.0552, "step": 29210 }, { "epoch": 4.339818802911036, "grad_norm": 0.9486691951751709, "learning_rate": 5.6601811970889655e-06, "loss": 0.0636, "step": 29220 }, { "epoch": 4.3413040249517305, "grad_norm": 0.45562392473220825, "learning_rate": 5.658695975048271e-06, "loss": 0.0343, "step": 29230 }, { "epoch": 4.342789246992425, "grad_norm": 0.8459610342979431, "learning_rate": 5.657210753007575e-06, "loss": 0.0662, "step": 29240 }, { "epoch": 4.34427446903312, "grad_norm": 0.9982208609580994, "learning_rate": 5.6557255309668805e-06, "loss": 0.0562, "step": 29250 }, { "epoch": 4.345759691073815, "grad_norm": 0.28092893958091736, "learning_rate": 5.654240308926186e-06, "loss": 0.0588, "step": 29260 }, { "epoch": 4.347244913114511, "grad_norm": 0.7825678586959839, "learning_rate": 5.652755086885489e-06, "loss": 0.0692, "step": 29270 }, { "epoch": 4.348730135155206, "grad_norm": 0.8129569292068481, "learning_rate": 5.651269864844795e-06, "loss": 0.0737, "step": 29280 }, { "epoch": 4.350215357195901, "grad_norm": 0.7594356536865234, "learning_rate": 5.649784642804099e-06, "loss": 0.0693, "step": 29290 }, { "epoch": 4.351700579236596, "grad_norm": 0.6552794575691223, "learning_rate": 5.648299420763404e-06, "loss": 0.0517, "step": 29300 }, { "epoch": 4.353185801277291, "grad_norm": 0.8315490484237671, "learning_rate": 5.64681419872271e-06, "loss": 0.05, "step": 29310 }, { "epoch": 4.354671023317986, "grad_norm": 0.9624179005622864, "learning_rate": 5.645328976682014e-06, "loss": 0.0882, "step": 29320 }, { "epoch": 4.356156245358681, "grad_norm": 0.36779454350471497, "learning_rate": 5.643843754641319e-06, "loss": 0.0555, "step": 29330 }, { "epoch": 4.357641467399376, "grad_norm": 0.9770904183387756, "learning_rate": 5.642358532600625e-06, "loss": 0.0491, "step": 29340 }, { "epoch": 4.359126689440071, "grad_norm": 0.6968162655830383, "learning_rate": 5.640873310559929e-06, "loss": 0.0699, "step": 29350 }, { "epoch": 4.360611911480766, "grad_norm": 1.1757110357284546, "learning_rate": 5.639388088519234e-06, "loss": 0.0565, "step": 29360 }, { "epoch": 4.362097133521462, "grad_norm": 0.8306859135627747, "learning_rate": 5.637902866478539e-06, "loss": 0.0629, "step": 29370 }, { "epoch": 4.363582355562157, "grad_norm": 0.7834184169769287, "learning_rate": 5.636417644437844e-06, "loss": 0.0596, "step": 29380 }, { "epoch": 4.365067577602852, "grad_norm": 0.5488366484642029, "learning_rate": 5.634932422397149e-06, "loss": 0.0556, "step": 29390 }, { "epoch": 4.3665527996435465, "grad_norm": 1.1125340461730957, "learning_rate": 5.633447200356453e-06, "loss": 0.062, "step": 29400 }, { "epoch": 4.368038021684241, "grad_norm": 0.7609226107597351, "learning_rate": 5.631961978315759e-06, "loss": 0.0645, "step": 29410 }, { "epoch": 4.369523243724937, "grad_norm": 1.2974096536636353, "learning_rate": 5.630476756275064e-06, "loss": 0.0544, "step": 29420 }, { "epoch": 4.371008465765632, "grad_norm": 0.3472174108028412, "learning_rate": 5.628991534234368e-06, "loss": 0.0605, "step": 29430 }, { "epoch": 4.372493687806327, "grad_norm": 0.852185845375061, "learning_rate": 5.627506312193673e-06, "loss": 0.0698, "step": 29440 }, { "epoch": 4.373978909847022, "grad_norm": 0.5808961391448975, "learning_rate": 5.6260210901529785e-06, "loss": 0.0477, "step": 29450 }, { "epoch": 4.375464131887718, "grad_norm": 0.761407732963562, "learning_rate": 5.624535868112283e-06, "loss": 0.0502, "step": 29460 }, { "epoch": 4.376949353928413, "grad_norm": 0.5551052689552307, "learning_rate": 5.623050646071588e-06, "loss": 0.0631, "step": 29470 }, { "epoch": 4.3784345759691075, "grad_norm": 0.8643785715103149, "learning_rate": 5.621565424030893e-06, "loss": 0.0634, "step": 29480 }, { "epoch": 4.379919798009802, "grad_norm": 0.40301206707954407, "learning_rate": 5.620080201990198e-06, "loss": 0.0561, "step": 29490 }, { "epoch": 4.381405020050497, "grad_norm": 0.8390589952468872, "learning_rate": 5.618594979949503e-06, "loss": 0.0616, "step": 29500 }, { "epoch": 4.382890242091193, "grad_norm": 0.6667717099189758, "learning_rate": 5.617109757908808e-06, "loss": 0.0703, "step": 29510 }, { "epoch": 4.384375464131888, "grad_norm": 0.7195507884025574, "learning_rate": 5.615624535868113e-06, "loss": 0.0466, "step": 29520 }, { "epoch": 4.385860686172583, "grad_norm": 0.4358431100845337, "learning_rate": 5.614139313827418e-06, "loss": 0.0545, "step": 29530 }, { "epoch": 4.387345908213278, "grad_norm": 1.1485531330108643, "learning_rate": 5.612654091786723e-06, "loss": 0.0654, "step": 29540 }, { "epoch": 4.388831130253973, "grad_norm": 0.8820552825927734, "learning_rate": 5.611168869746028e-06, "loss": 0.0654, "step": 29550 }, { "epoch": 4.390316352294668, "grad_norm": 0.4499886631965637, "learning_rate": 5.609683647705333e-06, "loss": 0.0541, "step": 29560 }, { "epoch": 4.391801574335363, "grad_norm": 0.4159229099750519, "learning_rate": 5.608198425664637e-06, "loss": 0.0653, "step": 29570 }, { "epoch": 4.393286796376058, "grad_norm": 0.28467857837677, "learning_rate": 5.606713203623943e-06, "loss": 0.0597, "step": 29580 }, { "epoch": 4.394772018416753, "grad_norm": 0.20366248488426208, "learning_rate": 5.6052279815832464e-06, "loss": 0.0582, "step": 29590 }, { "epoch": 4.396257240457448, "grad_norm": 0.976061224937439, "learning_rate": 5.603742759542552e-06, "loss": 0.0503, "step": 29600 }, { "epoch": 4.397742462498144, "grad_norm": 0.4987535774707794, "learning_rate": 5.602257537501857e-06, "loss": 0.0509, "step": 29610 }, { "epoch": 4.399227684538839, "grad_norm": 0.7110401391983032, "learning_rate": 5.6007723154611614e-06, "loss": 0.0622, "step": 29620 }, { "epoch": 4.400712906579534, "grad_norm": 0.6875151991844177, "learning_rate": 5.599287093420467e-06, "loss": 0.064, "step": 29630 }, { "epoch": 4.4021981286202285, "grad_norm": 1.0403201580047607, "learning_rate": 5.597801871379772e-06, "loss": 0.065, "step": 29640 }, { "epoch": 4.403683350660923, "grad_norm": 0.5002561807632446, "learning_rate": 5.5963166493390764e-06, "loss": 0.045, "step": 29650 }, { "epoch": 4.405168572701619, "grad_norm": 0.8764870762825012, "learning_rate": 5.594831427298382e-06, "loss": 0.0539, "step": 29660 }, { "epoch": 4.406653794742314, "grad_norm": 0.687566339969635, "learning_rate": 5.593346205257687e-06, "loss": 0.0605, "step": 29670 }, { "epoch": 4.408139016783009, "grad_norm": 0.5420661568641663, "learning_rate": 5.5918609832169914e-06, "loss": 0.0709, "step": 29680 }, { "epoch": 4.409624238823704, "grad_norm": 0.5420895218849182, "learning_rate": 5.590375761176297e-06, "loss": 0.0897, "step": 29690 }, { "epoch": 4.411109460864399, "grad_norm": 1.2310397624969482, "learning_rate": 5.588890539135601e-06, "loss": 0.0684, "step": 29700 }, { "epoch": 4.412594682905095, "grad_norm": 0.5618414282798767, "learning_rate": 5.5874053170949064e-06, "loss": 0.0486, "step": 29710 }, { "epoch": 4.4140799049457895, "grad_norm": 0.9262076020240784, "learning_rate": 5.585920095054212e-06, "loss": 0.0551, "step": 29720 }, { "epoch": 4.415565126986484, "grad_norm": 1.0242255926132202, "learning_rate": 5.584434873013515e-06, "loss": 0.0567, "step": 29730 }, { "epoch": 4.417050349027179, "grad_norm": 1.1418559551239014, "learning_rate": 5.5829496509728206e-06, "loss": 0.0615, "step": 29740 }, { "epoch": 4.418535571067874, "grad_norm": 0.736282229423523, "learning_rate": 5.581464428932126e-06, "loss": 0.0567, "step": 29750 }, { "epoch": 4.42002079310857, "grad_norm": 0.9747437834739685, "learning_rate": 5.57997920689143e-06, "loss": 0.0755, "step": 29760 }, { "epoch": 4.421506015149265, "grad_norm": 1.3061301708221436, "learning_rate": 5.5784939848507356e-06, "loss": 0.0981, "step": 29770 }, { "epoch": 4.42299123718996, "grad_norm": 0.5164014101028442, "learning_rate": 5.577008762810041e-06, "loss": 0.0672, "step": 29780 }, { "epoch": 4.424476459230655, "grad_norm": 0.5094213485717773, "learning_rate": 5.575523540769345e-06, "loss": 0.0685, "step": 29790 }, { "epoch": 4.4259616812713505, "grad_norm": 0.5946676135063171, "learning_rate": 5.5740383187286506e-06, "loss": 0.0514, "step": 29800 }, { "epoch": 4.427446903312045, "grad_norm": 0.37052375078201294, "learning_rate": 5.572553096687955e-06, "loss": 0.0823, "step": 29810 }, { "epoch": 4.42893212535274, "grad_norm": 0.9672272205352783, "learning_rate": 5.57106787464726e-06, "loss": 0.0727, "step": 29820 }, { "epoch": 4.430417347393435, "grad_norm": 0.974509596824646, "learning_rate": 5.5695826526065656e-06, "loss": 0.0669, "step": 29830 }, { "epoch": 4.43190256943413, "grad_norm": 0.9510679244995117, "learning_rate": 5.56809743056587e-06, "loss": 0.0589, "step": 29840 }, { "epoch": 4.433387791474826, "grad_norm": 0.5893191695213318, "learning_rate": 5.566612208525175e-06, "loss": 0.0533, "step": 29850 }, { "epoch": 4.434873013515521, "grad_norm": 0.9645144939422607, "learning_rate": 5.5651269864844805e-06, "loss": 0.0706, "step": 29860 }, { "epoch": 4.436358235556216, "grad_norm": 1.2255542278289795, "learning_rate": 5.563641764443785e-06, "loss": 0.0733, "step": 29870 }, { "epoch": 4.437843457596911, "grad_norm": 0.8338664770126343, "learning_rate": 5.56215654240309e-06, "loss": 0.0475, "step": 29880 }, { "epoch": 4.4393286796376055, "grad_norm": 1.517331600189209, "learning_rate": 5.560671320362394e-06, "loss": 0.0762, "step": 29890 }, { "epoch": 4.440813901678301, "grad_norm": 0.5551873445510864, "learning_rate": 5.559186098321699e-06, "loss": 0.0626, "step": 29900 }, { "epoch": 4.442299123718996, "grad_norm": 1.3025144338607788, "learning_rate": 5.557700876281004e-06, "loss": 0.0495, "step": 29910 }, { "epoch": 4.443784345759691, "grad_norm": 0.9925779700279236, "learning_rate": 5.556215654240309e-06, "loss": 0.0545, "step": 29920 }, { "epoch": 4.445269567800386, "grad_norm": 1.2901450395584106, "learning_rate": 5.554730432199614e-06, "loss": 0.0499, "step": 29930 }, { "epoch": 4.446754789841081, "grad_norm": 1.417781114578247, "learning_rate": 5.553245210158919e-06, "loss": 0.0598, "step": 29940 }, { "epoch": 4.448240011881777, "grad_norm": 0.6772493720054626, "learning_rate": 5.551759988118224e-06, "loss": 0.0656, "step": 29950 }, { "epoch": 4.4497252339224715, "grad_norm": 1.4603149890899658, "learning_rate": 5.550274766077529e-06, "loss": 0.0698, "step": 29960 }, { "epoch": 4.4512104559631664, "grad_norm": 0.8940395712852478, "learning_rate": 5.548789544036834e-06, "loss": 0.081, "step": 29970 }, { "epoch": 4.452695678003861, "grad_norm": 0.7605615258216858, "learning_rate": 5.547304321996139e-06, "loss": 0.0584, "step": 29980 }, { "epoch": 4.454180900044556, "grad_norm": 0.46938732266426086, "learning_rate": 5.545819099955444e-06, "loss": 0.0674, "step": 29990 }, { "epoch": 4.455666122085252, "grad_norm": 0.3251242935657501, "learning_rate": 5.5443338779147485e-06, "loss": 0.0709, "step": 30000 }, { "epoch": 4.457151344125947, "grad_norm": 0.9395661354064941, "learning_rate": 5.542848655874054e-06, "loss": 0.06, "step": 30010 }, { "epoch": 4.458636566166642, "grad_norm": 0.8586018681526184, "learning_rate": 5.541363433833359e-06, "loss": 0.054, "step": 30020 }, { "epoch": 4.460121788207337, "grad_norm": 0.9087779521942139, "learning_rate": 5.539878211792663e-06, "loss": 0.0668, "step": 30030 }, { "epoch": 4.4616070102480325, "grad_norm": 0.4863450527191162, "learning_rate": 5.538392989751968e-06, "loss": 0.0802, "step": 30040 }, { "epoch": 4.463092232288727, "grad_norm": 0.5601634383201599, "learning_rate": 5.536907767711274e-06, "loss": 0.0634, "step": 30050 }, { "epoch": 4.464577454329422, "grad_norm": 1.1524817943572998, "learning_rate": 5.535422545670578e-06, "loss": 0.071, "step": 30060 }, { "epoch": 4.466062676370117, "grad_norm": 1.1712273359298706, "learning_rate": 5.533937323629883e-06, "loss": 0.0839, "step": 30070 }, { "epoch": 4.467547898410812, "grad_norm": 1.044423222541809, "learning_rate": 5.532452101589188e-06, "loss": 0.0563, "step": 30080 }, { "epoch": 4.469033120451508, "grad_norm": 0.6070758104324341, "learning_rate": 5.530966879548493e-06, "loss": 0.0672, "step": 30090 }, { "epoch": 4.470518342492203, "grad_norm": 1.0291193723678589, "learning_rate": 5.529481657507798e-06, "loss": 0.0705, "step": 30100 }, { "epoch": 4.472003564532898, "grad_norm": 1.1457722187042236, "learning_rate": 5.527996435467102e-06, "loss": 0.0608, "step": 30110 }, { "epoch": 4.473488786573593, "grad_norm": 0.4338544011116028, "learning_rate": 5.526511213426408e-06, "loss": 0.0477, "step": 30120 }, { "epoch": 4.4749740086142875, "grad_norm": 1.3526643514633179, "learning_rate": 5.525025991385713e-06, "loss": 0.0642, "step": 30130 }, { "epoch": 4.476459230654983, "grad_norm": 0.9014106392860413, "learning_rate": 5.523540769345017e-06, "loss": 0.0679, "step": 30140 }, { "epoch": 4.477944452695678, "grad_norm": 0.8641489744186401, "learning_rate": 5.522055547304323e-06, "loss": 0.0602, "step": 30150 }, { "epoch": 4.479429674736373, "grad_norm": 0.8070448637008667, "learning_rate": 5.520570325263628e-06, "loss": 0.0547, "step": 30160 }, { "epoch": 4.480914896777068, "grad_norm": 0.3844519257545471, "learning_rate": 5.519085103222932e-06, "loss": 0.05, "step": 30170 }, { "epoch": 4.482400118817763, "grad_norm": 0.6818383932113647, "learning_rate": 5.517599881182238e-06, "loss": 0.0652, "step": 30180 }, { "epoch": 4.483885340858459, "grad_norm": 0.952302098274231, "learning_rate": 5.516114659141543e-06, "loss": 0.0702, "step": 30190 }, { "epoch": 4.485370562899154, "grad_norm": 0.5874274969100952, "learning_rate": 5.5146294371008465e-06, "loss": 0.0667, "step": 30200 }, { "epoch": 4.4868557849398485, "grad_norm": 1.1029670238494873, "learning_rate": 5.513144215060152e-06, "loss": 0.0495, "step": 30210 }, { "epoch": 4.488341006980543, "grad_norm": 1.2427738904953003, "learning_rate": 5.511658993019456e-06, "loss": 0.0705, "step": 30220 }, { "epoch": 4.489826229021238, "grad_norm": 0.7121690511703491, "learning_rate": 5.5101737709787615e-06, "loss": 0.0513, "step": 30230 }, { "epoch": 4.491311451061934, "grad_norm": 0.7534963488578796, "learning_rate": 5.508688548938067e-06, "loss": 0.0754, "step": 30240 }, { "epoch": 4.492796673102629, "grad_norm": 1.0792324542999268, "learning_rate": 5.507203326897371e-06, "loss": 0.0459, "step": 30250 }, { "epoch": 4.494281895143324, "grad_norm": 0.8321207165718079, "learning_rate": 5.5057181048566765e-06, "loss": 0.0477, "step": 30260 }, { "epoch": 4.495767117184019, "grad_norm": 1.2834551334381104, "learning_rate": 5.504232882815982e-06, "loss": 0.0573, "step": 30270 }, { "epoch": 4.497252339224714, "grad_norm": 0.8430453538894653, "learning_rate": 5.502747660775286e-06, "loss": 0.0593, "step": 30280 }, { "epoch": 4.4987375612654095, "grad_norm": 0.8827682733535767, "learning_rate": 5.5012624387345915e-06, "loss": 0.0463, "step": 30290 }, { "epoch": 4.500222783306104, "grad_norm": 0.30821776390075684, "learning_rate": 5.499777216693896e-06, "loss": 0.0513, "step": 30300 }, { "epoch": 4.501708005346799, "grad_norm": 0.45330941677093506, "learning_rate": 5.498291994653201e-06, "loss": 0.0529, "step": 30310 }, { "epoch": 4.503193227387494, "grad_norm": 0.5481662750244141, "learning_rate": 5.4968067726125065e-06, "loss": 0.0699, "step": 30320 }, { "epoch": 4.504678449428189, "grad_norm": 2.128347873687744, "learning_rate": 5.49532155057181e-06, "loss": 0.0726, "step": 30330 }, { "epoch": 4.506163671468885, "grad_norm": 1.1982522010803223, "learning_rate": 5.493836328531116e-06, "loss": 0.051, "step": 30340 }, { "epoch": 4.50764889350958, "grad_norm": 0.745867908000946, "learning_rate": 5.4923511064904215e-06, "loss": 0.0637, "step": 30350 }, { "epoch": 4.509134115550275, "grad_norm": 0.6008644104003906, "learning_rate": 5.490865884449725e-06, "loss": 0.0771, "step": 30360 }, { "epoch": 4.51061933759097, "grad_norm": 0.9833894968032837, "learning_rate": 5.48938066240903e-06, "loss": 0.0682, "step": 30370 }, { "epoch": 4.5121045596316645, "grad_norm": 0.38291507959365845, "learning_rate": 5.487895440368336e-06, "loss": 0.0547, "step": 30380 }, { "epoch": 4.51358978167236, "grad_norm": 0.5758116841316223, "learning_rate": 5.48641021832764e-06, "loss": 0.065, "step": 30390 }, { "epoch": 4.515075003713055, "grad_norm": 1.0570487976074219, "learning_rate": 5.484924996286945e-06, "loss": 0.0725, "step": 30400 }, { "epoch": 4.51656022575375, "grad_norm": 1.7718385457992554, "learning_rate": 5.48343977424625e-06, "loss": 0.0619, "step": 30410 }, { "epoch": 4.518045447794445, "grad_norm": 1.0061246156692505, "learning_rate": 5.481954552205555e-06, "loss": 0.0701, "step": 30420 }, { "epoch": 4.519530669835141, "grad_norm": 0.5751404762268066, "learning_rate": 5.48046933016486e-06, "loss": 0.0736, "step": 30430 }, { "epoch": 4.521015891875836, "grad_norm": 0.612650990486145, "learning_rate": 5.478984108124165e-06, "loss": 0.0581, "step": 30440 }, { "epoch": 4.5225011139165305, "grad_norm": 0.7062779664993286, "learning_rate": 5.47749888608347e-06, "loss": 0.0566, "step": 30450 }, { "epoch": 4.523986335957225, "grad_norm": 1.410231351852417, "learning_rate": 5.476013664042775e-06, "loss": 0.0513, "step": 30460 }, { "epoch": 4.52547155799792, "grad_norm": 0.4978226125240326, "learning_rate": 5.47452844200208e-06, "loss": 0.0683, "step": 30470 }, { "epoch": 4.526956780038616, "grad_norm": 0.6738853454589844, "learning_rate": 5.473043219961385e-06, "loss": 0.0414, "step": 30480 }, { "epoch": 4.528442002079311, "grad_norm": 1.0385953187942505, "learning_rate": 5.47155799792069e-06, "loss": 0.0648, "step": 30490 }, { "epoch": 4.529927224120006, "grad_norm": 0.38971370458602905, "learning_rate": 5.470072775879994e-06, "loss": 0.0453, "step": 30500 }, { "epoch": 4.531412446160701, "grad_norm": 1.1835120916366577, "learning_rate": 5.468587553839299e-06, "loss": 0.0792, "step": 30510 }, { "epoch": 4.532897668201396, "grad_norm": 0.6529055833816528, "learning_rate": 5.467102331798604e-06, "loss": 0.0783, "step": 30520 }, { "epoch": 4.5343828902420915, "grad_norm": 1.151580810546875, "learning_rate": 5.465617109757909e-06, "loss": 0.0629, "step": 30530 }, { "epoch": 4.535868112282786, "grad_norm": 0.6512138843536377, "learning_rate": 5.464131887717214e-06, "loss": 0.0854, "step": 30540 }, { "epoch": 4.537353334323481, "grad_norm": 0.6657187342643738, "learning_rate": 5.462646665676519e-06, "loss": 0.0546, "step": 30550 }, { "epoch": 4.538838556364176, "grad_norm": 0.7522188425064087, "learning_rate": 5.461161443635824e-06, "loss": 0.0613, "step": 30560 }, { "epoch": 4.540323778404872, "grad_norm": 1.369394063949585, "learning_rate": 5.459676221595129e-06, "loss": 0.0562, "step": 30570 }, { "epoch": 4.541809000445567, "grad_norm": 0.35974112153053284, "learning_rate": 5.458190999554434e-06, "loss": 0.0573, "step": 30580 }, { "epoch": 4.543294222486262, "grad_norm": 1.749398946762085, "learning_rate": 5.456705777513739e-06, "loss": 0.0653, "step": 30590 }, { "epoch": 4.544779444526957, "grad_norm": 1.257237195968628, "learning_rate": 5.455220555473044e-06, "loss": 0.0798, "step": 30600 }, { "epoch": 4.546264666567652, "grad_norm": 1.2745774984359741, "learning_rate": 5.453735333432349e-06, "loss": 0.0729, "step": 30610 }, { "epoch": 4.547749888608347, "grad_norm": 1.1518669128417969, "learning_rate": 5.452250111391654e-06, "loss": 0.0579, "step": 30620 }, { "epoch": 4.549235110649042, "grad_norm": 0.6923162341117859, "learning_rate": 5.450764889350958e-06, "loss": 0.0494, "step": 30630 }, { "epoch": 4.550720332689737, "grad_norm": 0.6949621438980103, "learning_rate": 5.449279667310264e-06, "loss": 0.0612, "step": 30640 }, { "epoch": 4.552205554730432, "grad_norm": 0.6583867073059082, "learning_rate": 5.447794445269569e-06, "loss": 0.0589, "step": 30650 }, { "epoch": 4.553690776771127, "grad_norm": 0.3452644348144531, "learning_rate": 5.4463092232288725e-06, "loss": 0.0571, "step": 30660 }, { "epoch": 4.555175998811823, "grad_norm": 0.7595973610877991, "learning_rate": 5.444824001188178e-06, "loss": 0.0708, "step": 30670 }, { "epoch": 4.556661220852518, "grad_norm": 0.611835241317749, "learning_rate": 5.443338779147483e-06, "loss": 0.0548, "step": 30680 }, { "epoch": 4.558146442893213, "grad_norm": 0.9598454833030701, "learning_rate": 5.4418535571067875e-06, "loss": 0.0659, "step": 30690 }, { "epoch": 4.5596316649339075, "grad_norm": 0.8964837193489075, "learning_rate": 5.440368335066093e-06, "loss": 0.0727, "step": 30700 }, { "epoch": 4.561116886974602, "grad_norm": 0.995827853679657, "learning_rate": 5.438883113025398e-06, "loss": 0.0731, "step": 30710 }, { "epoch": 4.562602109015298, "grad_norm": 0.6015493273735046, "learning_rate": 5.4373978909847025e-06, "loss": 0.0634, "step": 30720 }, { "epoch": 4.564087331055993, "grad_norm": 0.38653287291526794, "learning_rate": 5.435912668944008e-06, "loss": 0.1031, "step": 30730 }, { "epoch": 4.565572553096688, "grad_norm": 1.0255281925201416, "learning_rate": 5.434427446903312e-06, "loss": 0.0685, "step": 30740 }, { "epoch": 4.567057775137383, "grad_norm": 0.8919768929481506, "learning_rate": 5.4329422248626175e-06, "loss": 0.0638, "step": 30750 }, { "epoch": 4.568542997178078, "grad_norm": 0.8848775625228882, "learning_rate": 5.431457002821923e-06, "loss": 0.0867, "step": 30760 }, { "epoch": 4.570028219218774, "grad_norm": 0.7540958523750305, "learning_rate": 5.429971780781227e-06, "loss": 0.058, "step": 30770 }, { "epoch": 4.5715134412594685, "grad_norm": 1.1147123575210571, "learning_rate": 5.4284865587405325e-06, "loss": 0.0732, "step": 30780 }, { "epoch": 4.572998663300163, "grad_norm": 0.8272676467895508, "learning_rate": 5.427001336699838e-06, "loss": 0.0537, "step": 30790 }, { "epoch": 4.574483885340858, "grad_norm": 0.6863977313041687, "learning_rate": 5.425516114659141e-06, "loss": 0.0675, "step": 30800 }, { "epoch": 4.575969107381553, "grad_norm": 0.7427458167076111, "learning_rate": 5.4240308926184475e-06, "loss": 0.058, "step": 30810 }, { "epoch": 4.577454329422249, "grad_norm": 0.6880619525909424, "learning_rate": 5.422545670577751e-06, "loss": 0.0402, "step": 30820 }, { "epoch": 4.578939551462944, "grad_norm": 0.84503173828125, "learning_rate": 5.421060448537056e-06, "loss": 0.0731, "step": 30830 }, { "epoch": 4.580424773503639, "grad_norm": 0.5866569876670837, "learning_rate": 5.419575226496362e-06, "loss": 0.0614, "step": 30840 }, { "epoch": 4.581909995544334, "grad_norm": 0.6142920851707458, "learning_rate": 5.418090004455666e-06, "loss": 0.0683, "step": 30850 }, { "epoch": 4.5833952175850285, "grad_norm": 0.8540779948234558, "learning_rate": 5.416604782414971e-06, "loss": 0.0405, "step": 30860 }, { "epoch": 4.584880439625724, "grad_norm": 0.5846472978591919, "learning_rate": 5.415119560374277e-06, "loss": 0.0505, "step": 30870 }, { "epoch": 4.586365661666419, "grad_norm": 0.5436474680900574, "learning_rate": 5.413634338333581e-06, "loss": 0.0493, "step": 30880 }, { "epoch": 4.587850883707114, "grad_norm": 0.6070688366889954, "learning_rate": 5.412149116292886e-06, "loss": 0.0539, "step": 30890 }, { "epoch": 4.589336105747809, "grad_norm": 0.39871883392333984, "learning_rate": 5.410663894252192e-06, "loss": 0.0595, "step": 30900 }, { "epoch": 4.590821327788504, "grad_norm": 0.9332908391952515, "learning_rate": 5.409178672211496e-06, "loss": 0.0591, "step": 30910 }, { "epoch": 4.5923065498292, "grad_norm": 1.1472511291503906, "learning_rate": 5.407693450170801e-06, "loss": 0.0755, "step": 30920 }, { "epoch": 4.593791771869895, "grad_norm": 0.8061735033988953, "learning_rate": 5.406208228130106e-06, "loss": 0.064, "step": 30930 }, { "epoch": 4.5952769939105895, "grad_norm": 0.8099207878112793, "learning_rate": 5.404723006089411e-06, "loss": 0.0661, "step": 30940 }, { "epoch": 4.596762215951284, "grad_norm": 1.1036105155944824, "learning_rate": 5.403237784048716e-06, "loss": 0.0533, "step": 30950 }, { "epoch": 4.598247437991979, "grad_norm": 0.9364911317825317, "learning_rate": 5.40175256200802e-06, "loss": 0.0875, "step": 30960 }, { "epoch": 4.599732660032675, "grad_norm": 0.85829097032547, "learning_rate": 5.400267339967325e-06, "loss": 0.068, "step": 30970 }, { "epoch": 4.60121788207337, "grad_norm": 0.7444193959236145, "learning_rate": 5.3987821179266304e-06, "loss": 0.0678, "step": 30980 }, { "epoch": 4.602703104114065, "grad_norm": 0.9241144061088562, "learning_rate": 5.397296895885935e-06, "loss": 0.0663, "step": 30990 }, { "epoch": 4.60418832615476, "grad_norm": 0.8394775390625, "learning_rate": 5.39581167384524e-06, "loss": 0.0513, "step": 31000 }, { "epoch": 4.605673548195456, "grad_norm": 0.821804404258728, "learning_rate": 5.3943264518045454e-06, "loss": 0.0526, "step": 31010 }, { "epoch": 4.6071587702361505, "grad_norm": 0.6810859441757202, "learning_rate": 5.39284122976385e-06, "loss": 0.0569, "step": 31020 }, { "epoch": 4.608643992276845, "grad_norm": 0.8569344282150269, "learning_rate": 5.391356007723155e-06, "loss": 0.0646, "step": 31030 }, { "epoch": 4.61012921431754, "grad_norm": 0.4078651964664459, "learning_rate": 5.38987078568246e-06, "loss": 0.0527, "step": 31040 }, { "epoch": 4.611614436358235, "grad_norm": 0.6530470848083496, "learning_rate": 5.388385563641765e-06, "loss": 0.0549, "step": 31050 }, { "epoch": 4.613099658398931, "grad_norm": 1.1770886182785034, "learning_rate": 5.38690034160107e-06, "loss": 0.0705, "step": 31060 }, { "epoch": 4.614584880439626, "grad_norm": 0.842705488204956, "learning_rate": 5.385415119560375e-06, "loss": 0.0563, "step": 31070 }, { "epoch": 4.616070102480321, "grad_norm": 1.254603624343872, "learning_rate": 5.38392989751968e-06, "loss": 0.0849, "step": 31080 }, { "epoch": 4.617555324521016, "grad_norm": 0.6766051650047302, "learning_rate": 5.382444675478985e-06, "loss": 0.0649, "step": 31090 }, { "epoch": 4.619040546561711, "grad_norm": 1.1732462644577026, "learning_rate": 5.38095945343829e-06, "loss": 0.063, "step": 31100 }, { "epoch": 4.620525768602406, "grad_norm": 1.3196903467178345, "learning_rate": 5.379474231397595e-06, "loss": 0.0698, "step": 31110 }, { "epoch": 4.622010990643101, "grad_norm": 0.4553806185722351, "learning_rate": 5.3779890093569e-06, "loss": 0.0576, "step": 31120 }, { "epoch": 4.623496212683796, "grad_norm": 1.2071045637130737, "learning_rate": 5.376503787316204e-06, "loss": 0.0571, "step": 31130 }, { "epoch": 4.624981434724491, "grad_norm": 0.4207744002342224, "learning_rate": 5.375018565275509e-06, "loss": 0.0475, "step": 31140 }, { "epoch": 4.626466656765187, "grad_norm": 0.45888543128967285, "learning_rate": 5.3735333432348134e-06, "loss": 0.0706, "step": 31150 }, { "epoch": 4.627951878805882, "grad_norm": 0.9755649566650391, "learning_rate": 5.372048121194119e-06, "loss": 0.0709, "step": 31160 }, { "epoch": 4.629437100846577, "grad_norm": 0.9244090914726257, "learning_rate": 5.370562899153424e-06, "loss": 0.0689, "step": 31170 }, { "epoch": 4.630922322887272, "grad_norm": 0.8915614485740662, "learning_rate": 5.3690776771127284e-06, "loss": 0.0475, "step": 31180 }, { "epoch": 4.6324075449279665, "grad_norm": 0.4594666063785553, "learning_rate": 5.367592455072034e-06, "loss": 0.0586, "step": 31190 }, { "epoch": 4.633892766968662, "grad_norm": 0.8010812401771545, "learning_rate": 5.366107233031339e-06, "loss": 0.0768, "step": 31200 }, { "epoch": 4.635377989009357, "grad_norm": 1.156713604927063, "learning_rate": 5.3646220109906434e-06, "loss": 0.0781, "step": 31210 }, { "epoch": 4.636863211050052, "grad_norm": 0.8136284351348877, "learning_rate": 5.363136788949949e-06, "loss": 0.0639, "step": 31220 }, { "epoch": 4.638348433090747, "grad_norm": 0.629618763923645, "learning_rate": 5.361651566909254e-06, "loss": 0.0516, "step": 31230 }, { "epoch": 4.639833655131442, "grad_norm": 0.6487692594528198, "learning_rate": 5.360166344868558e-06, "loss": 0.0741, "step": 31240 }, { "epoch": 4.641318877172138, "grad_norm": 1.1369839906692505, "learning_rate": 5.358681122827864e-06, "loss": 0.0638, "step": 31250 }, { "epoch": 4.6428040992128325, "grad_norm": 0.9577314853668213, "learning_rate": 5.357195900787167e-06, "loss": 0.0665, "step": 31260 }, { "epoch": 4.6442893212535274, "grad_norm": 0.8269117474555969, "learning_rate": 5.3557106787464726e-06, "loss": 0.0651, "step": 31270 }, { "epoch": 4.645774543294222, "grad_norm": 1.153686285018921, "learning_rate": 5.354225456705779e-06, "loss": 0.0672, "step": 31280 }, { "epoch": 4.647259765334917, "grad_norm": 1.1111594438552856, "learning_rate": 5.352740234665082e-06, "loss": 0.0706, "step": 31290 }, { "epoch": 4.648744987375613, "grad_norm": 0.9219614863395691, "learning_rate": 5.3512550126243876e-06, "loss": 0.0569, "step": 31300 }, { "epoch": 4.650230209416308, "grad_norm": 1.7073825597763062, "learning_rate": 5.349769790583693e-06, "loss": 0.0652, "step": 31310 }, { "epoch": 4.651715431457003, "grad_norm": 0.46422526240348816, "learning_rate": 5.348284568542997e-06, "loss": 0.063, "step": 31320 }, { "epoch": 4.653200653497698, "grad_norm": 0.552732527256012, "learning_rate": 5.3467993465023026e-06, "loss": 0.0611, "step": 31330 }, { "epoch": 4.654685875538393, "grad_norm": 1.0013930797576904, "learning_rate": 5.345314124461607e-06, "loss": 0.0662, "step": 31340 }, { "epoch": 4.656171097579088, "grad_norm": 0.7439885139465332, "learning_rate": 5.343828902420912e-06, "loss": 0.0656, "step": 31350 }, { "epoch": 4.657656319619783, "grad_norm": 0.7561959624290466, "learning_rate": 5.3423436803802176e-06, "loss": 0.0618, "step": 31360 }, { "epoch": 4.659141541660478, "grad_norm": 0.8221492767333984, "learning_rate": 5.340858458339522e-06, "loss": 0.0635, "step": 31370 }, { "epoch": 4.660626763701173, "grad_norm": 0.8837724328041077, "learning_rate": 5.339373236298827e-06, "loss": 0.0687, "step": 31380 }, { "epoch": 4.662111985741868, "grad_norm": 1.0688098669052124, "learning_rate": 5.3378880142581325e-06, "loss": 0.0675, "step": 31390 }, { "epoch": 4.663597207782564, "grad_norm": 1.1708858013153076, "learning_rate": 5.336402792217437e-06, "loss": 0.0461, "step": 31400 }, { "epoch": 4.665082429823259, "grad_norm": 0.5535678863525391, "learning_rate": 5.334917570176742e-06, "loss": 0.0766, "step": 31410 }, { "epoch": 4.666567651863954, "grad_norm": 0.7136598229408264, "learning_rate": 5.3334323481360475e-06, "loss": 0.0584, "step": 31420 }, { "epoch": 4.6680528739046485, "grad_norm": 0.8499895334243774, "learning_rate": 5.331947126095351e-06, "loss": 0.0668, "step": 31430 }, { "epoch": 4.669538095945343, "grad_norm": 0.7829546332359314, "learning_rate": 5.330461904054656e-06, "loss": 0.0529, "step": 31440 }, { "epoch": 4.671023317986039, "grad_norm": 0.8320195078849792, "learning_rate": 5.328976682013961e-06, "loss": 0.0704, "step": 31450 }, { "epoch": 4.672508540026734, "grad_norm": 1.6752804517745972, "learning_rate": 5.327491459973266e-06, "loss": 0.0642, "step": 31460 }, { "epoch": 4.673993762067429, "grad_norm": 2.0226798057556152, "learning_rate": 5.326006237932571e-06, "loss": 0.0701, "step": 31470 }, { "epoch": 4.675478984108124, "grad_norm": 0.8305063247680664, "learning_rate": 5.324521015891876e-06, "loss": 0.0608, "step": 31480 }, { "epoch": 4.676964206148819, "grad_norm": 1.0752133131027222, "learning_rate": 5.323035793851181e-06, "loss": 0.0582, "step": 31490 }, { "epoch": 4.678449428189515, "grad_norm": 0.8303189277648926, "learning_rate": 5.321550571810486e-06, "loss": 0.064, "step": 31500 }, { "epoch": 4.6799346502302095, "grad_norm": 0.772408127784729, "learning_rate": 5.320065349769791e-06, "loss": 0.0655, "step": 31510 }, { "epoch": 4.681419872270904, "grad_norm": 0.7097247242927551, "learning_rate": 5.318580127729096e-06, "loss": 0.063, "step": 31520 }, { "epoch": 4.682905094311599, "grad_norm": 0.8347052931785583, "learning_rate": 5.317094905688401e-06, "loss": 0.066, "step": 31530 }, { "epoch": 4.684390316352294, "grad_norm": 0.6834461688995361, "learning_rate": 5.315609683647706e-06, "loss": 0.0633, "step": 31540 }, { "epoch": 4.68587553839299, "grad_norm": 0.6303119659423828, "learning_rate": 5.314124461607011e-06, "loss": 0.0639, "step": 31550 }, { "epoch": 4.687360760433685, "grad_norm": 1.4411765336990356, "learning_rate": 5.312639239566315e-06, "loss": 0.0647, "step": 31560 }, { "epoch": 4.68884598247438, "grad_norm": 1.3410730361938477, "learning_rate": 5.311154017525621e-06, "loss": 0.0612, "step": 31570 }, { "epoch": 4.690331204515075, "grad_norm": 0.6906945109367371, "learning_rate": 5.309668795484926e-06, "loss": 0.0445, "step": 31580 }, { "epoch": 4.6918164265557705, "grad_norm": 0.22727198898792267, "learning_rate": 5.30818357344423e-06, "loss": 0.0492, "step": 31590 }, { "epoch": 4.693301648596465, "grad_norm": 1.2610745429992676, "learning_rate": 5.306698351403535e-06, "loss": 0.0703, "step": 31600 }, { "epoch": 4.69478687063716, "grad_norm": 1.4050370454788208, "learning_rate": 5.30521312936284e-06, "loss": 0.0723, "step": 31610 }, { "epoch": 4.696272092677855, "grad_norm": 0.4102284014225006, "learning_rate": 5.303727907322145e-06, "loss": 0.0648, "step": 31620 }, { "epoch": 4.69775731471855, "grad_norm": 0.755649745464325, "learning_rate": 5.30224268528145e-06, "loss": 0.054, "step": 31630 }, { "epoch": 4.699242536759246, "grad_norm": 0.6751394867897034, "learning_rate": 5.300757463240755e-06, "loss": 0.0532, "step": 31640 }, { "epoch": 4.700727758799941, "grad_norm": 0.7214498519897461, "learning_rate": 5.29927224120006e-06, "loss": 0.0989, "step": 31650 }, { "epoch": 4.702212980840636, "grad_norm": 1.3037954568862915, "learning_rate": 5.297787019159365e-06, "loss": 0.0623, "step": 31660 }, { "epoch": 4.703698202881331, "grad_norm": 0.7185441851615906, "learning_rate": 5.296301797118669e-06, "loss": 0.0603, "step": 31670 }, { "epoch": 4.7051834249220255, "grad_norm": 0.603461503982544, "learning_rate": 5.294816575077975e-06, "loss": 0.047, "step": 31680 }, { "epoch": 4.706668646962721, "grad_norm": 1.2898454666137695, "learning_rate": 5.29333135303728e-06, "loss": 0.0605, "step": 31690 }, { "epoch": 4.708153869003416, "grad_norm": 0.8753405213356018, "learning_rate": 5.291846130996584e-06, "loss": 0.0577, "step": 31700 }, { "epoch": 4.709639091044111, "grad_norm": 0.7883022427558899, "learning_rate": 5.29036090895589e-06, "loss": 0.0709, "step": 31710 }, { "epoch": 4.711124313084806, "grad_norm": 1.3050919771194458, "learning_rate": 5.288875686915195e-06, "loss": 0.0621, "step": 31720 }, { "epoch": 4.712609535125502, "grad_norm": 0.40759310126304626, "learning_rate": 5.2873904648744985e-06, "loss": 0.0464, "step": 31730 }, { "epoch": 4.714094757166197, "grad_norm": 1.0722618103027344, "learning_rate": 5.285905242833805e-06, "loss": 0.0794, "step": 31740 }, { "epoch": 4.7155799792068915, "grad_norm": 0.8702318072319031, "learning_rate": 5.284420020793108e-06, "loss": 0.0605, "step": 31750 }, { "epoch": 4.717065201247586, "grad_norm": 0.5412219166755676, "learning_rate": 5.2829347987524135e-06, "loss": 0.051, "step": 31760 }, { "epoch": 4.718550423288281, "grad_norm": 1.113112211227417, "learning_rate": 5.281449576711719e-06, "loss": 0.056, "step": 31770 }, { "epoch": 4.720035645328977, "grad_norm": 0.8241788148880005, "learning_rate": 5.279964354671023e-06, "loss": 0.0544, "step": 31780 }, { "epoch": 4.721520867369672, "grad_norm": 0.5303086638450623, "learning_rate": 5.2784791326303285e-06, "loss": 0.0695, "step": 31790 }, { "epoch": 4.723006089410367, "grad_norm": 0.9856883883476257, "learning_rate": 5.276993910589634e-06, "loss": 0.0656, "step": 31800 }, { "epoch": 4.724491311451062, "grad_norm": 0.6378732323646545, "learning_rate": 5.275508688548938e-06, "loss": 0.0544, "step": 31810 }, { "epoch": 4.725976533491757, "grad_norm": 1.0125995874404907, "learning_rate": 5.2740234665082435e-06, "loss": 0.0615, "step": 31820 }, { "epoch": 4.7274617555324525, "grad_norm": 0.9217480421066284, "learning_rate": 5.272538244467549e-06, "loss": 0.0501, "step": 31830 }, { "epoch": 4.728946977573147, "grad_norm": 0.46245720982551575, "learning_rate": 5.271053022426853e-06, "loss": 0.0613, "step": 31840 }, { "epoch": 4.730432199613842, "grad_norm": 0.4453304708003998, "learning_rate": 5.2695678003861585e-06, "loss": 0.0519, "step": 31850 }, { "epoch": 4.731917421654537, "grad_norm": 0.49151769280433655, "learning_rate": 5.268082578345463e-06, "loss": 0.0868, "step": 31860 }, { "epoch": 4.733402643695232, "grad_norm": 0.8002780675888062, "learning_rate": 5.266597356304768e-06, "loss": 0.0646, "step": 31870 }, { "epoch": 4.734887865735928, "grad_norm": 0.7369169592857361, "learning_rate": 5.2651121342640735e-06, "loss": 0.0504, "step": 31880 }, { "epoch": 4.736373087776623, "grad_norm": 0.8730064630508423, "learning_rate": 5.263626912223377e-06, "loss": 0.0489, "step": 31890 }, { "epoch": 4.737858309817318, "grad_norm": 0.6994331479072571, "learning_rate": 5.262141690182682e-06, "loss": 0.0675, "step": 31900 }, { "epoch": 4.739343531858013, "grad_norm": 0.7177156209945679, "learning_rate": 5.260656468141988e-06, "loss": 0.0464, "step": 31910 }, { "epoch": 4.7408287538987075, "grad_norm": 0.7728835940361023, "learning_rate": 5.259171246101292e-06, "loss": 0.0595, "step": 31920 }, { "epoch": 4.742313975939403, "grad_norm": 0.6116012334823608, "learning_rate": 5.257686024060597e-06, "loss": 0.0587, "step": 31930 }, { "epoch": 4.743799197980098, "grad_norm": 0.7422853112220764, "learning_rate": 5.256200802019903e-06, "loss": 0.0667, "step": 31940 }, { "epoch": 4.745284420020793, "grad_norm": 0.7492051720619202, "learning_rate": 5.254715579979207e-06, "loss": 0.0621, "step": 31950 }, { "epoch": 4.746769642061488, "grad_norm": 0.4576432406902313, "learning_rate": 5.253230357938512e-06, "loss": 0.0736, "step": 31960 }, { "epoch": 4.748254864102183, "grad_norm": 1.2411152124404907, "learning_rate": 5.251745135897817e-06, "loss": 0.0642, "step": 31970 }, { "epoch": 4.749740086142879, "grad_norm": 1.3862706422805786, "learning_rate": 5.250259913857122e-06, "loss": 0.0428, "step": 31980 }, { "epoch": 4.751225308183574, "grad_norm": 0.7565192580223083, "learning_rate": 5.248774691816427e-06, "loss": 0.0641, "step": 31990 }, { "epoch": 4.7527105302242685, "grad_norm": 0.395856112241745, "learning_rate": 5.247289469775732e-06, "loss": 0.0677, "step": 32000 }, { "epoch": 4.754195752264963, "grad_norm": 0.7864809036254883, "learning_rate": 5.245804247735037e-06, "loss": 0.0391, "step": 32010 }, { "epoch": 4.755680974305658, "grad_norm": 0.42430078983306885, "learning_rate": 5.244319025694342e-06, "loss": 0.0513, "step": 32020 }, { "epoch": 4.757166196346354, "grad_norm": 0.6918451189994812, "learning_rate": 5.242833803653647e-06, "loss": 0.05, "step": 32030 }, { "epoch": 4.758651418387049, "grad_norm": 0.4560699164867401, "learning_rate": 5.241348581612952e-06, "loss": 0.0538, "step": 32040 }, { "epoch": 4.760136640427744, "grad_norm": 0.4515649378299713, "learning_rate": 5.239863359572257e-06, "loss": 0.0642, "step": 32050 }, { "epoch": 4.761621862468439, "grad_norm": 1.9437707662582397, "learning_rate": 5.238378137531561e-06, "loss": 0.0594, "step": 32060 }, { "epoch": 4.763107084509134, "grad_norm": 0.34391626715660095, "learning_rate": 5.236892915490866e-06, "loss": 0.0708, "step": 32070 }, { "epoch": 4.7645923065498295, "grad_norm": 0.8328548073768616, "learning_rate": 5.235407693450171e-06, "loss": 0.0597, "step": 32080 }, { "epoch": 4.766077528590524, "grad_norm": 1.0488471984863281, "learning_rate": 5.233922471409476e-06, "loss": 0.0652, "step": 32090 }, { "epoch": 4.767562750631219, "grad_norm": 0.7127065658569336, "learning_rate": 5.232437249368781e-06, "loss": 0.038, "step": 32100 }, { "epoch": 4.769047972671914, "grad_norm": 1.633682370185852, "learning_rate": 5.230952027328086e-06, "loss": 0.0747, "step": 32110 }, { "epoch": 4.770533194712609, "grad_norm": 0.8790189623832703, "learning_rate": 5.229466805287391e-06, "loss": 0.0874, "step": 32120 }, { "epoch": 4.772018416753305, "grad_norm": 0.8211573362350464, "learning_rate": 5.227981583246696e-06, "loss": 0.0602, "step": 32130 }, { "epoch": 4.773503638794, "grad_norm": 0.5534783601760864, "learning_rate": 5.226496361206001e-06, "loss": 0.0494, "step": 32140 }, { "epoch": 4.774988860834695, "grad_norm": 2.4223477840423584, "learning_rate": 5.225011139165306e-06, "loss": 0.0718, "step": 32150 }, { "epoch": 4.7764740828753895, "grad_norm": 0.7991631031036377, "learning_rate": 5.223525917124611e-06, "loss": 0.0774, "step": 32160 }, { "epoch": 4.777959304916085, "grad_norm": 0.8528579473495483, "learning_rate": 5.222040695083916e-06, "loss": 0.0747, "step": 32170 }, { "epoch": 4.77944452695678, "grad_norm": 1.0776245594024658, "learning_rate": 5.220555473043221e-06, "loss": 0.0685, "step": 32180 }, { "epoch": 4.780929748997475, "grad_norm": 0.5044242143630981, "learning_rate": 5.2190702510025245e-06, "loss": 0.0745, "step": 32190 }, { "epoch": 4.78241497103817, "grad_norm": 0.6456106901168823, "learning_rate": 5.21758502896183e-06, "loss": 0.0816, "step": 32200 }, { "epoch": 4.783900193078865, "grad_norm": 0.7129038572311401, "learning_rate": 5.216099806921136e-06, "loss": 0.0666, "step": 32210 }, { "epoch": 4.785385415119561, "grad_norm": 0.7487207651138306, "learning_rate": 5.2146145848804395e-06, "loss": 0.0559, "step": 32220 }, { "epoch": 4.786870637160256, "grad_norm": 0.36898553371429443, "learning_rate": 5.213129362839745e-06, "loss": 0.0508, "step": 32230 }, { "epoch": 4.7883558592009505, "grad_norm": 0.6122729182243347, "learning_rate": 5.21164414079905e-06, "loss": 0.0675, "step": 32240 }, { "epoch": 4.789841081241645, "grad_norm": 0.7119560837745667, "learning_rate": 5.2101589187583545e-06, "loss": 0.0548, "step": 32250 }, { "epoch": 4.79132630328234, "grad_norm": 1.06340754032135, "learning_rate": 5.20867369671766e-06, "loss": 0.0627, "step": 32260 }, { "epoch": 4.792811525323036, "grad_norm": 0.45462164282798767, "learning_rate": 5.207188474676964e-06, "loss": 0.0612, "step": 32270 }, { "epoch": 4.794296747363731, "grad_norm": 0.6373799443244934, "learning_rate": 5.2057032526362695e-06, "loss": 0.0815, "step": 32280 }, { "epoch": 4.795781969404426, "grad_norm": 0.4400479793548584, "learning_rate": 5.204218030595575e-06, "loss": 0.0726, "step": 32290 }, { "epoch": 4.797267191445121, "grad_norm": 0.8259301781654358, "learning_rate": 5.202732808554879e-06, "loss": 0.064, "step": 32300 }, { "epoch": 4.798752413485817, "grad_norm": 1.5500998497009277, "learning_rate": 5.2012475865141845e-06, "loss": 0.0605, "step": 32310 }, { "epoch": 4.8002376355265115, "grad_norm": 1.0718823671340942, "learning_rate": 5.19976236447349e-06, "loss": 0.0576, "step": 32320 }, { "epoch": 4.801722857567206, "grad_norm": 0.33371061086654663, "learning_rate": 5.198277142432794e-06, "loss": 0.0446, "step": 32330 }, { "epoch": 4.803208079607901, "grad_norm": 0.66288161277771, "learning_rate": 5.1967919203920995e-06, "loss": 0.0664, "step": 32340 }, { "epoch": 4.804693301648596, "grad_norm": 1.0212267637252808, "learning_rate": 5.195306698351405e-06, "loss": 0.0466, "step": 32350 }, { "epoch": 4.806178523689292, "grad_norm": 0.8142063617706299, "learning_rate": 5.193821476310708e-06, "loss": 0.0621, "step": 32360 }, { "epoch": 4.807663745729987, "grad_norm": 0.7264366745948792, "learning_rate": 5.192336254270014e-06, "loss": 0.0608, "step": 32370 }, { "epoch": 4.809148967770682, "grad_norm": 0.8260073065757751, "learning_rate": 5.190851032229318e-06, "loss": 0.0604, "step": 32380 }, { "epoch": 4.810634189811377, "grad_norm": 0.9729313254356384, "learning_rate": 5.189365810188623e-06, "loss": 0.0602, "step": 32390 }, { "epoch": 4.812119411852072, "grad_norm": 1.0569586753845215, "learning_rate": 5.187880588147929e-06, "loss": 0.0552, "step": 32400 }, { "epoch": 4.813604633892767, "grad_norm": 0.6376864314079285, "learning_rate": 5.186395366107233e-06, "loss": 0.0519, "step": 32410 }, { "epoch": 4.815089855933462, "grad_norm": 0.7130823135375977, "learning_rate": 5.184910144066538e-06, "loss": 0.0675, "step": 32420 }, { "epoch": 4.816575077974157, "grad_norm": 0.6740859746932983, "learning_rate": 5.183424922025844e-06, "loss": 0.0618, "step": 32430 }, { "epoch": 4.818060300014852, "grad_norm": 0.7788185477256775, "learning_rate": 5.181939699985148e-06, "loss": 0.0739, "step": 32440 }, { "epoch": 4.819545522055547, "grad_norm": 0.9308912754058838, "learning_rate": 5.180454477944453e-06, "loss": 0.0571, "step": 32450 }, { "epoch": 4.821030744096243, "grad_norm": 0.6136270761489868, "learning_rate": 5.178969255903759e-06, "loss": 0.0771, "step": 32460 }, { "epoch": 4.822515966136938, "grad_norm": 0.9954419732093811, "learning_rate": 5.177484033863063e-06, "loss": 0.0707, "step": 32470 }, { "epoch": 4.824001188177633, "grad_norm": 0.6545411348342896, "learning_rate": 5.175998811822368e-06, "loss": 0.0826, "step": 32480 }, { "epoch": 4.8254864102183275, "grad_norm": 0.5363044142723083, "learning_rate": 5.174513589781672e-06, "loss": 0.0612, "step": 32490 }, { "epoch": 4.826971632259022, "grad_norm": 0.7954429984092712, "learning_rate": 5.173028367740978e-06, "loss": 0.0538, "step": 32500 }, { "epoch": 4.828456854299718, "grad_norm": 0.9464428424835205, "learning_rate": 5.171543145700283e-06, "loss": 0.0724, "step": 32510 }, { "epoch": 4.829942076340413, "grad_norm": 0.3676034212112427, "learning_rate": 5.170057923659587e-06, "loss": 0.0498, "step": 32520 }, { "epoch": 4.831427298381108, "grad_norm": 0.337434858083725, "learning_rate": 5.168572701618892e-06, "loss": 0.078, "step": 32530 }, { "epoch": 4.832912520421803, "grad_norm": 0.7221982479095459, "learning_rate": 5.1670874795781974e-06, "loss": 0.0685, "step": 32540 }, { "epoch": 4.834397742462498, "grad_norm": 1.243183970451355, "learning_rate": 5.165602257537502e-06, "loss": 0.0501, "step": 32550 }, { "epoch": 4.8358829645031935, "grad_norm": 0.7665337324142456, "learning_rate": 5.164117035496807e-06, "loss": 0.076, "step": 32560 }, { "epoch": 4.8373681865438884, "grad_norm": 1.4218223094940186, "learning_rate": 5.1626318134561124e-06, "loss": 0.0524, "step": 32570 }, { "epoch": 4.838853408584583, "grad_norm": 0.9128448963165283, "learning_rate": 5.161146591415417e-06, "loss": 0.0589, "step": 32580 }, { "epoch": 4.840338630625278, "grad_norm": 0.6334198713302612, "learning_rate": 5.159661369374722e-06, "loss": 0.0573, "step": 32590 }, { "epoch": 4.841823852665973, "grad_norm": 0.7682795524597168, "learning_rate": 5.158176147334027e-06, "loss": 0.059, "step": 32600 }, { "epoch": 4.843309074706669, "grad_norm": 0.635378897190094, "learning_rate": 5.156690925293332e-06, "loss": 0.0628, "step": 32610 }, { "epoch": 4.844794296747364, "grad_norm": 0.6622408628463745, "learning_rate": 5.155205703252637e-06, "loss": 0.0479, "step": 32620 }, { "epoch": 4.846279518788059, "grad_norm": 0.47650235891342163, "learning_rate": 5.153720481211942e-06, "loss": 0.042, "step": 32630 }, { "epoch": 4.847764740828754, "grad_norm": 1.37141752243042, "learning_rate": 5.152235259171247e-06, "loss": 0.0691, "step": 32640 }, { "epoch": 4.8492499628694485, "grad_norm": 0.7843444347381592, "learning_rate": 5.150750037130552e-06, "loss": 0.0672, "step": 32650 }, { "epoch": 4.850735184910144, "grad_norm": 0.6222081780433655, "learning_rate": 5.149264815089856e-06, "loss": 0.0684, "step": 32660 }, { "epoch": 4.852220406950839, "grad_norm": 0.8082340955734253, "learning_rate": 5.147779593049161e-06, "loss": 0.0612, "step": 32670 }, { "epoch": 4.853705628991534, "grad_norm": 1.1446117162704468, "learning_rate": 5.146294371008467e-06, "loss": 0.0497, "step": 32680 }, { "epoch": 4.855190851032229, "grad_norm": 0.883907675743103, "learning_rate": 5.144809148967771e-06, "loss": 0.07, "step": 32690 }, { "epoch": 4.856676073072924, "grad_norm": 1.053536295890808, "learning_rate": 5.143323926927076e-06, "loss": 0.0419, "step": 32700 }, { "epoch": 4.85816129511362, "grad_norm": 1.5421696901321411, "learning_rate": 5.1418387048863804e-06, "loss": 0.0677, "step": 32710 }, { "epoch": 4.859646517154315, "grad_norm": 0.4656846523284912, "learning_rate": 5.140353482845686e-06, "loss": 0.0475, "step": 32720 }, { "epoch": 4.8611317391950095, "grad_norm": 0.9899516105651855, "learning_rate": 5.138868260804991e-06, "loss": 0.0601, "step": 32730 }, { "epoch": 4.862616961235704, "grad_norm": 0.761489987373352, "learning_rate": 5.1373830387642954e-06, "loss": 0.0657, "step": 32740 }, { "epoch": 4.8641021832764, "grad_norm": 1.031511664390564, "learning_rate": 5.135897816723601e-06, "loss": 0.0576, "step": 32750 }, { "epoch": 4.865587405317095, "grad_norm": 0.9384909272193909, "learning_rate": 5.134412594682906e-06, "loss": 0.0769, "step": 32760 }, { "epoch": 4.86707262735779, "grad_norm": 0.9842746257781982, "learning_rate": 5.13292737264221e-06, "loss": 0.0606, "step": 32770 }, { "epoch": 4.868557849398485, "grad_norm": 0.6807608008384705, "learning_rate": 5.131442150601516e-06, "loss": 0.0425, "step": 32780 }, { "epoch": 4.87004307143918, "grad_norm": 0.8210038542747498, "learning_rate": 5.12995692856082e-06, "loss": 0.0708, "step": 32790 }, { "epoch": 4.871528293479876, "grad_norm": 0.6338664293289185, "learning_rate": 5.128471706520125e-06, "loss": 0.0817, "step": 32800 }, { "epoch": 4.8730135155205705, "grad_norm": 0.8477684259414673, "learning_rate": 5.126986484479431e-06, "loss": 0.0597, "step": 32810 }, { "epoch": 4.874498737561265, "grad_norm": 1.4142152070999146, "learning_rate": 5.125501262438734e-06, "loss": 0.0901, "step": 32820 }, { "epoch": 4.87598395960196, "grad_norm": 0.40460431575775146, "learning_rate": 5.1240160403980396e-06, "loss": 0.04, "step": 32830 }, { "epoch": 4.877469181642655, "grad_norm": 1.283377766609192, "learning_rate": 5.122530818357345e-06, "loss": 0.0509, "step": 32840 }, { "epoch": 4.878954403683351, "grad_norm": 1.3284411430358887, "learning_rate": 5.121045596316649e-06, "loss": 0.0806, "step": 32850 }, { "epoch": 4.880439625724046, "grad_norm": 1.282408356666565, "learning_rate": 5.1195603742759546e-06, "loss": 0.0674, "step": 32860 }, { "epoch": 4.881924847764741, "grad_norm": 0.5994721055030823, "learning_rate": 5.11807515223526e-06, "loss": 0.0485, "step": 32870 }, { "epoch": 4.883410069805436, "grad_norm": 1.1487478017807007, "learning_rate": 5.116589930194564e-06, "loss": 0.0633, "step": 32880 }, { "epoch": 4.8848952918461315, "grad_norm": 0.6418612599372864, "learning_rate": 5.1151047081538696e-06, "loss": 0.069, "step": 32890 }, { "epoch": 4.886380513886826, "grad_norm": 1.3160861730575562, "learning_rate": 5.113619486113174e-06, "loss": 0.0682, "step": 32900 }, { "epoch": 4.887865735927521, "grad_norm": 0.5356642007827759, "learning_rate": 5.112134264072479e-06, "loss": 0.0533, "step": 32910 }, { "epoch": 4.889350957968216, "grad_norm": 1.1569596529006958, "learning_rate": 5.1106490420317845e-06, "loss": 0.0737, "step": 32920 }, { "epoch": 4.890836180008911, "grad_norm": 0.5608000755310059, "learning_rate": 5.109163819991089e-06, "loss": 0.0584, "step": 32930 }, { "epoch": 4.892321402049607, "grad_norm": 1.6011561155319214, "learning_rate": 5.107678597950394e-06, "loss": 0.0649, "step": 32940 }, { "epoch": 4.893806624090302, "grad_norm": 1.0872085094451904, "learning_rate": 5.1061933759096995e-06, "loss": 0.0581, "step": 32950 }, { "epoch": 4.895291846130997, "grad_norm": 0.6621091961860657, "learning_rate": 5.104708153869003e-06, "loss": 0.0683, "step": 32960 }, { "epoch": 4.896777068171692, "grad_norm": 0.9341262578964233, "learning_rate": 5.103222931828309e-06, "loss": 0.0765, "step": 32970 }, { "epoch": 4.8982622902123865, "grad_norm": 0.588538408279419, "learning_rate": 5.1017377097876145e-06, "loss": 0.0541, "step": 32980 }, { "epoch": 4.899747512253082, "grad_norm": 0.7938698530197144, "learning_rate": 5.100252487746918e-06, "loss": 0.0553, "step": 32990 }, { "epoch": 4.901232734293777, "grad_norm": 0.8380831480026245, "learning_rate": 5.098767265706223e-06, "loss": 0.0764, "step": 33000 }, { "epoch": 4.902717956334472, "grad_norm": 1.6798778772354126, "learning_rate": 5.097282043665528e-06, "loss": 0.0699, "step": 33010 }, { "epoch": 4.904203178375167, "grad_norm": 0.6386799812316895, "learning_rate": 5.095796821624833e-06, "loss": 0.0549, "step": 33020 }, { "epoch": 4.905688400415862, "grad_norm": 1.2105910778045654, "learning_rate": 5.094311599584138e-06, "loss": 0.0629, "step": 33030 }, { "epoch": 4.907173622456558, "grad_norm": 1.107356309890747, "learning_rate": 5.092826377543443e-06, "loss": 0.0683, "step": 33040 }, { "epoch": 4.9086588444972525, "grad_norm": 0.8204216361045837, "learning_rate": 5.091341155502748e-06, "loss": 0.0598, "step": 33050 }, { "epoch": 4.910144066537947, "grad_norm": 0.5383062958717346, "learning_rate": 5.089855933462053e-06, "loss": 0.0588, "step": 33060 }, { "epoch": 4.911629288578642, "grad_norm": 0.49663034081459045, "learning_rate": 5.088370711421358e-06, "loss": 0.0498, "step": 33070 }, { "epoch": 4.913114510619337, "grad_norm": 1.1822824478149414, "learning_rate": 5.086885489380663e-06, "loss": 0.0645, "step": 33080 }, { "epoch": 4.914599732660033, "grad_norm": 0.9482120275497437, "learning_rate": 5.085400267339968e-06, "loss": 0.0811, "step": 33090 }, { "epoch": 4.916084954700728, "grad_norm": 0.5398374795913696, "learning_rate": 5.083915045299273e-06, "loss": 0.0769, "step": 33100 }, { "epoch": 4.917570176741423, "grad_norm": 0.3150595724582672, "learning_rate": 5.082429823258578e-06, "loss": 0.0511, "step": 33110 }, { "epoch": 4.919055398782118, "grad_norm": 0.9777528047561646, "learning_rate": 5.080944601217882e-06, "loss": 0.0644, "step": 33120 }, { "epoch": 4.920540620822813, "grad_norm": 0.9668843746185303, "learning_rate": 5.079459379177187e-06, "loss": 0.0626, "step": 33130 }, { "epoch": 4.922025842863508, "grad_norm": 1.1156543493270874, "learning_rate": 5.077974157136492e-06, "loss": 0.0419, "step": 33140 }, { "epoch": 4.923511064904203, "grad_norm": 1.1860060691833496, "learning_rate": 5.076488935095797e-06, "loss": 0.0638, "step": 33150 }, { "epoch": 4.924996286944898, "grad_norm": 1.0625947713851929, "learning_rate": 5.075003713055102e-06, "loss": 0.044, "step": 33160 }, { "epoch": 4.926481508985593, "grad_norm": 0.5870559811592102, "learning_rate": 5.073518491014407e-06, "loss": 0.0604, "step": 33170 }, { "epoch": 4.927966731026288, "grad_norm": 0.8210609555244446, "learning_rate": 5.072033268973712e-06, "loss": 0.0859, "step": 33180 }, { "epoch": 4.929451953066984, "grad_norm": 0.7899804711341858, "learning_rate": 5.070548046933017e-06, "loss": 0.0592, "step": 33190 }, { "epoch": 4.930937175107679, "grad_norm": 1.406674861907959, "learning_rate": 5.069062824892321e-06, "loss": 0.0589, "step": 33200 }, { "epoch": 4.932422397148374, "grad_norm": 0.7716672420501709, "learning_rate": 5.067577602851627e-06, "loss": 0.0601, "step": 33210 }, { "epoch": 4.9339076191890685, "grad_norm": 1.136020302772522, "learning_rate": 5.066092380810932e-06, "loss": 0.0507, "step": 33220 }, { "epoch": 4.935392841229763, "grad_norm": 0.45281824469566345, "learning_rate": 5.064607158770236e-06, "loss": 0.0524, "step": 33230 }, { "epoch": 4.936878063270459, "grad_norm": 1.0124505758285522, "learning_rate": 5.063121936729542e-06, "loss": 0.0647, "step": 33240 }, { "epoch": 4.938363285311154, "grad_norm": 0.6182017922401428, "learning_rate": 5.061636714688847e-06, "loss": 0.0627, "step": 33250 }, { "epoch": 4.939848507351849, "grad_norm": 1.3870701789855957, "learning_rate": 5.060151492648151e-06, "loss": 0.0633, "step": 33260 }, { "epoch": 4.941333729392544, "grad_norm": 0.8915348649024963, "learning_rate": 5.058666270607457e-06, "loss": 0.0554, "step": 33270 }, { "epoch": 4.942818951433239, "grad_norm": 1.2844725847244263, "learning_rate": 5.057181048566762e-06, "loss": 0.0586, "step": 33280 }, { "epoch": 4.944304173473935, "grad_norm": 1.5099384784698486, "learning_rate": 5.0556958265260655e-06, "loss": 0.0726, "step": 33290 }, { "epoch": 4.9457893955146295, "grad_norm": 1.3242284059524536, "learning_rate": 5.054210604485371e-06, "loss": 0.0591, "step": 33300 }, { "epoch": 4.947274617555324, "grad_norm": 0.7010951042175293, "learning_rate": 5.052725382444675e-06, "loss": 0.0463, "step": 33310 }, { "epoch": 4.948759839596019, "grad_norm": 0.6471588611602783, "learning_rate": 5.0512401604039805e-06, "loss": 0.0459, "step": 33320 }, { "epoch": 4.950245061636715, "grad_norm": 0.5342668294906616, "learning_rate": 5.049754938363286e-06, "loss": 0.0565, "step": 33330 }, { "epoch": 4.95173028367741, "grad_norm": 1.4089100360870361, "learning_rate": 5.04826971632259e-06, "loss": 0.0671, "step": 33340 }, { "epoch": 4.953215505718105, "grad_norm": 0.394208163022995, "learning_rate": 5.0467844942818955e-06, "loss": 0.0545, "step": 33350 }, { "epoch": 4.9547007277588, "grad_norm": 0.9516841769218445, "learning_rate": 5.045299272241201e-06, "loss": 0.0496, "step": 33360 }, { "epoch": 4.956185949799495, "grad_norm": 0.5728833079338074, "learning_rate": 5.043814050200505e-06, "loss": 0.0779, "step": 33370 }, { "epoch": 4.9576711718401905, "grad_norm": 0.43828102946281433, "learning_rate": 5.0423288281598105e-06, "loss": 0.0485, "step": 33380 }, { "epoch": 4.959156393880885, "grad_norm": 0.5014750957489014, "learning_rate": 5.040843606119116e-06, "loss": 0.0463, "step": 33390 }, { "epoch": 4.96064161592158, "grad_norm": 0.7262021899223328, "learning_rate": 5.03935838407842e-06, "loss": 0.0836, "step": 33400 }, { "epoch": 4.962126837962275, "grad_norm": 1.1639851331710815, "learning_rate": 5.0378731620377255e-06, "loss": 0.0868, "step": 33410 }, { "epoch": 4.96361206000297, "grad_norm": 0.6585859656333923, "learning_rate": 5.036387939997029e-06, "loss": 0.0589, "step": 33420 }, { "epoch": 4.965097282043666, "grad_norm": 0.9515668153762817, "learning_rate": 5.034902717956334e-06, "loss": 0.0399, "step": 33430 }, { "epoch": 4.966582504084361, "grad_norm": 0.8517674803733826, "learning_rate": 5.0334174959156405e-06, "loss": 0.0762, "step": 33440 }, { "epoch": 4.968067726125056, "grad_norm": 0.5472394227981567, "learning_rate": 5.031932273874944e-06, "loss": 0.0492, "step": 33450 }, { "epoch": 4.9695529481657505, "grad_norm": 0.8790189027786255, "learning_rate": 5.030447051834249e-06, "loss": 0.0505, "step": 33460 }, { "epoch": 4.971038170206446, "grad_norm": 0.8520390391349792, "learning_rate": 5.028961829793555e-06, "loss": 0.0493, "step": 33470 }, { "epoch": 4.972523392247141, "grad_norm": 1.544259786605835, "learning_rate": 5.027476607752859e-06, "loss": 0.0642, "step": 33480 }, { "epoch": 4.974008614287836, "grad_norm": 1.0220324993133545, "learning_rate": 5.025991385712164e-06, "loss": 0.0544, "step": 33490 }, { "epoch": 4.975493836328531, "grad_norm": 1.8916149139404297, "learning_rate": 5.02450616367147e-06, "loss": 0.0572, "step": 33500 }, { "epoch": 4.976979058369226, "grad_norm": 0.38026267290115356, "learning_rate": 5.023020941630774e-06, "loss": 0.0508, "step": 33510 }, { "epoch": 4.978464280409922, "grad_norm": 0.7838777899742126, "learning_rate": 5.021535719590079e-06, "loss": 0.0639, "step": 33520 }, { "epoch": 4.979949502450617, "grad_norm": 0.5688890218734741, "learning_rate": 5.020050497549384e-06, "loss": 0.0562, "step": 33530 }, { "epoch": 4.9814347244913115, "grad_norm": 1.2140443325042725, "learning_rate": 5.018565275508689e-06, "loss": 0.0674, "step": 33540 }, { "epoch": 4.982919946532006, "grad_norm": 0.6314716935157776, "learning_rate": 5.017080053467994e-06, "loss": 0.0465, "step": 33550 }, { "epoch": 4.984405168572701, "grad_norm": 0.8482723832130432, "learning_rate": 5.015594831427299e-06, "loss": 0.0597, "step": 33560 }, { "epoch": 4.985890390613397, "grad_norm": 0.5596425533294678, "learning_rate": 5.014109609386604e-06, "loss": 0.089, "step": 33570 }, { "epoch": 4.987375612654092, "grad_norm": 2.0052719116210938, "learning_rate": 5.012624387345909e-06, "loss": 0.0507, "step": 33580 }, { "epoch": 4.988860834694787, "grad_norm": 0.601597249507904, "learning_rate": 5.011139165305213e-06, "loss": 0.0589, "step": 33590 }, { "epoch": 4.990346056735482, "grad_norm": 1.0099836587905884, "learning_rate": 5.009653943264518e-06, "loss": 0.062, "step": 33600 }, { "epoch": 4.991831278776177, "grad_norm": 0.6398152709007263, "learning_rate": 5.008168721223824e-06, "loss": 0.0454, "step": 33610 }, { "epoch": 4.9933165008168725, "grad_norm": 0.9657235145568848, "learning_rate": 5.006683499183128e-06, "loss": 0.0655, "step": 33620 }, { "epoch": 4.994801722857567, "grad_norm": 0.9990649223327637, "learning_rate": 5.005198277142433e-06, "loss": 0.0651, "step": 33630 }, { "epoch": 4.996286944898262, "grad_norm": 1.8582215309143066, "learning_rate": 5.003713055101738e-06, "loss": 0.0835, "step": 33640 }, { "epoch": 4.997772166938957, "grad_norm": 0.8416517972946167, "learning_rate": 5.002227833061043e-06, "loss": 0.0548, "step": 33650 }, { "epoch": 4.999257388979652, "grad_norm": 0.7155054211616516, "learning_rate": 5.000742611020348e-06, "loss": 0.0511, "step": 33660 }, { "epoch": 5.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.05618023872375488, "eval_runtime": 214.6654, "eval_samples_per_second": 177.108, "eval_steps_per_second": 5.539, "step": 33665 }, { "epoch": 5.000742611020348, "grad_norm": 0.4117800295352936, "learning_rate": 4.9992573889796535e-06, "loss": 0.0447, "step": 33670 }, { "epoch": 5.002227833061043, "grad_norm": 0.7881307601928711, "learning_rate": 4.997772166938958e-06, "loss": 0.0575, "step": 33680 }, { "epoch": 5.003713055101738, "grad_norm": 1.1118077039718628, "learning_rate": 4.996286944898262e-06, "loss": 0.0808, "step": 33690 }, { "epoch": 5.005198277142433, "grad_norm": 0.7774804830551147, "learning_rate": 4.994801722857568e-06, "loss": 0.0579, "step": 33700 }, { "epoch": 5.0066834991831275, "grad_norm": 1.4578781127929688, "learning_rate": 4.993316500816873e-06, "loss": 0.0578, "step": 33710 }, { "epoch": 5.008168721223823, "grad_norm": 0.4488222002983093, "learning_rate": 4.991831278776177e-06, "loss": 0.0558, "step": 33720 }, { "epoch": 5.009653943264518, "grad_norm": 1.2088385820388794, "learning_rate": 4.990346056735483e-06, "loss": 0.0727, "step": 33730 }, { "epoch": 5.011139165305213, "grad_norm": 0.7863116264343262, "learning_rate": 4.988860834694788e-06, "loss": 0.0745, "step": 33740 }, { "epoch": 5.012624387345908, "grad_norm": 0.9223276972770691, "learning_rate": 4.987375612654092e-06, "loss": 0.05, "step": 33750 }, { "epoch": 5.014109609386603, "grad_norm": 0.6770899295806885, "learning_rate": 4.985890390613397e-06, "loss": 0.0599, "step": 33760 }, { "epoch": 5.015594831427299, "grad_norm": 1.0792419910430908, "learning_rate": 4.984405168572702e-06, "loss": 0.0634, "step": 33770 }, { "epoch": 5.017080053467994, "grad_norm": 0.860308051109314, "learning_rate": 4.982919946532007e-06, "loss": 0.0586, "step": 33780 }, { "epoch": 5.0185652755086885, "grad_norm": 0.962019145488739, "learning_rate": 4.981434724491312e-06, "loss": 0.0683, "step": 33790 }, { "epoch": 5.020050497549383, "grad_norm": 0.9268213510513306, "learning_rate": 4.979949502450617e-06, "loss": 0.0537, "step": 33800 }, { "epoch": 5.021535719590078, "grad_norm": 0.7560243010520935, "learning_rate": 4.9784642804099215e-06, "loss": 0.048, "step": 33810 }, { "epoch": 5.023020941630774, "grad_norm": 1.0545380115509033, "learning_rate": 4.976979058369227e-06, "loss": 0.0501, "step": 33820 }, { "epoch": 5.024506163671469, "grad_norm": 1.0352262258529663, "learning_rate": 4.975493836328531e-06, "loss": 0.0679, "step": 33830 }, { "epoch": 5.025991385712164, "grad_norm": 0.8052208423614502, "learning_rate": 4.9740086142878365e-06, "loss": 0.0492, "step": 33840 }, { "epoch": 5.027476607752859, "grad_norm": 0.7468528151512146, "learning_rate": 4.972523392247141e-06, "loss": 0.0567, "step": 33850 }, { "epoch": 5.0289618297935545, "grad_norm": 0.5573667287826538, "learning_rate": 4.971038170206446e-06, "loss": 0.0649, "step": 33860 }, { "epoch": 5.0304470518342495, "grad_norm": 0.9307456016540527, "learning_rate": 4.9695529481657515e-06, "loss": 0.0623, "step": 33870 }, { "epoch": 5.031932273874944, "grad_norm": 0.5896973013877869, "learning_rate": 4.968067726125056e-06, "loss": 0.0626, "step": 33880 }, { "epoch": 5.033417495915639, "grad_norm": 0.6693680286407471, "learning_rate": 4.966582504084361e-06, "loss": 0.0719, "step": 33890 }, { "epoch": 5.034902717956334, "grad_norm": 0.6010963320732117, "learning_rate": 4.9650972820436665e-06, "loss": 0.0776, "step": 33900 }, { "epoch": 5.03638793999703, "grad_norm": 0.5392886996269226, "learning_rate": 4.963612060002971e-06, "loss": 0.07, "step": 33910 }, { "epoch": 5.037873162037725, "grad_norm": 0.7472323179244995, "learning_rate": 4.962126837962275e-06, "loss": 0.0626, "step": 33920 }, { "epoch": 5.03935838407842, "grad_norm": 0.7688446640968323, "learning_rate": 4.960641615921581e-06, "loss": 0.0565, "step": 33930 }, { "epoch": 5.040843606119115, "grad_norm": 0.4238692820072174, "learning_rate": 4.959156393880886e-06, "loss": 0.0855, "step": 33940 }, { "epoch": 5.0423288281598095, "grad_norm": 0.6260663270950317, "learning_rate": 4.95767117184019e-06, "loss": 0.0485, "step": 33950 }, { "epoch": 5.043814050200505, "grad_norm": 0.6706405282020569, "learning_rate": 4.956185949799496e-06, "loss": 0.0611, "step": 33960 }, { "epoch": 5.0452992722412, "grad_norm": 0.6401503086090088, "learning_rate": 4.954700727758801e-06, "loss": 0.0511, "step": 33970 }, { "epoch": 5.046784494281895, "grad_norm": 0.6273691058158875, "learning_rate": 4.953215505718105e-06, "loss": 0.0626, "step": 33980 }, { "epoch": 5.04826971632259, "grad_norm": 0.9785528779029846, "learning_rate": 4.95173028367741e-06, "loss": 0.0588, "step": 33990 }, { "epoch": 5.049754938363285, "grad_norm": 0.3964395225048065, "learning_rate": 4.950245061636715e-06, "loss": 0.0576, "step": 34000 }, { "epoch": 5.051240160403981, "grad_norm": 0.4322415292263031, "learning_rate": 4.94875983959602e-06, "loss": 0.0535, "step": 34010 }, { "epoch": 5.052725382444676, "grad_norm": 0.8794994354248047, "learning_rate": 4.947274617555325e-06, "loss": 0.0588, "step": 34020 }, { "epoch": 5.0542106044853705, "grad_norm": 0.6617201566696167, "learning_rate": 4.94578939551463e-06, "loss": 0.0521, "step": 34030 }, { "epoch": 5.055695826526065, "grad_norm": 1.4307634830474854, "learning_rate": 4.944304173473935e-06, "loss": 0.0665, "step": 34040 }, { "epoch": 5.05718104856676, "grad_norm": 0.6375547051429749, "learning_rate": 4.94281895143324e-06, "loss": 0.0585, "step": 34050 }, { "epoch": 5.058666270607456, "grad_norm": 0.8604044914245605, "learning_rate": 4.941333729392544e-06, "loss": 0.0535, "step": 34060 }, { "epoch": 5.060151492648151, "grad_norm": 0.6541344523429871, "learning_rate": 4.9398485073518494e-06, "loss": 0.0677, "step": 34070 }, { "epoch": 5.061636714688846, "grad_norm": 0.762144148349762, "learning_rate": 4.938363285311155e-06, "loss": 0.0715, "step": 34080 }, { "epoch": 5.063121936729541, "grad_norm": 0.5391594767570496, "learning_rate": 4.936878063270459e-06, "loss": 0.0642, "step": 34090 }, { "epoch": 5.064607158770237, "grad_norm": 1.1164581775665283, "learning_rate": 4.9353928412297644e-06, "loss": 0.0419, "step": 34100 }, { "epoch": 5.0660923808109315, "grad_norm": 0.7693032026290894, "learning_rate": 4.933907619189069e-06, "loss": 0.0487, "step": 34110 }, { "epoch": 5.067577602851626, "grad_norm": 0.8046525716781616, "learning_rate": 4.932422397148374e-06, "loss": 0.0592, "step": 34120 }, { "epoch": 5.069062824892321, "grad_norm": 0.6247583627700806, "learning_rate": 4.930937175107679e-06, "loss": 0.0488, "step": 34130 }, { "epoch": 5.070548046933016, "grad_norm": 0.9493995308876038, "learning_rate": 4.929451953066984e-06, "loss": 0.067, "step": 34140 }, { "epoch": 5.072033268973712, "grad_norm": 1.048176884651184, "learning_rate": 4.927966731026289e-06, "loss": 0.0658, "step": 34150 }, { "epoch": 5.073518491014407, "grad_norm": 0.8611918687820435, "learning_rate": 4.926481508985594e-06, "loss": 0.0542, "step": 34160 }, { "epoch": 5.075003713055102, "grad_norm": 1.0431722402572632, "learning_rate": 4.924996286944899e-06, "loss": 0.054, "step": 34170 }, { "epoch": 5.076488935095797, "grad_norm": 1.1769628524780273, "learning_rate": 4.923511064904203e-06, "loss": 0.0505, "step": 34180 }, { "epoch": 5.077974157136492, "grad_norm": 0.6343939900398254, "learning_rate": 4.9220258428635086e-06, "loss": 0.0526, "step": 34190 }, { "epoch": 5.079459379177187, "grad_norm": 0.9443486332893372, "learning_rate": 4.920540620822814e-06, "loss": 0.0564, "step": 34200 }, { "epoch": 5.080944601217882, "grad_norm": 0.21072953939437866, "learning_rate": 4.919055398782118e-06, "loss": 0.0509, "step": 34210 }, { "epoch": 5.082429823258577, "grad_norm": 0.9125813841819763, "learning_rate": 4.917570176741423e-06, "loss": 0.0515, "step": 34220 }, { "epoch": 5.083915045299272, "grad_norm": 0.8106305003166199, "learning_rate": 4.916084954700728e-06, "loss": 0.0618, "step": 34230 }, { "epoch": 5.085400267339967, "grad_norm": 0.6208940744400024, "learning_rate": 4.914599732660033e-06, "loss": 0.059, "step": 34240 }, { "epoch": 5.086885489380663, "grad_norm": 0.6824192404747009, "learning_rate": 4.913114510619338e-06, "loss": 0.0577, "step": 34250 }, { "epoch": 5.088370711421358, "grad_norm": 1.3406444787979126, "learning_rate": 4.911629288578643e-06, "loss": 0.062, "step": 34260 }, { "epoch": 5.089855933462053, "grad_norm": 0.6709977388381958, "learning_rate": 4.910144066537948e-06, "loss": 0.0607, "step": 34270 }, { "epoch": 5.0913411555027475, "grad_norm": 1.1426851749420166, "learning_rate": 4.908658844497253e-06, "loss": 0.0632, "step": 34280 }, { "epoch": 5.092826377543442, "grad_norm": 0.7882207036018372, "learning_rate": 4.907173622456557e-06, "loss": 0.0425, "step": 34290 }, { "epoch": 5.094311599584138, "grad_norm": 0.7153235077857971, "learning_rate": 4.905688400415862e-06, "loss": 0.0523, "step": 34300 }, { "epoch": 5.095796821624833, "grad_norm": 0.745721161365509, "learning_rate": 4.904203178375168e-06, "loss": 0.0705, "step": 34310 }, { "epoch": 5.097282043665528, "grad_norm": 0.5836763978004456, "learning_rate": 4.902717956334472e-06, "loss": 0.0697, "step": 34320 }, { "epoch": 5.098767265706223, "grad_norm": 0.8141874074935913, "learning_rate": 4.901232734293777e-06, "loss": 0.0689, "step": 34330 }, { "epoch": 5.100252487746918, "grad_norm": 0.5741795897483826, "learning_rate": 4.899747512253083e-06, "loss": 0.0559, "step": 34340 }, { "epoch": 5.1017377097876135, "grad_norm": 1.1511962413787842, "learning_rate": 4.898262290212387e-06, "loss": 0.0805, "step": 34350 }, { "epoch": 5.103222931828308, "grad_norm": 0.4399915933609009, "learning_rate": 4.8967770681716916e-06, "loss": 0.0588, "step": 34360 }, { "epoch": 5.104708153869003, "grad_norm": 1.116751790046692, "learning_rate": 4.895291846130997e-06, "loss": 0.0603, "step": 34370 }, { "epoch": 5.106193375909698, "grad_norm": 1.0357329845428467, "learning_rate": 4.893806624090302e-06, "loss": 0.0722, "step": 34380 }, { "epoch": 5.107678597950393, "grad_norm": 0.8936920166015625, "learning_rate": 4.8923214020496066e-06, "loss": 0.0594, "step": 34390 }, { "epoch": 5.109163819991089, "grad_norm": 0.6210498213768005, "learning_rate": 4.890836180008912e-06, "loss": 0.0659, "step": 34400 }, { "epoch": 5.110649042031784, "grad_norm": 0.8246908187866211, "learning_rate": 4.889350957968217e-06, "loss": 0.0665, "step": 34410 }, { "epoch": 5.112134264072479, "grad_norm": 0.7300816178321838, "learning_rate": 4.8878657359275216e-06, "loss": 0.0747, "step": 34420 }, { "epoch": 5.113619486113174, "grad_norm": 1.2019017934799194, "learning_rate": 4.886380513886827e-06, "loss": 0.0448, "step": 34430 }, { "epoch": 5.115104708153869, "grad_norm": 0.4036964774131775, "learning_rate": 4.884895291846131e-06, "loss": 0.0507, "step": 34440 }, { "epoch": 5.116589930194564, "grad_norm": 1.3310085535049438, "learning_rate": 4.8834100698054365e-06, "loss": 0.0529, "step": 34450 }, { "epoch": 5.118075152235259, "grad_norm": 1.1880275011062622, "learning_rate": 4.881924847764741e-06, "loss": 0.0679, "step": 34460 }, { "epoch": 5.119560374275954, "grad_norm": 1.0951112508773804, "learning_rate": 4.880439625724046e-06, "loss": 0.0645, "step": 34470 }, { "epoch": 5.121045596316649, "grad_norm": 0.6842232346534729, "learning_rate": 4.878954403683351e-06, "loss": 0.0741, "step": 34480 }, { "epoch": 5.122530818357345, "grad_norm": 0.63397216796875, "learning_rate": 4.877469181642656e-06, "loss": 0.0609, "step": 34490 }, { "epoch": 5.12401604039804, "grad_norm": 1.2980011701583862, "learning_rate": 4.875983959601961e-06, "loss": 0.0582, "step": 34500 }, { "epoch": 5.125501262438735, "grad_norm": 0.2617751955986023, "learning_rate": 4.874498737561266e-06, "loss": 0.0736, "step": 34510 }, { "epoch": 5.1269864844794295, "grad_norm": 0.5681473016738892, "learning_rate": 4.87301351552057e-06, "loss": 0.0579, "step": 34520 }, { "epoch": 5.128471706520124, "grad_norm": 1.5799667835235596, "learning_rate": 4.871528293479875e-06, "loss": 0.066, "step": 34530 }, { "epoch": 5.12995692856082, "grad_norm": 0.9241040349006653, "learning_rate": 4.870043071439181e-06, "loss": 0.0729, "step": 34540 }, { "epoch": 5.131442150601515, "grad_norm": 0.26680564880371094, "learning_rate": 4.868557849398485e-06, "loss": 0.047, "step": 34550 }, { "epoch": 5.13292737264221, "grad_norm": 1.0289782285690308, "learning_rate": 4.86707262735779e-06, "loss": 0.048, "step": 34560 }, { "epoch": 5.134412594682905, "grad_norm": 1.0188406705856323, "learning_rate": 4.865587405317096e-06, "loss": 0.0545, "step": 34570 }, { "epoch": 5.1358978167236, "grad_norm": 0.6055546402931213, "learning_rate": 4.8641021832764e-06, "loss": 0.0789, "step": 34580 }, { "epoch": 5.137383038764296, "grad_norm": 0.93790203332901, "learning_rate": 4.8626169612357045e-06, "loss": 0.0591, "step": 34590 }, { "epoch": 5.1388682608049905, "grad_norm": 0.7455692291259766, "learning_rate": 4.86113173919501e-06, "loss": 0.059, "step": 34600 }, { "epoch": 5.140353482845685, "grad_norm": 0.9879961013793945, "learning_rate": 4.859646517154315e-06, "loss": 0.0687, "step": 34610 }, { "epoch": 5.14183870488638, "grad_norm": 0.674579381942749, "learning_rate": 4.8581612951136195e-06, "loss": 0.0573, "step": 34620 }, { "epoch": 5.143323926927075, "grad_norm": 0.9698799848556519, "learning_rate": 4.856676073072925e-06, "loss": 0.0607, "step": 34630 }, { "epoch": 5.144809148967771, "grad_norm": 0.29647594690322876, "learning_rate": 4.85519085103223e-06, "loss": 0.0559, "step": 34640 }, { "epoch": 5.146294371008466, "grad_norm": 1.5583728551864624, "learning_rate": 4.8537056289915345e-06, "loss": 0.0736, "step": 34650 }, { "epoch": 5.147779593049161, "grad_norm": 0.7042451500892639, "learning_rate": 4.85222040695084e-06, "loss": 0.0563, "step": 34660 }, { "epoch": 5.149264815089856, "grad_norm": 0.41057154536247253, "learning_rate": 4.850735184910145e-06, "loss": 0.0389, "step": 34670 }, { "epoch": 5.1507500371305515, "grad_norm": 1.1116803884506226, "learning_rate": 4.8492499628694495e-06, "loss": 0.0567, "step": 34680 }, { "epoch": 5.152235259171246, "grad_norm": 0.4490911364555359, "learning_rate": 4.847764740828754e-06, "loss": 0.0732, "step": 34690 }, { "epoch": 5.153720481211941, "grad_norm": 0.765703558921814, "learning_rate": 4.846279518788059e-06, "loss": 0.0677, "step": 34700 }, { "epoch": 5.155205703252636, "grad_norm": 0.9491612315177917, "learning_rate": 4.8447942967473645e-06, "loss": 0.0615, "step": 34710 }, { "epoch": 5.156690925293331, "grad_norm": 0.7063356041908264, "learning_rate": 4.843309074706669e-06, "loss": 0.0549, "step": 34720 }, { "epoch": 5.158176147334027, "grad_norm": 1.0340287685394287, "learning_rate": 4.841823852665974e-06, "loss": 0.0737, "step": 34730 }, { "epoch": 5.159661369374722, "grad_norm": 0.32890424132347107, "learning_rate": 4.840338630625279e-06, "loss": 0.0611, "step": 34740 }, { "epoch": 5.161146591415417, "grad_norm": 0.7864364981651306, "learning_rate": 4.838853408584584e-06, "loss": 0.0666, "step": 34750 }, { "epoch": 5.1626318134561116, "grad_norm": 1.1317648887634277, "learning_rate": 4.837368186543888e-06, "loss": 0.0566, "step": 34760 }, { "epoch": 5.1641170354968065, "grad_norm": 0.5009697079658508, "learning_rate": 4.835882964503194e-06, "loss": 0.0515, "step": 34770 }, { "epoch": 5.165602257537502, "grad_norm": 0.48454317450523376, "learning_rate": 4.834397742462498e-06, "loss": 0.0527, "step": 34780 }, { "epoch": 5.167087479578197, "grad_norm": 1.3548284769058228, "learning_rate": 4.832912520421803e-06, "loss": 0.0682, "step": 34790 }, { "epoch": 5.168572701618892, "grad_norm": 0.651645839214325, "learning_rate": 4.831427298381109e-06, "loss": 0.0599, "step": 34800 }, { "epoch": 5.170057923659587, "grad_norm": 1.2181872129440308, "learning_rate": 4.829942076340413e-06, "loss": 0.0586, "step": 34810 }, { "epoch": 5.171543145700282, "grad_norm": 1.0582828521728516, "learning_rate": 4.828456854299718e-06, "loss": 0.0513, "step": 34820 }, { "epoch": 5.173028367740978, "grad_norm": 0.7807072997093201, "learning_rate": 4.826971632259023e-06, "loss": 0.0573, "step": 34830 }, { "epoch": 5.1745135897816725, "grad_norm": 0.5891880989074707, "learning_rate": 4.825486410218328e-06, "loss": 0.0467, "step": 34840 }, { "epoch": 5.175998811822367, "grad_norm": 0.9987158179283142, "learning_rate": 4.8240011881776325e-06, "loss": 0.0489, "step": 34850 }, { "epoch": 5.177484033863062, "grad_norm": 0.3595871925354004, "learning_rate": 4.822515966136938e-06, "loss": 0.054, "step": 34860 }, { "epoch": 5.178969255903757, "grad_norm": 0.3779183626174927, "learning_rate": 4.821030744096243e-06, "loss": 0.0775, "step": 34870 }, { "epoch": 5.180454477944453, "grad_norm": 1.291839838027954, "learning_rate": 4.8195455220555475e-06, "loss": 0.0671, "step": 34880 }, { "epoch": 5.181939699985148, "grad_norm": 0.7555186152458191, "learning_rate": 4.818060300014852e-06, "loss": 0.064, "step": 34890 }, { "epoch": 5.183424922025843, "grad_norm": 0.6942147612571716, "learning_rate": 4.816575077974158e-06, "loss": 0.072, "step": 34900 }, { "epoch": 5.184910144066538, "grad_norm": 0.8907058238983154, "learning_rate": 4.8150898559334625e-06, "loss": 0.0692, "step": 34910 }, { "epoch": 5.186395366107233, "grad_norm": 0.659183144569397, "learning_rate": 4.813604633892767e-06, "loss": 0.0538, "step": 34920 }, { "epoch": 5.187880588147928, "grad_norm": 0.8931286931037903, "learning_rate": 4.812119411852072e-06, "loss": 0.079, "step": 34930 }, { "epoch": 5.189365810188623, "grad_norm": 0.6598271131515503, "learning_rate": 4.8106341898113775e-06, "loss": 0.0729, "step": 34940 }, { "epoch": 5.190851032229318, "grad_norm": 0.7790974378585815, "learning_rate": 4.809148967770682e-06, "loss": 0.0578, "step": 34950 }, { "epoch": 5.192336254270013, "grad_norm": 0.3787277936935425, "learning_rate": 4.807663745729987e-06, "loss": 0.0529, "step": 34960 }, { "epoch": 5.193821476310708, "grad_norm": 0.6995694041252136, "learning_rate": 4.8061785236892925e-06, "loss": 0.0679, "step": 34970 }, { "epoch": 5.195306698351404, "grad_norm": 0.7316018342971802, "learning_rate": 4.804693301648597e-06, "loss": 0.0663, "step": 34980 }, { "epoch": 5.196791920392099, "grad_norm": 1.4191354513168335, "learning_rate": 4.803208079607901e-06, "loss": 0.0587, "step": 34990 }, { "epoch": 5.198277142432794, "grad_norm": 1.3133004903793335, "learning_rate": 4.801722857567207e-06, "loss": 0.0755, "step": 35000 }, { "epoch": 5.1997623644734885, "grad_norm": 0.6993300914764404, "learning_rate": 4.800237635526512e-06, "loss": 0.0602, "step": 35010 }, { "epoch": 5.201247586514184, "grad_norm": 0.69782555103302, "learning_rate": 4.798752413485816e-06, "loss": 0.0606, "step": 35020 }, { "epoch": 5.202732808554879, "grad_norm": 0.7031997442245483, "learning_rate": 4.797267191445122e-06, "loss": 0.0618, "step": 35030 }, { "epoch": 5.204218030595574, "grad_norm": 0.747891366481781, "learning_rate": 4.795781969404426e-06, "loss": 0.0742, "step": 35040 }, { "epoch": 5.205703252636269, "grad_norm": 0.6401386260986328, "learning_rate": 4.794296747363731e-06, "loss": 0.0798, "step": 35050 }, { "epoch": 5.207188474676964, "grad_norm": 0.8471872210502625, "learning_rate": 4.792811525323036e-06, "loss": 0.0501, "step": 35060 }, { "epoch": 5.20867369671766, "grad_norm": 1.545289397239685, "learning_rate": 4.791326303282341e-06, "loss": 0.0782, "step": 35070 }, { "epoch": 5.210158918758355, "grad_norm": 0.8901092410087585, "learning_rate": 4.789841081241646e-06, "loss": 0.0615, "step": 35080 }, { "epoch": 5.2116441407990495, "grad_norm": 1.0877093076705933, "learning_rate": 4.788355859200951e-06, "loss": 0.0654, "step": 35090 }, { "epoch": 5.213129362839744, "grad_norm": 0.7853423357009888, "learning_rate": 4.786870637160256e-06, "loss": 0.0552, "step": 35100 }, { "epoch": 5.214614584880439, "grad_norm": 0.6040746569633484, "learning_rate": 4.7853854151195605e-06, "loss": 0.0516, "step": 35110 }, { "epoch": 5.216099806921135, "grad_norm": 0.8198233246803284, "learning_rate": 4.783900193078866e-06, "loss": 0.0672, "step": 35120 }, { "epoch": 5.21758502896183, "grad_norm": 1.4005907773971558, "learning_rate": 4.782414971038171e-06, "loss": 0.0624, "step": 35130 }, { "epoch": 5.219070251002525, "grad_norm": 1.1273680925369263, "learning_rate": 4.7809297489974755e-06, "loss": 0.0539, "step": 35140 }, { "epoch": 5.22055547304322, "grad_norm": 0.905612051486969, "learning_rate": 4.77944452695678e-06, "loss": 0.0539, "step": 35150 }, { "epoch": 5.222040695083915, "grad_norm": 0.6797011494636536, "learning_rate": 4.777959304916085e-06, "loss": 0.0768, "step": 35160 }, { "epoch": 5.2235259171246105, "grad_norm": 0.8509185314178467, "learning_rate": 4.7764740828753905e-06, "loss": 0.0522, "step": 35170 }, { "epoch": 5.225011139165305, "grad_norm": 1.077875018119812, "learning_rate": 4.774988860834695e-06, "loss": 0.0516, "step": 35180 }, { "epoch": 5.226496361206, "grad_norm": 1.1827176809310913, "learning_rate": 4.773503638794e-06, "loss": 0.0462, "step": 35190 }, { "epoch": 5.227981583246695, "grad_norm": 0.8252261281013489, "learning_rate": 4.7720184167533055e-06, "loss": 0.0675, "step": 35200 }, { "epoch": 5.22946680528739, "grad_norm": 0.774476945400238, "learning_rate": 4.77053319471261e-06, "loss": 0.0569, "step": 35210 }, { "epoch": 5.230952027328086, "grad_norm": 0.9155932068824768, "learning_rate": 4.769047972671914e-06, "loss": 0.0558, "step": 35220 }, { "epoch": 5.232437249368781, "grad_norm": 0.3924713730812073, "learning_rate": 4.76756275063122e-06, "loss": 0.0638, "step": 35230 }, { "epoch": 5.233922471409476, "grad_norm": 0.5012497305870056, "learning_rate": 4.766077528590525e-06, "loss": 0.04, "step": 35240 }, { "epoch": 5.2354076934501705, "grad_norm": 0.6418888568878174, "learning_rate": 4.764592306549829e-06, "loss": 0.0638, "step": 35250 }, { "epoch": 5.236892915490866, "grad_norm": 0.4431953430175781, "learning_rate": 4.763107084509135e-06, "loss": 0.0554, "step": 35260 }, { "epoch": 5.238378137531561, "grad_norm": 0.49925869703292847, "learning_rate": 4.76162186246844e-06, "loss": 0.0614, "step": 35270 }, { "epoch": 5.239863359572256, "grad_norm": 0.9694715738296509, "learning_rate": 4.760136640427744e-06, "loss": 0.0441, "step": 35280 }, { "epoch": 5.241348581612951, "grad_norm": 0.6737119555473328, "learning_rate": 4.758651418387049e-06, "loss": 0.0461, "step": 35290 }, { "epoch": 5.242833803653646, "grad_norm": 0.8961270451545715, "learning_rate": 4.757166196346354e-06, "loss": 0.0561, "step": 35300 }, { "epoch": 5.244319025694342, "grad_norm": 0.651147186756134, "learning_rate": 4.755680974305659e-06, "loss": 0.0434, "step": 35310 }, { "epoch": 5.245804247735037, "grad_norm": 0.953199028968811, "learning_rate": 4.754195752264964e-06, "loss": 0.0578, "step": 35320 }, { "epoch": 5.2472894697757315, "grad_norm": 1.1796019077301025, "learning_rate": 4.752710530224269e-06, "loss": 0.0701, "step": 35330 }, { "epoch": 5.248774691816426, "grad_norm": 0.7786931991577148, "learning_rate": 4.751225308183574e-06, "loss": 0.0571, "step": 35340 }, { "epoch": 5.250259913857121, "grad_norm": 1.258894443511963, "learning_rate": 4.749740086142879e-06, "loss": 0.0786, "step": 35350 }, { "epoch": 5.251745135897817, "grad_norm": 1.521615743637085, "learning_rate": 4.748254864102184e-06, "loss": 0.0561, "step": 35360 }, { "epoch": 5.253230357938512, "grad_norm": 0.750752329826355, "learning_rate": 4.7467696420614885e-06, "loss": 0.053, "step": 35370 }, { "epoch": 5.254715579979207, "grad_norm": 0.878555953502655, "learning_rate": 4.745284420020794e-06, "loss": 0.0724, "step": 35380 }, { "epoch": 5.256200802019902, "grad_norm": 1.4904576539993286, "learning_rate": 4.743799197980098e-06, "loss": 0.0623, "step": 35390 }, { "epoch": 5.257686024060597, "grad_norm": 0.8942855596542358, "learning_rate": 4.7423139759394035e-06, "loss": 0.0502, "step": 35400 }, { "epoch": 5.2591712461012925, "grad_norm": 0.8960702419281006, "learning_rate": 4.740828753898708e-06, "loss": 0.0553, "step": 35410 }, { "epoch": 5.260656468141987, "grad_norm": 0.7556365728378296, "learning_rate": 4.739343531858013e-06, "loss": 0.0752, "step": 35420 }, { "epoch": 5.262141690182682, "grad_norm": 0.9979767799377441, "learning_rate": 4.7378583098173185e-06, "loss": 0.0508, "step": 35430 }, { "epoch": 5.263626912223377, "grad_norm": 1.3748942613601685, "learning_rate": 4.736373087776623e-06, "loss": 0.057, "step": 35440 }, { "epoch": 5.265112134264072, "grad_norm": 0.3763701617717743, "learning_rate": 4.734887865735928e-06, "loss": 0.0622, "step": 35450 }, { "epoch": 5.266597356304768, "grad_norm": 0.26023438572883606, "learning_rate": 4.733402643695233e-06, "loss": 0.0478, "step": 35460 }, { "epoch": 5.268082578345463, "grad_norm": 0.8818972706794739, "learning_rate": 4.731917421654538e-06, "loss": 0.0469, "step": 35470 }, { "epoch": 5.269567800386158, "grad_norm": 0.8154204487800598, "learning_rate": 4.730432199613842e-06, "loss": 0.0468, "step": 35480 }, { "epoch": 5.271053022426853, "grad_norm": 0.6178163886070251, "learning_rate": 4.728946977573148e-06, "loss": 0.0577, "step": 35490 }, { "epoch": 5.2725382444675475, "grad_norm": 0.4771282374858856, "learning_rate": 4.727461755532453e-06, "loss": 0.0541, "step": 35500 }, { "epoch": 5.274023466508243, "grad_norm": 0.9545867443084717, "learning_rate": 4.725976533491757e-06, "loss": 0.0833, "step": 35510 }, { "epoch": 5.275508688548938, "grad_norm": 0.8452116847038269, "learning_rate": 4.724491311451062e-06, "loss": 0.061, "step": 35520 }, { "epoch": 5.276993910589633, "grad_norm": 0.7856245040893555, "learning_rate": 4.723006089410367e-06, "loss": 0.0682, "step": 35530 }, { "epoch": 5.278479132630328, "grad_norm": 1.091416358947754, "learning_rate": 4.721520867369672e-06, "loss": 0.0579, "step": 35540 }, { "epoch": 5.279964354671023, "grad_norm": 0.6916674971580505, "learning_rate": 4.720035645328977e-06, "loss": 0.0567, "step": 35550 }, { "epoch": 5.281449576711719, "grad_norm": 0.31788578629493713, "learning_rate": 4.718550423288282e-06, "loss": 0.053, "step": 35560 }, { "epoch": 5.282934798752414, "grad_norm": 0.7865982055664062, "learning_rate": 4.717065201247587e-06, "loss": 0.0658, "step": 35570 }, { "epoch": 5.2844200207931085, "grad_norm": 1.0544800758361816, "learning_rate": 4.715579979206892e-06, "loss": 0.0736, "step": 35580 }, { "epoch": 5.285905242833803, "grad_norm": 1.0144555568695068, "learning_rate": 4.714094757166196e-06, "loss": 0.0639, "step": 35590 }, { "epoch": 5.287390464874499, "grad_norm": 0.7130341529846191, "learning_rate": 4.712609535125502e-06, "loss": 0.0569, "step": 35600 }, { "epoch": 5.288875686915194, "grad_norm": 0.952660083770752, "learning_rate": 4.711124313084807e-06, "loss": 0.0642, "step": 35610 }, { "epoch": 5.290360908955889, "grad_norm": 0.8427135348320007, "learning_rate": 4.709639091044111e-06, "loss": 0.0543, "step": 35620 }, { "epoch": 5.291846130996584, "grad_norm": 0.6305361390113831, "learning_rate": 4.7081538690034164e-06, "loss": 0.0621, "step": 35630 }, { "epoch": 5.293331353037279, "grad_norm": 1.340817928314209, "learning_rate": 4.706668646962722e-06, "loss": 0.0593, "step": 35640 }, { "epoch": 5.2948165750779745, "grad_norm": 0.468432754278183, "learning_rate": 4.705183424922026e-06, "loss": 0.0495, "step": 35650 }, { "epoch": 5.296301797118669, "grad_norm": 0.8716316819190979, "learning_rate": 4.7036982028813314e-06, "loss": 0.0673, "step": 35660 }, { "epoch": 5.297787019159364, "grad_norm": 0.9076347351074219, "learning_rate": 4.702212980840636e-06, "loss": 0.0635, "step": 35670 }, { "epoch": 5.299272241200059, "grad_norm": 0.8860725164413452, "learning_rate": 4.700727758799941e-06, "loss": 0.0555, "step": 35680 }, { "epoch": 5.300757463240754, "grad_norm": 1.1657594442367554, "learning_rate": 4.699242536759246e-06, "loss": 0.0633, "step": 35690 }, { "epoch": 5.30224268528145, "grad_norm": 0.8625801205635071, "learning_rate": 4.697757314718551e-06, "loss": 0.0536, "step": 35700 }, { "epoch": 5.303727907322145, "grad_norm": 0.6598368883132935, "learning_rate": 4.696272092677856e-06, "loss": 0.0477, "step": 35710 }, { "epoch": 5.30521312936284, "grad_norm": 0.6790146827697754, "learning_rate": 4.6947868706371606e-06, "loss": 0.0618, "step": 35720 }, { "epoch": 5.306698351403535, "grad_norm": 1.139267921447754, "learning_rate": 4.693301648596466e-06, "loss": 0.0502, "step": 35730 }, { "epoch": 5.3081835734442295, "grad_norm": 0.5348055362701416, "learning_rate": 4.69181642655577e-06, "loss": 0.0546, "step": 35740 }, { "epoch": 5.309668795484925, "grad_norm": 0.9072430729866028, "learning_rate": 4.6903312045150756e-06, "loss": 0.0733, "step": 35750 }, { "epoch": 5.31115401752562, "grad_norm": 0.2694229483604431, "learning_rate": 4.68884598247438e-06, "loss": 0.0471, "step": 35760 }, { "epoch": 5.312639239566315, "grad_norm": 0.8150109648704529, "learning_rate": 4.687360760433685e-06, "loss": 0.0565, "step": 35770 }, { "epoch": 5.31412446160701, "grad_norm": 0.405373752117157, "learning_rate": 4.68587553839299e-06, "loss": 0.0637, "step": 35780 }, { "epoch": 5.315609683647705, "grad_norm": 1.0095162391662598, "learning_rate": 4.684390316352295e-06, "loss": 0.054, "step": 35790 }, { "epoch": 5.317094905688401, "grad_norm": 0.8304887413978577, "learning_rate": 4.6829050943116e-06, "loss": 0.0648, "step": 35800 }, { "epoch": 5.318580127729096, "grad_norm": 0.7152771353721619, "learning_rate": 4.681419872270905e-06, "loss": 0.0592, "step": 35810 }, { "epoch": 5.3200653497697905, "grad_norm": 0.6170614957809448, "learning_rate": 4.679934650230209e-06, "loss": 0.0611, "step": 35820 }, { "epoch": 5.321550571810485, "grad_norm": 0.7695423364639282, "learning_rate": 4.678449428189515e-06, "loss": 0.0759, "step": 35830 }, { "epoch": 5.323035793851181, "grad_norm": 0.4829961061477661, "learning_rate": 4.67696420614882e-06, "loss": 0.0591, "step": 35840 }, { "epoch": 5.324521015891876, "grad_norm": 0.9738242030143738, "learning_rate": 4.675478984108124e-06, "loss": 0.0698, "step": 35850 }, { "epoch": 5.326006237932571, "grad_norm": 0.6264966726303101, "learning_rate": 4.673993762067429e-06, "loss": 0.0598, "step": 35860 }, { "epoch": 5.327491459973266, "grad_norm": 1.0743324756622314, "learning_rate": 4.672508540026735e-06, "loss": 0.068, "step": 35870 }, { "epoch": 5.328976682013961, "grad_norm": 0.896522581577301, "learning_rate": 4.671023317986039e-06, "loss": 0.0622, "step": 35880 }, { "epoch": 5.330461904054657, "grad_norm": 0.8270133137702942, "learning_rate": 4.669538095945344e-06, "loss": 0.0661, "step": 35890 }, { "epoch": 5.3319471260953515, "grad_norm": 1.0910258293151855, "learning_rate": 4.66805287390465e-06, "loss": 0.0696, "step": 35900 }, { "epoch": 5.333432348136046, "grad_norm": 1.075138807296753, "learning_rate": 4.666567651863954e-06, "loss": 0.065, "step": 35910 }, { "epoch": 5.334917570176741, "grad_norm": 0.6105763912200928, "learning_rate": 4.6650824298232586e-06, "loss": 0.0448, "step": 35920 }, { "epoch": 5.336402792217436, "grad_norm": 0.7854593396186829, "learning_rate": 4.663597207782564e-06, "loss": 0.0475, "step": 35930 }, { "epoch": 5.337888014258132, "grad_norm": 0.24348467588424683, "learning_rate": 4.662111985741869e-06, "loss": 0.0578, "step": 35940 }, { "epoch": 5.339373236298827, "grad_norm": 0.2796478867530823, "learning_rate": 4.6606267637011736e-06, "loss": 0.0547, "step": 35950 }, { "epoch": 5.340858458339522, "grad_norm": 0.6760820150375366, "learning_rate": 4.659141541660479e-06, "loss": 0.0567, "step": 35960 }, { "epoch": 5.342343680380217, "grad_norm": 0.9601134657859802, "learning_rate": 4.657656319619783e-06, "loss": 0.0468, "step": 35970 }, { "epoch": 5.343828902420912, "grad_norm": 1.3426578044891357, "learning_rate": 4.6561710975790885e-06, "loss": 0.0719, "step": 35980 }, { "epoch": 5.345314124461607, "grad_norm": 1.429280400276184, "learning_rate": 4.654685875538393e-06, "loss": 0.0747, "step": 35990 }, { "epoch": 5.346799346502302, "grad_norm": 0.47933143377304077, "learning_rate": 4.653200653497698e-06, "loss": 0.0514, "step": 36000 }, { "epoch": 5.348284568542997, "grad_norm": 0.8363801836967468, "learning_rate": 4.6517154314570035e-06, "loss": 0.0608, "step": 36010 }, { "epoch": 5.349769790583692, "grad_norm": 1.23948073387146, "learning_rate": 4.650230209416308e-06, "loss": 0.062, "step": 36020 }, { "epoch": 5.351255012624387, "grad_norm": 0.8158882260322571, "learning_rate": 4.648744987375613e-06, "loss": 0.0518, "step": 36030 }, { "epoch": 5.352740234665083, "grad_norm": 1.212444543838501, "learning_rate": 4.647259765334918e-06, "loss": 0.0672, "step": 36040 }, { "epoch": 5.354225456705778, "grad_norm": 0.9097556471824646, "learning_rate": 4.645774543294223e-06, "loss": 0.0586, "step": 36050 }, { "epoch": 5.3557106787464726, "grad_norm": 0.4835580587387085, "learning_rate": 4.644289321253528e-06, "loss": 0.04, "step": 36060 }, { "epoch": 5.3571959007871675, "grad_norm": 1.1032158136367798, "learning_rate": 4.642804099212833e-06, "loss": 0.0684, "step": 36070 }, { "epoch": 5.358681122827862, "grad_norm": 0.4809795320034027, "learning_rate": 4.641318877172137e-06, "loss": 0.0545, "step": 36080 }, { "epoch": 5.360166344868558, "grad_norm": 1.5494866371154785, "learning_rate": 4.639833655131442e-06, "loss": 0.0663, "step": 36090 }, { "epoch": 5.361651566909253, "grad_norm": 1.4795058965682983, "learning_rate": 4.638348433090748e-06, "loss": 0.0548, "step": 36100 }, { "epoch": 5.363136788949948, "grad_norm": 0.5212380886077881, "learning_rate": 4.636863211050052e-06, "loss": 0.0475, "step": 36110 }, { "epoch": 5.364622010990643, "grad_norm": 0.1950119435787201, "learning_rate": 4.635377989009357e-06, "loss": 0.0487, "step": 36120 }, { "epoch": 5.366107233031338, "grad_norm": 0.8038704991340637, "learning_rate": 4.633892766968663e-06, "loss": 0.0734, "step": 36130 }, { "epoch": 5.3675924550720335, "grad_norm": 0.8529590368270874, "learning_rate": 4.632407544927967e-06, "loss": 0.0549, "step": 36140 }, { "epoch": 5.369077677112728, "grad_norm": 0.5382302403450012, "learning_rate": 4.6309223228872715e-06, "loss": 0.0477, "step": 36150 }, { "epoch": 5.370562899153423, "grad_norm": 0.9232028126716614, "learning_rate": 4.629437100846577e-06, "loss": 0.0633, "step": 36160 }, { "epoch": 5.372048121194118, "grad_norm": 1.1026906967163086, "learning_rate": 4.627951878805882e-06, "loss": 0.0441, "step": 36170 }, { "epoch": 5.373533343234814, "grad_norm": 1.0024158954620361, "learning_rate": 4.6264666567651865e-06, "loss": 0.063, "step": 36180 }, { "epoch": 5.375018565275509, "grad_norm": 0.37517932057380676, "learning_rate": 4.624981434724492e-06, "loss": 0.0393, "step": 36190 }, { "epoch": 5.376503787316204, "grad_norm": 0.7823379635810852, "learning_rate": 4.623496212683797e-06, "loss": 0.0707, "step": 36200 }, { "epoch": 5.377989009356899, "grad_norm": 0.8410983085632324, "learning_rate": 4.6220109906431015e-06, "loss": 0.0684, "step": 36210 }, { "epoch": 5.379474231397594, "grad_norm": 1.6391550302505493, "learning_rate": 4.620525768602406e-06, "loss": 0.0602, "step": 36220 }, { "epoch": 5.380959453438289, "grad_norm": 0.5223180055618286, "learning_rate": 4.619040546561711e-06, "loss": 0.0639, "step": 36230 }, { "epoch": 5.382444675478984, "grad_norm": 0.6970897912979126, "learning_rate": 4.6175553245210165e-06, "loss": 0.0557, "step": 36240 }, { "epoch": 5.383929897519679, "grad_norm": 0.8759315013885498, "learning_rate": 4.616070102480321e-06, "loss": 0.0658, "step": 36250 }, { "epoch": 5.385415119560374, "grad_norm": 1.5353213548660278, "learning_rate": 4.614584880439626e-06, "loss": 0.0635, "step": 36260 }, { "epoch": 5.386900341601069, "grad_norm": 1.4542258977890015, "learning_rate": 4.6130996583989315e-06, "loss": 0.0652, "step": 36270 }, { "epoch": 5.388385563641765, "grad_norm": 1.3767975568771362, "learning_rate": 4.611614436358236e-06, "loss": 0.0725, "step": 36280 }, { "epoch": 5.38987078568246, "grad_norm": 0.8995253443717957, "learning_rate": 4.61012921431754e-06, "loss": 0.0785, "step": 36290 }, { "epoch": 5.391356007723155, "grad_norm": 1.5716017484664917, "learning_rate": 4.608643992276846e-06, "loss": 0.0588, "step": 36300 }, { "epoch": 5.3928412297638495, "grad_norm": 1.171876072883606, "learning_rate": 4.607158770236151e-06, "loss": 0.0603, "step": 36310 }, { "epoch": 5.394326451804544, "grad_norm": 1.5212937593460083, "learning_rate": 4.605673548195455e-06, "loss": 0.0568, "step": 36320 }, { "epoch": 5.39581167384524, "grad_norm": 0.8403087854385376, "learning_rate": 4.604188326154761e-06, "loss": 0.0756, "step": 36330 }, { "epoch": 5.397296895885935, "grad_norm": 1.2481799125671387, "learning_rate": 4.602703104114065e-06, "loss": 0.0688, "step": 36340 }, { "epoch": 5.39878211792663, "grad_norm": 1.127144455909729, "learning_rate": 4.60121788207337e-06, "loss": 0.0506, "step": 36350 }, { "epoch": 5.400267339967325, "grad_norm": 0.590782880783081, "learning_rate": 4.599732660032676e-06, "loss": 0.0678, "step": 36360 }, { "epoch": 5.40175256200802, "grad_norm": 0.8410061597824097, "learning_rate": 4.59824743799198e-06, "loss": 0.0753, "step": 36370 }, { "epoch": 5.403237784048716, "grad_norm": 1.1885724067687988, "learning_rate": 4.596762215951285e-06, "loss": 0.0668, "step": 36380 }, { "epoch": 5.4047230060894105, "grad_norm": 0.5760040879249573, "learning_rate": 4.59527699391059e-06, "loss": 0.0651, "step": 36390 }, { "epoch": 5.406208228130105, "grad_norm": 0.9846057891845703, "learning_rate": 4.593791771869895e-06, "loss": 0.0591, "step": 36400 }, { "epoch": 5.4076934501708, "grad_norm": 0.9451662302017212, "learning_rate": 4.5923065498291995e-06, "loss": 0.0646, "step": 36410 }, { "epoch": 5.409178672211496, "grad_norm": 0.6508313417434692, "learning_rate": 4.590821327788505e-06, "loss": 0.0575, "step": 36420 }, { "epoch": 5.410663894252191, "grad_norm": 0.5562500357627869, "learning_rate": 4.58933610574781e-06, "loss": 0.0596, "step": 36430 }, { "epoch": 5.412149116292886, "grad_norm": 0.43210306763648987, "learning_rate": 4.5878508837071145e-06, "loss": 0.0646, "step": 36440 }, { "epoch": 5.413634338333581, "grad_norm": 0.8571392893791199, "learning_rate": 4.586365661666419e-06, "loss": 0.0658, "step": 36450 }, { "epoch": 5.415119560374276, "grad_norm": 0.7892032265663147, "learning_rate": 4.584880439625724e-06, "loss": 0.0602, "step": 36460 }, { "epoch": 5.4166047824149715, "grad_norm": 0.21121440827846527, "learning_rate": 4.5833952175850295e-06, "loss": 0.039, "step": 36470 }, { "epoch": 5.418090004455666, "grad_norm": 0.858177900314331, "learning_rate": 4.581909995544334e-06, "loss": 0.0549, "step": 36480 }, { "epoch": 5.419575226496361, "grad_norm": 1.2030284404754639, "learning_rate": 4.580424773503639e-06, "loss": 0.0565, "step": 36490 }, { "epoch": 5.421060448537056, "grad_norm": 1.4121376276016235, "learning_rate": 4.5789395514629445e-06, "loss": 0.066, "step": 36500 }, { "epoch": 5.422545670577751, "grad_norm": 1.6546800136566162, "learning_rate": 4.577454329422249e-06, "loss": 0.0705, "step": 36510 }, { "epoch": 5.424030892618447, "grad_norm": 1.0694074630737305, "learning_rate": 4.575969107381553e-06, "loss": 0.0566, "step": 36520 }, { "epoch": 5.425516114659142, "grad_norm": 1.0429729223251343, "learning_rate": 4.5744838853408595e-06, "loss": 0.0597, "step": 36530 }, { "epoch": 5.427001336699837, "grad_norm": 0.5231200456619263, "learning_rate": 4.572998663300164e-06, "loss": 0.0443, "step": 36540 }, { "epoch": 5.4284865587405315, "grad_norm": 0.4253857135772705, "learning_rate": 4.571513441259468e-06, "loss": 0.0384, "step": 36550 }, { "epoch": 5.429971780781226, "grad_norm": 0.5469606518745422, "learning_rate": 4.570028219218774e-06, "loss": 0.0416, "step": 36560 }, { "epoch": 5.431457002821922, "grad_norm": 0.6530857682228088, "learning_rate": 4.568542997178079e-06, "loss": 0.063, "step": 36570 }, { "epoch": 5.432942224862617, "grad_norm": 1.0885331630706787, "learning_rate": 4.567057775137383e-06, "loss": 0.0609, "step": 36580 }, { "epoch": 5.434427446903312, "grad_norm": 0.5104076862335205, "learning_rate": 4.565572553096689e-06, "loss": 0.0596, "step": 36590 }, { "epoch": 5.435912668944007, "grad_norm": 0.8749592900276184, "learning_rate": 4.564087331055993e-06, "loss": 0.055, "step": 36600 }, { "epoch": 5.437397890984702, "grad_norm": 0.5066226720809937, "learning_rate": 4.562602109015298e-06, "loss": 0.0689, "step": 36610 }, { "epoch": 5.438883113025398, "grad_norm": 0.8218633532524109, "learning_rate": 4.561116886974603e-06, "loss": 0.0722, "step": 36620 }, { "epoch": 5.4403683350660925, "grad_norm": 0.7993268370628357, "learning_rate": 4.559631664933908e-06, "loss": 0.0672, "step": 36630 }, { "epoch": 5.441853557106787, "grad_norm": 0.9641051292419434, "learning_rate": 4.558146442893213e-06, "loss": 0.0642, "step": 36640 }, { "epoch": 5.443338779147482, "grad_norm": 1.2114911079406738, "learning_rate": 4.556661220852518e-06, "loss": 0.0732, "step": 36650 }, { "epoch": 5.444824001188177, "grad_norm": 1.2579625844955444, "learning_rate": 4.555175998811823e-06, "loss": 0.0651, "step": 36660 }, { "epoch": 5.446309223228873, "grad_norm": 1.0065919160842896, "learning_rate": 4.5536907767711275e-06, "loss": 0.0627, "step": 36670 }, { "epoch": 5.447794445269568, "grad_norm": 0.6958107352256775, "learning_rate": 4.552205554730433e-06, "loss": 0.0598, "step": 36680 }, { "epoch": 5.449279667310263, "grad_norm": 1.2037479877471924, "learning_rate": 4.550720332689737e-06, "loss": 0.0825, "step": 36690 }, { "epoch": 5.450764889350958, "grad_norm": 1.2185901403427124, "learning_rate": 4.5492351106490425e-06, "loss": 0.0478, "step": 36700 }, { "epoch": 5.452250111391653, "grad_norm": 0.9952356219291687, "learning_rate": 4.547749888608347e-06, "loss": 0.0644, "step": 36710 }, { "epoch": 5.453735333432348, "grad_norm": 1.9384931325912476, "learning_rate": 4.546264666567652e-06, "loss": 0.0546, "step": 36720 }, { "epoch": 5.455220555473043, "grad_norm": 0.6183782815933228, "learning_rate": 4.5447794445269575e-06, "loss": 0.0505, "step": 36730 }, { "epoch": 5.456705777513738, "grad_norm": 0.5289264917373657, "learning_rate": 4.543294222486262e-06, "loss": 0.0601, "step": 36740 }, { "epoch": 5.458190999554433, "grad_norm": 0.41108959913253784, "learning_rate": 4.541809000445566e-06, "loss": 0.0548, "step": 36750 }, { "epoch": 5.459676221595129, "grad_norm": 0.7227884531021118, "learning_rate": 4.540323778404872e-06, "loss": 0.0673, "step": 36760 }, { "epoch": 5.461161443635824, "grad_norm": 0.6556276082992554, "learning_rate": 4.538838556364177e-06, "loss": 0.0544, "step": 36770 }, { "epoch": 5.462646665676519, "grad_norm": 1.0905522108078003, "learning_rate": 4.537353334323481e-06, "loss": 0.0686, "step": 36780 }, { "epoch": 5.464131887717214, "grad_norm": 0.9823871850967407, "learning_rate": 4.535868112282787e-06, "loss": 0.0595, "step": 36790 }, { "epoch": 5.4656171097579085, "grad_norm": 0.3967541754245758, "learning_rate": 4.534382890242092e-06, "loss": 0.0665, "step": 36800 }, { "epoch": 5.467102331798604, "grad_norm": 0.5711907744407654, "learning_rate": 4.532897668201396e-06, "loss": 0.0667, "step": 36810 }, { "epoch": 5.468587553839299, "grad_norm": 0.9890785813331604, "learning_rate": 4.531412446160702e-06, "loss": 0.0624, "step": 36820 }, { "epoch": 5.470072775879994, "grad_norm": 0.9664349555969238, "learning_rate": 4.529927224120007e-06, "loss": 0.0521, "step": 36830 }, { "epoch": 5.471557997920689, "grad_norm": 1.5224270820617676, "learning_rate": 4.528442002079311e-06, "loss": 0.062, "step": 36840 }, { "epoch": 5.473043219961384, "grad_norm": 0.53714519739151, "learning_rate": 4.526956780038616e-06, "loss": 0.0652, "step": 36850 }, { "epoch": 5.47452844200208, "grad_norm": 0.7480376958847046, "learning_rate": 4.525471557997921e-06, "loss": 0.0611, "step": 36860 }, { "epoch": 5.476013664042775, "grad_norm": 1.379319190979004, "learning_rate": 4.523986335957226e-06, "loss": 0.0522, "step": 36870 }, { "epoch": 5.4774988860834695, "grad_norm": 0.8469178080558777, "learning_rate": 4.522501113916531e-06, "loss": 0.0668, "step": 36880 }, { "epoch": 5.478984108124164, "grad_norm": 0.9694294929504395, "learning_rate": 4.521015891875836e-06, "loss": 0.0573, "step": 36890 }, { "epoch": 5.480469330164859, "grad_norm": 1.125335693359375, "learning_rate": 4.519530669835141e-06, "loss": 0.0659, "step": 36900 }, { "epoch": 5.481954552205555, "grad_norm": 1.0027401447296143, "learning_rate": 4.518045447794446e-06, "loss": 0.0548, "step": 36910 }, { "epoch": 5.48343977424625, "grad_norm": 0.3352084755897522, "learning_rate": 4.51656022575375e-06, "loss": 0.0749, "step": 36920 }, { "epoch": 5.484924996286945, "grad_norm": 1.036489486694336, "learning_rate": 4.5150750037130555e-06, "loss": 0.0828, "step": 36930 }, { "epoch": 5.48641021832764, "grad_norm": 0.21433958411216736, "learning_rate": 4.513589781672361e-06, "loss": 0.0765, "step": 36940 }, { "epoch": 5.487895440368335, "grad_norm": 0.8928041458129883, "learning_rate": 4.512104559631665e-06, "loss": 0.0514, "step": 36950 }, { "epoch": 5.48938066240903, "grad_norm": 1.3866063356399536, "learning_rate": 4.5106193375909705e-06, "loss": 0.0583, "step": 36960 }, { "epoch": 5.490865884449725, "grad_norm": 0.5213386416435242, "learning_rate": 4.509134115550275e-06, "loss": 0.0583, "step": 36970 }, { "epoch": 5.49235110649042, "grad_norm": 0.6851054430007935, "learning_rate": 4.50764889350958e-06, "loss": 0.0462, "step": 36980 }, { "epoch": 5.493836328531115, "grad_norm": 0.9823634028434753, "learning_rate": 4.506163671468885e-06, "loss": 0.0476, "step": 36990 }, { "epoch": 5.495321550571811, "grad_norm": 0.695232093334198, "learning_rate": 4.50467844942819e-06, "loss": 0.0685, "step": 37000 }, { "epoch": 5.496806772612506, "grad_norm": 0.4329679012298584, "learning_rate": 4.503193227387494e-06, "loss": 0.0561, "step": 37010 }, { "epoch": 5.498291994653201, "grad_norm": 0.4390914738178253, "learning_rate": 4.5017080053468e-06, "loss": 0.0571, "step": 37020 }, { "epoch": 5.499777216693896, "grad_norm": 0.7721920609474182, "learning_rate": 4.500222783306105e-06, "loss": 0.0643, "step": 37030 }, { "epoch": 5.5012624387345905, "grad_norm": 1.4150617122650146, "learning_rate": 4.498737561265409e-06, "loss": 0.0645, "step": 37040 }, { "epoch": 5.502747660775286, "grad_norm": 0.3090786039829254, "learning_rate": 4.497252339224715e-06, "loss": 0.0397, "step": 37050 }, { "epoch": 5.504232882815981, "grad_norm": 1.3260048627853394, "learning_rate": 4.49576711718402e-06, "loss": 0.063, "step": 37060 }, { "epoch": 5.505718104856676, "grad_norm": 0.3949032127857208, "learning_rate": 4.494281895143324e-06, "loss": 0.0639, "step": 37070 }, { "epoch": 5.507203326897371, "grad_norm": 1.1284046173095703, "learning_rate": 4.492796673102629e-06, "loss": 0.0691, "step": 37080 }, { "epoch": 5.508688548938066, "grad_norm": 1.1532460451126099, "learning_rate": 4.491311451061934e-06, "loss": 0.0671, "step": 37090 }, { "epoch": 5.510173770978762, "grad_norm": 1.2885421514511108, "learning_rate": 4.489826229021239e-06, "loss": 0.0617, "step": 37100 }, { "epoch": 5.511658993019457, "grad_norm": 1.097259759902954, "learning_rate": 4.488341006980544e-06, "loss": 0.0509, "step": 37110 }, { "epoch": 5.5131442150601515, "grad_norm": 1.5691416263580322, "learning_rate": 4.486855784939849e-06, "loss": 0.0539, "step": 37120 }, { "epoch": 5.514629437100846, "grad_norm": 0.8756474256515503, "learning_rate": 4.485370562899154e-06, "loss": 0.0522, "step": 37130 }, { "epoch": 5.516114659141541, "grad_norm": 1.1349800825119019, "learning_rate": 4.483885340858459e-06, "loss": 0.0504, "step": 37140 }, { "epoch": 5.517599881182237, "grad_norm": 0.840767502784729, "learning_rate": 4.482400118817763e-06, "loss": 0.0516, "step": 37150 }, { "epoch": 5.519085103222932, "grad_norm": 1.6772485971450806, "learning_rate": 4.4809148967770684e-06, "loss": 0.0557, "step": 37160 }, { "epoch": 5.520570325263627, "grad_norm": 1.1356531381607056, "learning_rate": 4.479429674736374e-06, "loss": 0.0711, "step": 37170 }, { "epoch": 5.522055547304322, "grad_norm": 1.0116087198257446, "learning_rate": 4.477944452695678e-06, "loss": 0.0527, "step": 37180 }, { "epoch": 5.523540769345017, "grad_norm": 0.8792704939842224, "learning_rate": 4.4764592306549834e-06, "loss": 0.0837, "step": 37190 }, { "epoch": 5.5250259913857125, "grad_norm": 0.801468014717102, "learning_rate": 4.474974008614289e-06, "loss": 0.0697, "step": 37200 }, { "epoch": 5.526511213426407, "grad_norm": 0.8734177350997925, "learning_rate": 4.473488786573593e-06, "loss": 0.0578, "step": 37210 }, { "epoch": 5.527996435467102, "grad_norm": 0.9292588233947754, "learning_rate": 4.472003564532898e-06, "loss": 0.0709, "step": 37220 }, { "epoch": 5.529481657507797, "grad_norm": 1.0219374895095825, "learning_rate": 4.470518342492203e-06, "loss": 0.0591, "step": 37230 }, { "epoch": 5.530966879548492, "grad_norm": 0.9166500568389893, "learning_rate": 4.469033120451508e-06, "loss": 0.0597, "step": 37240 }, { "epoch": 5.532452101589188, "grad_norm": 0.7812015414237976, "learning_rate": 4.4675478984108126e-06, "loss": 0.0543, "step": 37250 }, { "epoch": 5.533937323629883, "grad_norm": 0.8167303204536438, "learning_rate": 4.466062676370118e-06, "loss": 0.0654, "step": 37260 }, { "epoch": 5.535422545670578, "grad_norm": 1.0693591833114624, "learning_rate": 4.464577454329422e-06, "loss": 0.0457, "step": 37270 }, { "epoch": 5.536907767711273, "grad_norm": 1.1283386945724487, "learning_rate": 4.4630922322887276e-06, "loss": 0.0489, "step": 37280 }, { "epoch": 5.5383929897519675, "grad_norm": 1.1214085817337036, "learning_rate": 4.461607010248033e-06, "loss": 0.0729, "step": 37290 }, { "epoch": 5.539878211792663, "grad_norm": 0.9980657696723938, "learning_rate": 4.460121788207337e-06, "loss": 0.0577, "step": 37300 }, { "epoch": 5.541363433833358, "grad_norm": 0.49719762802124023, "learning_rate": 4.4586365661666426e-06, "loss": 0.062, "step": 37310 }, { "epoch": 5.542848655874053, "grad_norm": 0.40343061089515686, "learning_rate": 4.457151344125947e-06, "loss": 0.056, "step": 37320 }, { "epoch": 5.544333877914748, "grad_norm": 0.7878409624099731, "learning_rate": 4.455666122085252e-06, "loss": 0.0823, "step": 37330 }, { "epoch": 5.545819099955443, "grad_norm": 0.6854997873306274, "learning_rate": 4.454180900044557e-06, "loss": 0.0468, "step": 37340 }, { "epoch": 5.547304321996139, "grad_norm": 0.8148928880691528, "learning_rate": 4.452695678003862e-06, "loss": 0.0489, "step": 37350 }, { "epoch": 5.5487895440368336, "grad_norm": 1.3261892795562744, "learning_rate": 4.451210455963167e-06, "loss": 0.0787, "step": 37360 }, { "epoch": 5.5502747660775285, "grad_norm": 0.3939753472805023, "learning_rate": 4.449725233922472e-06, "loss": 0.0703, "step": 37370 }, { "epoch": 5.551759988118223, "grad_norm": 1.5147497653961182, "learning_rate": 4.448240011881776e-06, "loss": 0.0622, "step": 37380 }, { "epoch": 5.553245210158919, "grad_norm": 1.1918028593063354, "learning_rate": 4.446754789841081e-06, "loss": 0.0724, "step": 37390 }, { "epoch": 5.554730432199614, "grad_norm": 1.0911834239959717, "learning_rate": 4.445269567800387e-06, "loss": 0.0526, "step": 37400 }, { "epoch": 5.556215654240309, "grad_norm": 1.4858453273773193, "learning_rate": 4.443784345759691e-06, "loss": 0.0882, "step": 37410 }, { "epoch": 5.557700876281004, "grad_norm": 1.0886820554733276, "learning_rate": 4.442299123718996e-06, "loss": 0.0693, "step": 37420 }, { "epoch": 5.559186098321699, "grad_norm": 0.7892463803291321, "learning_rate": 4.440813901678302e-06, "loss": 0.0635, "step": 37430 }, { "epoch": 5.5606713203623945, "grad_norm": 0.9498715996742249, "learning_rate": 4.439328679637606e-06, "loss": 0.0475, "step": 37440 }, { "epoch": 5.562156542403089, "grad_norm": 0.8907163739204407, "learning_rate": 4.4378434575969106e-06, "loss": 0.039, "step": 37450 }, { "epoch": 5.563641764443784, "grad_norm": 0.8134555220603943, "learning_rate": 4.436358235556216e-06, "loss": 0.0573, "step": 37460 }, { "epoch": 5.565126986484479, "grad_norm": 0.6253407597541809, "learning_rate": 4.434873013515521e-06, "loss": 0.0811, "step": 37470 }, { "epoch": 5.566612208525174, "grad_norm": 1.469370722770691, "learning_rate": 4.4333877914748256e-06, "loss": 0.0658, "step": 37480 }, { "epoch": 5.56809743056587, "grad_norm": 0.6674718260765076, "learning_rate": 4.431902569434131e-06, "loss": 0.0684, "step": 37490 }, { "epoch": 5.569582652606565, "grad_norm": 1.1165416240692139, "learning_rate": 4.430417347393436e-06, "loss": 0.0533, "step": 37500 }, { "epoch": 5.57106787464726, "grad_norm": 0.9866893887519836, "learning_rate": 4.4289321253527405e-06, "loss": 0.0606, "step": 37510 }, { "epoch": 5.572553096687955, "grad_norm": 0.32497164607048035, "learning_rate": 4.427446903312046e-06, "loss": 0.0504, "step": 37520 }, { "epoch": 5.57403831872865, "grad_norm": 0.6711202263832092, "learning_rate": 4.42596168127135e-06, "loss": 0.0581, "step": 37530 }, { "epoch": 5.575523540769345, "grad_norm": 0.4225695729255676, "learning_rate": 4.4244764592306555e-06, "loss": 0.0614, "step": 37540 }, { "epoch": 5.57700876281004, "grad_norm": 1.0005754232406616, "learning_rate": 4.42299123718996e-06, "loss": 0.0638, "step": 37550 }, { "epoch": 5.578493984850735, "grad_norm": 1.128718614578247, "learning_rate": 4.421506015149265e-06, "loss": 0.0631, "step": 37560 }, { "epoch": 5.57997920689143, "grad_norm": 0.8793702125549316, "learning_rate": 4.4200207931085705e-06, "loss": 0.0468, "step": 37570 }, { "epoch": 5.581464428932126, "grad_norm": 0.840347409248352, "learning_rate": 4.418535571067875e-06, "loss": 0.0619, "step": 37580 }, { "epoch": 5.582949650972821, "grad_norm": 0.5288404226303101, "learning_rate": 4.41705034902718e-06, "loss": 0.0544, "step": 37590 }, { "epoch": 5.584434873013516, "grad_norm": 0.6308549642562866, "learning_rate": 4.415565126986485e-06, "loss": 0.0569, "step": 37600 }, { "epoch": 5.5859200950542105, "grad_norm": 0.5083063244819641, "learning_rate": 4.41407990494579e-06, "loss": 0.0664, "step": 37610 }, { "epoch": 5.587405317094905, "grad_norm": 0.6433333158493042, "learning_rate": 4.412594682905094e-06, "loss": 0.0511, "step": 37620 }, { "epoch": 5.588890539135601, "grad_norm": 1.3996425867080688, "learning_rate": 4.4111094608644e-06, "loss": 0.0624, "step": 37630 }, { "epoch": 5.590375761176296, "grad_norm": 0.6735121607780457, "learning_rate": 4.409624238823704e-06, "loss": 0.0724, "step": 37640 }, { "epoch": 5.591860983216991, "grad_norm": 0.8044751286506653, "learning_rate": 4.408139016783009e-06, "loss": 0.0592, "step": 37650 }, { "epoch": 5.593346205257686, "grad_norm": 0.9791496396064758, "learning_rate": 4.406653794742315e-06, "loss": 0.0695, "step": 37660 }, { "epoch": 5.594831427298381, "grad_norm": 0.7882761359214783, "learning_rate": 4.405168572701619e-06, "loss": 0.0539, "step": 37670 }, { "epoch": 5.596316649339077, "grad_norm": 0.5030707716941833, "learning_rate": 4.4036833506609235e-06, "loss": 0.052, "step": 37680 }, { "epoch": 5.5978018713797715, "grad_norm": 0.3755476474761963, "learning_rate": 4.402198128620229e-06, "loss": 0.0548, "step": 37690 }, { "epoch": 5.599287093420466, "grad_norm": 1.3941385746002197, "learning_rate": 4.400712906579534e-06, "loss": 0.0746, "step": 37700 }, { "epoch": 5.600772315461161, "grad_norm": 0.32942524552345276, "learning_rate": 4.3992276845388385e-06, "loss": 0.0443, "step": 37710 }, { "epoch": 5.602257537501856, "grad_norm": 0.9542339444160461, "learning_rate": 4.397742462498144e-06, "loss": 0.0577, "step": 37720 }, { "epoch": 5.603742759542552, "grad_norm": 0.5538778305053711, "learning_rate": 4.396257240457449e-06, "loss": 0.0636, "step": 37730 }, { "epoch": 5.605227981583247, "grad_norm": 1.0870361328125, "learning_rate": 4.3947720184167535e-06, "loss": 0.0775, "step": 37740 }, { "epoch": 5.606713203623942, "grad_norm": 0.5325985550880432, "learning_rate": 4.393286796376058e-06, "loss": 0.0664, "step": 37750 }, { "epoch": 5.608198425664637, "grad_norm": 0.9860812425613403, "learning_rate": 4.391801574335364e-06, "loss": 0.0623, "step": 37760 }, { "epoch": 5.609683647705332, "grad_norm": 0.6696199178695679, "learning_rate": 4.3903163522946685e-06, "loss": 0.0463, "step": 37770 }, { "epoch": 5.611168869746027, "grad_norm": 1.3116692304611206, "learning_rate": 4.388831130253973e-06, "loss": 0.0611, "step": 37780 }, { "epoch": 5.612654091786722, "grad_norm": 0.8291306495666504, "learning_rate": 4.387345908213278e-06, "loss": 0.0662, "step": 37790 }, { "epoch": 5.614139313827417, "grad_norm": 1.2390786409378052, "learning_rate": 4.3858606861725835e-06, "loss": 0.059, "step": 37800 }, { "epoch": 5.615624535868112, "grad_norm": 0.18506869673728943, "learning_rate": 4.384375464131888e-06, "loss": 0.0505, "step": 37810 }, { "epoch": 5.617109757908807, "grad_norm": 1.069229245185852, "learning_rate": 4.382890242091193e-06, "loss": 0.0607, "step": 37820 }, { "epoch": 5.618594979949503, "grad_norm": 1.214794635772705, "learning_rate": 4.3814050200504985e-06, "loss": 0.0613, "step": 37830 }, { "epoch": 5.620080201990198, "grad_norm": 0.9908319711685181, "learning_rate": 4.379919798009803e-06, "loss": 0.0591, "step": 37840 }, { "epoch": 5.6215654240308925, "grad_norm": 1.3837628364562988, "learning_rate": 4.378434575969107e-06, "loss": 0.0753, "step": 37850 }, { "epoch": 5.623050646071587, "grad_norm": 1.1857173442840576, "learning_rate": 4.376949353928413e-06, "loss": 0.0669, "step": 37860 }, { "epoch": 5.624535868112282, "grad_norm": 0.367318719625473, "learning_rate": 4.375464131887718e-06, "loss": 0.0433, "step": 37870 }, { "epoch": 5.626021090152978, "grad_norm": 0.9903393983840942, "learning_rate": 4.373978909847022e-06, "loss": 0.0626, "step": 37880 }, { "epoch": 5.627506312193673, "grad_norm": 1.4275164604187012, "learning_rate": 4.372493687806328e-06, "loss": 0.0578, "step": 37890 }, { "epoch": 5.628991534234368, "grad_norm": 0.587713360786438, "learning_rate": 4.371008465765632e-06, "loss": 0.0544, "step": 37900 }, { "epoch": 5.630476756275063, "grad_norm": 0.7679840922355652, "learning_rate": 4.369523243724937e-06, "loss": 0.0582, "step": 37910 }, { "epoch": 5.631961978315758, "grad_norm": 1.3122962713241577, "learning_rate": 4.368038021684242e-06, "loss": 0.0571, "step": 37920 }, { "epoch": 5.6334472003564535, "grad_norm": 0.521410346031189, "learning_rate": 4.366552799643547e-06, "loss": 0.0539, "step": 37930 }, { "epoch": 5.634932422397148, "grad_norm": 1.6043994426727295, "learning_rate": 4.3650675776028515e-06, "loss": 0.0726, "step": 37940 }, { "epoch": 5.636417644437843, "grad_norm": 0.3124872148036957, "learning_rate": 4.363582355562157e-06, "loss": 0.0526, "step": 37950 }, { "epoch": 5.637902866478538, "grad_norm": 0.7532991170883179, "learning_rate": 4.362097133521462e-06, "loss": 0.059, "step": 37960 }, { "epoch": 5.639388088519234, "grad_norm": 0.4675773084163666, "learning_rate": 4.3606119114807665e-06, "loss": 0.059, "step": 37970 }, { "epoch": 5.640873310559929, "grad_norm": 0.5681068897247314, "learning_rate": 4.359126689440072e-06, "loss": 0.0548, "step": 37980 }, { "epoch": 5.642358532600624, "grad_norm": 1.514446496963501, "learning_rate": 4.357641467399377e-06, "loss": 0.0591, "step": 37990 }, { "epoch": 5.643843754641319, "grad_norm": 1.4750310182571411, "learning_rate": 4.3561562453586815e-06, "loss": 0.0604, "step": 38000 }, { "epoch": 5.645328976682014, "grad_norm": 0.546931266784668, "learning_rate": 4.354671023317986e-06, "loss": 0.0825, "step": 38010 }, { "epoch": 5.646814198722709, "grad_norm": 0.9507127404212952, "learning_rate": 4.353185801277291e-06, "loss": 0.0393, "step": 38020 }, { "epoch": 5.648299420763404, "grad_norm": 0.8796098828315735, "learning_rate": 4.3517005792365965e-06, "loss": 0.0421, "step": 38030 }, { "epoch": 5.649784642804099, "grad_norm": 1.5660216808319092, "learning_rate": 4.350215357195901e-06, "loss": 0.0827, "step": 38040 }, { "epoch": 5.651269864844794, "grad_norm": 0.5498520135879517, "learning_rate": 4.348730135155206e-06, "loss": 0.0658, "step": 38050 }, { "epoch": 5.652755086885489, "grad_norm": 0.9659833312034607, "learning_rate": 4.3472449131145115e-06, "loss": 0.0422, "step": 38060 }, { "epoch": 5.654240308926185, "grad_norm": 0.7959330081939697, "learning_rate": 4.345759691073816e-06, "loss": 0.0656, "step": 38070 }, { "epoch": 5.65572553096688, "grad_norm": 0.8035279512405396, "learning_rate": 4.34427446903312e-06, "loss": 0.0875, "step": 38080 }, { "epoch": 5.657210753007575, "grad_norm": 0.5401741862297058, "learning_rate": 4.342789246992426e-06, "loss": 0.0631, "step": 38090 }, { "epoch": 5.6586959750482695, "grad_norm": 0.7207291722297668, "learning_rate": 4.341304024951731e-06, "loss": 0.0705, "step": 38100 }, { "epoch": 5.660181197088965, "grad_norm": 0.7631051540374756, "learning_rate": 4.339818802911035e-06, "loss": 0.0497, "step": 38110 }, { "epoch": 5.66166641912966, "grad_norm": 0.6719472408294678, "learning_rate": 4.338333580870341e-06, "loss": 0.0634, "step": 38120 }, { "epoch": 5.663151641170355, "grad_norm": 0.5348994135856628, "learning_rate": 4.336848358829646e-06, "loss": 0.0565, "step": 38130 }, { "epoch": 5.66463686321105, "grad_norm": 1.271963357925415, "learning_rate": 4.33536313678895e-06, "loss": 0.0578, "step": 38140 }, { "epoch": 5.666122085251745, "grad_norm": 0.6291659474372864, "learning_rate": 4.333877914748255e-06, "loss": 0.0731, "step": 38150 }, { "epoch": 5.667607307292441, "grad_norm": 0.6707026362419128, "learning_rate": 4.33239269270756e-06, "loss": 0.0597, "step": 38160 }, { "epoch": 5.669092529333136, "grad_norm": 1.3206006288528442, "learning_rate": 4.330907470666865e-06, "loss": 0.0441, "step": 38170 }, { "epoch": 5.6705777513738305, "grad_norm": 0.4817686378955841, "learning_rate": 4.32942224862617e-06, "loss": 0.0543, "step": 38180 }, { "epoch": 5.672062973414525, "grad_norm": 1.7800952196121216, "learning_rate": 4.327937026585475e-06, "loss": 0.0586, "step": 38190 }, { "epoch": 5.67354819545522, "grad_norm": 0.9077200889587402, "learning_rate": 4.3264518045447795e-06, "loss": 0.0631, "step": 38200 }, { "epoch": 5.675033417495916, "grad_norm": 1.626470685005188, "learning_rate": 4.324966582504085e-06, "loss": 0.0641, "step": 38210 }, { "epoch": 5.676518639536611, "grad_norm": 0.8949599862098694, "learning_rate": 4.32348136046339e-06, "loss": 0.0713, "step": 38220 }, { "epoch": 5.678003861577306, "grad_norm": 1.184775471687317, "learning_rate": 4.3219961384226945e-06, "loss": 0.0648, "step": 38230 }, { "epoch": 5.679489083618001, "grad_norm": 0.5150486826896667, "learning_rate": 4.320510916382e-06, "loss": 0.0733, "step": 38240 }, { "epoch": 5.680974305658696, "grad_norm": 0.5177794098854065, "learning_rate": 4.319025694341304e-06, "loss": 0.0447, "step": 38250 }, { "epoch": 5.682459527699391, "grad_norm": 0.49653327465057373, "learning_rate": 4.3175404723006095e-06, "loss": 0.0577, "step": 38260 }, { "epoch": 5.683944749740086, "grad_norm": 1.3010729551315308, "learning_rate": 4.316055250259914e-06, "loss": 0.0541, "step": 38270 }, { "epoch": 5.685429971780781, "grad_norm": 0.6356299519538879, "learning_rate": 4.314570028219219e-06, "loss": 0.0582, "step": 38280 }, { "epoch": 5.686915193821476, "grad_norm": 1.8878618478775024, "learning_rate": 4.3130848061785245e-06, "loss": 0.0554, "step": 38290 }, { "epoch": 5.688400415862171, "grad_norm": 0.5616238713264465, "learning_rate": 4.311599584137829e-06, "loss": 0.0745, "step": 38300 }, { "epoch": 5.689885637902867, "grad_norm": 0.8002229928970337, "learning_rate": 4.310114362097133e-06, "loss": 0.0616, "step": 38310 }, { "epoch": 5.691370859943562, "grad_norm": 0.7039357423782349, "learning_rate": 4.308629140056439e-06, "loss": 0.0526, "step": 38320 }, { "epoch": 5.692856081984257, "grad_norm": 0.6721161007881165, "learning_rate": 4.307143918015744e-06, "loss": 0.0482, "step": 38330 }, { "epoch": 5.6943413040249515, "grad_norm": 1.965502381324768, "learning_rate": 4.305658695975048e-06, "loss": 0.0765, "step": 38340 }, { "epoch": 5.695826526065646, "grad_norm": 1.0320897102355957, "learning_rate": 4.304173473934354e-06, "loss": 0.0568, "step": 38350 }, { "epoch": 5.697311748106342, "grad_norm": 0.753450870513916, "learning_rate": 4.302688251893659e-06, "loss": 0.0432, "step": 38360 }, { "epoch": 5.698796970147037, "grad_norm": 0.35579219460487366, "learning_rate": 4.301203029852963e-06, "loss": 0.0594, "step": 38370 }, { "epoch": 5.700282192187732, "grad_norm": 1.503354549407959, "learning_rate": 4.299717807812268e-06, "loss": 0.0623, "step": 38380 }, { "epoch": 5.701767414228427, "grad_norm": 1.1138559579849243, "learning_rate": 4.298232585771573e-06, "loss": 0.0736, "step": 38390 }, { "epoch": 5.703252636269122, "grad_norm": 0.9944598078727722, "learning_rate": 4.296747363730878e-06, "loss": 0.0552, "step": 38400 }, { "epoch": 5.704737858309818, "grad_norm": 0.48267674446105957, "learning_rate": 4.295262141690183e-06, "loss": 0.0543, "step": 38410 }, { "epoch": 5.7062230803505125, "grad_norm": 0.34499338269233704, "learning_rate": 4.293776919649488e-06, "loss": 0.0431, "step": 38420 }, { "epoch": 5.707708302391207, "grad_norm": 1.3373545408248901, "learning_rate": 4.292291697608793e-06, "loss": 0.0498, "step": 38430 }, { "epoch": 5.709193524431902, "grad_norm": 1.3047128915786743, "learning_rate": 4.290806475568098e-06, "loss": 0.0512, "step": 38440 }, { "epoch": 5.710678746472597, "grad_norm": 0.5727968811988831, "learning_rate": 4.289321253527402e-06, "loss": 0.0593, "step": 38450 }, { "epoch": 5.712163968513293, "grad_norm": 0.6570802330970764, "learning_rate": 4.2878360314867075e-06, "loss": 0.062, "step": 38460 }, { "epoch": 5.713649190553988, "grad_norm": 0.5600692629814148, "learning_rate": 4.286350809446013e-06, "loss": 0.0574, "step": 38470 }, { "epoch": 5.715134412594683, "grad_norm": 0.804882824420929, "learning_rate": 4.284865587405317e-06, "loss": 0.0655, "step": 38480 }, { "epoch": 5.716619634635378, "grad_norm": 1.1548188924789429, "learning_rate": 4.2833803653646225e-06, "loss": 0.0703, "step": 38490 }, { "epoch": 5.718104856676073, "grad_norm": 1.5685232877731323, "learning_rate": 4.281895143323928e-06, "loss": 0.0602, "step": 38500 }, { "epoch": 5.719590078716768, "grad_norm": 1.076205849647522, "learning_rate": 4.280409921283232e-06, "loss": 0.048, "step": 38510 }, { "epoch": 5.721075300757463, "grad_norm": 0.591611921787262, "learning_rate": 4.2789246992425374e-06, "loss": 0.0655, "step": 38520 }, { "epoch": 5.722560522798158, "grad_norm": 0.7020032405853271, "learning_rate": 4.277439477201842e-06, "loss": 0.0334, "step": 38530 }, { "epoch": 5.724045744838853, "grad_norm": 1.2611887454986572, "learning_rate": 4.275954255161147e-06, "loss": 0.0549, "step": 38540 }, { "epoch": 5.725530966879549, "grad_norm": 0.578007698059082, "learning_rate": 4.274469033120452e-06, "loss": 0.0585, "step": 38550 }, { "epoch": 5.727016188920244, "grad_norm": 0.9836631417274475, "learning_rate": 4.272983811079757e-06, "loss": 0.0767, "step": 38560 }, { "epoch": 5.728501410960939, "grad_norm": 0.37310218811035156, "learning_rate": 4.271498589039061e-06, "loss": 0.0393, "step": 38570 }, { "epoch": 5.729986633001634, "grad_norm": 0.5416752696037292, "learning_rate": 4.270013366998367e-06, "loss": 0.0739, "step": 38580 }, { "epoch": 5.7314718550423285, "grad_norm": 1.1677316427230835, "learning_rate": 4.268528144957672e-06, "loss": 0.0433, "step": 38590 }, { "epoch": 5.732957077083024, "grad_norm": 0.6157244443893433, "learning_rate": 4.267042922916976e-06, "loss": 0.0488, "step": 38600 }, { "epoch": 5.734442299123719, "grad_norm": 0.7313002347946167, "learning_rate": 4.265557700876282e-06, "loss": 0.0512, "step": 38610 }, { "epoch": 5.735927521164414, "grad_norm": 0.9613766670227051, "learning_rate": 4.264072478835586e-06, "loss": 0.0496, "step": 38620 }, { "epoch": 5.737412743205109, "grad_norm": 1.0196996927261353, "learning_rate": 4.262587256794891e-06, "loss": 0.0579, "step": 38630 }, { "epoch": 5.738897965245804, "grad_norm": 1.3182339668273926, "learning_rate": 4.261102034754196e-06, "loss": 0.0668, "step": 38640 }, { "epoch": 5.7403831872865, "grad_norm": 0.5998702645301819, "learning_rate": 4.259616812713501e-06, "loss": 0.0544, "step": 38650 }, { "epoch": 5.7418684093271946, "grad_norm": 0.27779215574264526, "learning_rate": 4.258131590672806e-06, "loss": 0.0484, "step": 38660 }, { "epoch": 5.7433536313678895, "grad_norm": 0.6996427178382874, "learning_rate": 4.256646368632111e-06, "loss": 0.0646, "step": 38670 }, { "epoch": 5.744838853408584, "grad_norm": 1.1955310106277466, "learning_rate": 4.255161146591415e-06, "loss": 0.0549, "step": 38680 }, { "epoch": 5.74632407544928, "grad_norm": 0.9063337445259094, "learning_rate": 4.253675924550721e-06, "loss": 0.0752, "step": 38690 }, { "epoch": 5.747809297489975, "grad_norm": 0.4556771218776703, "learning_rate": 4.252190702510026e-06, "loss": 0.0514, "step": 38700 }, { "epoch": 5.74929451953067, "grad_norm": 0.5066632032394409, "learning_rate": 4.25070548046933e-06, "loss": 0.0803, "step": 38710 }, { "epoch": 5.750779741571365, "grad_norm": 0.6577308773994446, "learning_rate": 4.2492202584286354e-06, "loss": 0.0446, "step": 38720 }, { "epoch": 5.75226496361206, "grad_norm": 1.5056424140930176, "learning_rate": 4.247735036387941e-06, "loss": 0.0631, "step": 38730 }, { "epoch": 5.7537501856527555, "grad_norm": 1.3212007284164429, "learning_rate": 4.246249814347245e-06, "loss": 0.0589, "step": 38740 }, { "epoch": 5.75523540769345, "grad_norm": 0.6160315871238708, "learning_rate": 4.24476459230655e-06, "loss": 0.0705, "step": 38750 }, { "epoch": 5.756720629734145, "grad_norm": 1.3710724115371704, "learning_rate": 4.243279370265856e-06, "loss": 0.0415, "step": 38760 }, { "epoch": 5.75820585177484, "grad_norm": 0.6069679260253906, "learning_rate": 4.24179414822516e-06, "loss": 0.0417, "step": 38770 }, { "epoch": 5.759691073815535, "grad_norm": 1.013668179512024, "learning_rate": 4.2403089261844646e-06, "loss": 0.0607, "step": 38780 }, { "epoch": 5.761176295856231, "grad_norm": 0.6373612880706787, "learning_rate": 4.23882370414377e-06, "loss": 0.0574, "step": 38790 }, { "epoch": 5.762661517896926, "grad_norm": 1.0072802305221558, "learning_rate": 4.237338482103075e-06, "loss": 0.068, "step": 38800 }, { "epoch": 5.764146739937621, "grad_norm": 0.5579874515533447, "learning_rate": 4.2358532600623796e-06, "loss": 0.0537, "step": 38810 }, { "epoch": 5.765631961978316, "grad_norm": 0.5878424644470215, "learning_rate": 4.234368038021685e-06, "loss": 0.0779, "step": 38820 }, { "epoch": 5.7671171840190105, "grad_norm": 1.0271848440170288, "learning_rate": 4.232882815980989e-06, "loss": 0.0647, "step": 38830 }, { "epoch": 5.768602406059706, "grad_norm": 0.8676832914352417, "learning_rate": 4.2313975939402946e-06, "loss": 0.0509, "step": 38840 }, { "epoch": 5.770087628100401, "grad_norm": 0.5808263421058655, "learning_rate": 4.229912371899599e-06, "loss": 0.0587, "step": 38850 }, { "epoch": 5.771572850141096, "grad_norm": 0.6637036800384521, "learning_rate": 4.228427149858904e-06, "loss": 0.0565, "step": 38860 }, { "epoch": 5.773058072181791, "grad_norm": 0.6437532901763916, "learning_rate": 4.2269419278182096e-06, "loss": 0.0543, "step": 38870 }, { "epoch": 5.774543294222486, "grad_norm": 0.9969229102134705, "learning_rate": 4.225456705777514e-06, "loss": 0.0654, "step": 38880 }, { "epoch": 5.776028516263182, "grad_norm": 1.494921326637268, "learning_rate": 4.223971483736819e-06, "loss": 0.0685, "step": 38890 }, { "epoch": 5.777513738303877, "grad_norm": 1.8869636058807373, "learning_rate": 4.222486261696124e-06, "loss": 0.0431, "step": 38900 }, { "epoch": 5.7789989603445715, "grad_norm": 0.9608843922615051, "learning_rate": 4.221001039655429e-06, "loss": 0.0636, "step": 38910 }, { "epoch": 5.780484182385266, "grad_norm": 0.35328376293182373, "learning_rate": 4.219515817614733e-06, "loss": 0.0503, "step": 38920 }, { "epoch": 5.781969404425961, "grad_norm": 0.5849358439445496, "learning_rate": 4.218030595574039e-06, "loss": 0.0447, "step": 38930 }, { "epoch": 5.783454626466657, "grad_norm": 1.1339222192764282, "learning_rate": 4.216545373533343e-06, "loss": 0.0705, "step": 38940 }, { "epoch": 5.784939848507352, "grad_norm": 0.9738719463348389, "learning_rate": 4.215060151492648e-06, "loss": 0.0727, "step": 38950 }, { "epoch": 5.786425070548047, "grad_norm": 0.6222785115242004, "learning_rate": 4.213574929451954e-06, "loss": 0.0657, "step": 38960 }, { "epoch": 5.787910292588742, "grad_norm": 1.2067147493362427, "learning_rate": 4.212089707411258e-06, "loss": 0.0539, "step": 38970 }, { "epoch": 5.789395514629437, "grad_norm": 0.5365669131278992, "learning_rate": 4.210604485370563e-06, "loss": 0.0572, "step": 38980 }, { "epoch": 5.7908807366701325, "grad_norm": 0.5680752396583557, "learning_rate": 4.209119263329869e-06, "loss": 0.0806, "step": 38990 }, { "epoch": 5.792365958710827, "grad_norm": 1.4261845350265503, "learning_rate": 4.207634041289173e-06, "loss": 0.0598, "step": 39000 }, { "epoch": 5.793851180751522, "grad_norm": 0.8928173780441284, "learning_rate": 4.2061488192484775e-06, "loss": 0.0544, "step": 39010 }, { "epoch": 5.795336402792217, "grad_norm": 0.5879567861557007, "learning_rate": 4.204663597207783e-06, "loss": 0.047, "step": 39020 }, { "epoch": 5.796821624832912, "grad_norm": 1.41495943069458, "learning_rate": 4.203178375167088e-06, "loss": 0.0717, "step": 39030 }, { "epoch": 5.798306846873608, "grad_norm": 1.1093374490737915, "learning_rate": 4.2016931531263925e-06, "loss": 0.0543, "step": 39040 }, { "epoch": 5.799792068914303, "grad_norm": 1.1786057949066162, "learning_rate": 4.200207931085698e-06, "loss": 0.0508, "step": 39050 }, { "epoch": 5.801277290954998, "grad_norm": 1.0340731143951416, "learning_rate": 4.198722709045003e-06, "loss": 0.0564, "step": 39060 }, { "epoch": 5.802762512995693, "grad_norm": 0.9552382230758667, "learning_rate": 4.1972374870043075e-06, "loss": 0.0543, "step": 39070 }, { "epoch": 5.8042477350363875, "grad_norm": 0.7804151177406311, "learning_rate": 4.195752264963612e-06, "loss": 0.0555, "step": 39080 }, { "epoch": 5.805732957077083, "grad_norm": 0.8787037134170532, "learning_rate": 4.194267042922917e-06, "loss": 0.054, "step": 39090 }, { "epoch": 5.807218179117778, "grad_norm": 0.7925017476081848, "learning_rate": 4.1927818208822225e-06, "loss": 0.0629, "step": 39100 }, { "epoch": 5.808703401158473, "grad_norm": 0.8146625757217407, "learning_rate": 4.191296598841527e-06, "loss": 0.0655, "step": 39110 }, { "epoch": 5.810188623199168, "grad_norm": 0.7402033805847168, "learning_rate": 4.189811376800832e-06, "loss": 0.0642, "step": 39120 }, { "epoch": 5.811673845239864, "grad_norm": 0.7034904956817627, "learning_rate": 4.188326154760137e-06, "loss": 0.0608, "step": 39130 }, { "epoch": 5.813159067280559, "grad_norm": 0.8925518989562988, "learning_rate": 4.186840932719442e-06, "loss": 0.0531, "step": 39140 }, { "epoch": 5.8146442893212535, "grad_norm": 1.6555513143539429, "learning_rate": 4.185355710678746e-06, "loss": 0.0748, "step": 39150 }, { "epoch": 5.8161295113619484, "grad_norm": 0.83237624168396, "learning_rate": 4.183870488638052e-06, "loss": 0.0424, "step": 39160 }, { "epoch": 5.817614733402643, "grad_norm": 0.5995851755142212, "learning_rate": 4.182385266597357e-06, "loss": 0.0702, "step": 39170 }, { "epoch": 5.819099955443339, "grad_norm": 1.0438969135284424, "learning_rate": 4.180900044556661e-06, "loss": 0.0603, "step": 39180 }, { "epoch": 5.820585177484034, "grad_norm": 0.857422411441803, "learning_rate": 4.179414822515967e-06, "loss": 0.0518, "step": 39190 }, { "epoch": 5.822070399524729, "grad_norm": 1.5087437629699707, "learning_rate": 4.177929600475271e-06, "loss": 0.0971, "step": 39200 }, { "epoch": 5.823555621565424, "grad_norm": 1.900402545928955, "learning_rate": 4.176444378434576e-06, "loss": 0.0759, "step": 39210 }, { "epoch": 5.825040843606119, "grad_norm": 1.5815813541412354, "learning_rate": 4.174959156393882e-06, "loss": 0.0586, "step": 39220 }, { "epoch": 5.8265260656468145, "grad_norm": 1.3800815343856812, "learning_rate": 4.173473934353186e-06, "loss": 0.0685, "step": 39230 }, { "epoch": 5.828011287687509, "grad_norm": 1.4719352722167969, "learning_rate": 4.1719887123124905e-06, "loss": 0.0649, "step": 39240 }, { "epoch": 5.829496509728204, "grad_norm": 0.8187944889068604, "learning_rate": 4.170503490271796e-06, "loss": 0.0594, "step": 39250 }, { "epoch": 5.830981731768899, "grad_norm": 0.8237382769584656, "learning_rate": 4.169018268231101e-06, "loss": 0.0583, "step": 39260 }, { "epoch": 5.832466953809595, "grad_norm": 1.1916723251342773, "learning_rate": 4.1675330461904055e-06, "loss": 0.0523, "step": 39270 }, { "epoch": 5.83395217585029, "grad_norm": 1.322054386138916, "learning_rate": 4.166047824149711e-06, "loss": 0.0549, "step": 39280 }, { "epoch": 5.835437397890985, "grad_norm": 0.6644608974456787, "learning_rate": 4.164562602109016e-06, "loss": 0.0529, "step": 39290 }, { "epoch": 5.83692261993168, "grad_norm": 1.1946402788162231, "learning_rate": 4.1630773800683205e-06, "loss": 0.0561, "step": 39300 }, { "epoch": 5.838407841972375, "grad_norm": 0.6778384447097778, "learning_rate": 4.161592158027625e-06, "loss": 0.0559, "step": 39310 }, { "epoch": 5.83989306401307, "grad_norm": 0.8052584528923035, "learning_rate": 4.16010693598693e-06, "loss": 0.0523, "step": 39320 }, { "epoch": 5.841378286053765, "grad_norm": 0.5099238157272339, "learning_rate": 4.1586217139462355e-06, "loss": 0.0264, "step": 39330 }, { "epoch": 5.84286350809446, "grad_norm": 0.740264356136322, "learning_rate": 4.15713649190554e-06, "loss": 0.0518, "step": 39340 }, { "epoch": 5.844348730135155, "grad_norm": 0.8734014630317688, "learning_rate": 4.155651269864845e-06, "loss": 0.0476, "step": 39350 }, { "epoch": 5.84583395217585, "grad_norm": 0.38284748792648315, "learning_rate": 4.1541660478241505e-06, "loss": 0.0489, "step": 39360 }, { "epoch": 5.847319174216546, "grad_norm": 0.6769605875015259, "learning_rate": 4.152680825783455e-06, "loss": 0.0602, "step": 39370 }, { "epoch": 5.848804396257241, "grad_norm": 0.6371337175369263, "learning_rate": 4.151195603742759e-06, "loss": 0.0623, "step": 39380 }, { "epoch": 5.850289618297936, "grad_norm": 0.7483164072036743, "learning_rate": 4.149710381702065e-06, "loss": 0.0693, "step": 39390 }, { "epoch": 5.8517748403386305, "grad_norm": 0.4191286563873291, "learning_rate": 4.14822515966137e-06, "loss": 0.0354, "step": 39400 }, { "epoch": 5.853260062379325, "grad_norm": 0.699856698513031, "learning_rate": 4.146739937620674e-06, "loss": 0.0464, "step": 39410 }, { "epoch": 5.854745284420021, "grad_norm": 0.8900712132453918, "learning_rate": 4.14525471557998e-06, "loss": 0.0673, "step": 39420 }, { "epoch": 5.856230506460716, "grad_norm": 0.9745095372200012, "learning_rate": 4.143769493539285e-06, "loss": 0.0704, "step": 39430 }, { "epoch": 5.857715728501411, "grad_norm": 0.7951921820640564, "learning_rate": 4.142284271498589e-06, "loss": 0.0636, "step": 39440 }, { "epoch": 5.859200950542106, "grad_norm": 0.7359218001365662, "learning_rate": 4.140799049457895e-06, "loss": 0.0449, "step": 39450 }, { "epoch": 5.860686172582801, "grad_norm": 1.602457046508789, "learning_rate": 4.139313827417199e-06, "loss": 0.047, "step": 39460 }, { "epoch": 5.862171394623497, "grad_norm": 0.7292214035987854, "learning_rate": 4.137828605376504e-06, "loss": 0.0506, "step": 39470 }, { "epoch": 5.8636566166641915, "grad_norm": 1.0408436059951782, "learning_rate": 4.136343383335809e-06, "loss": 0.0598, "step": 39480 }, { "epoch": 5.865141838704886, "grad_norm": 1.1956384181976318, "learning_rate": 4.134858161295114e-06, "loss": 0.0491, "step": 39490 }, { "epoch": 5.866627060745581, "grad_norm": 1.9306631088256836, "learning_rate": 4.1333729392544185e-06, "loss": 0.0554, "step": 39500 }, { "epoch": 5.868112282786276, "grad_norm": 0.30741629004478455, "learning_rate": 4.131887717213724e-06, "loss": 0.0447, "step": 39510 }, { "epoch": 5.869597504826972, "grad_norm": 0.909333348274231, "learning_rate": 4.130402495173029e-06, "loss": 0.0551, "step": 39520 }, { "epoch": 5.871082726867667, "grad_norm": 0.5839402675628662, "learning_rate": 4.1289172731323335e-06, "loss": 0.054, "step": 39530 }, { "epoch": 5.872567948908362, "grad_norm": 1.0948971509933472, "learning_rate": 4.127432051091639e-06, "loss": 0.064, "step": 39540 }, { "epoch": 5.874053170949057, "grad_norm": 0.7635152339935303, "learning_rate": 4.125946829050943e-06, "loss": 0.0534, "step": 39550 }, { "epoch": 5.875538392989752, "grad_norm": 0.6703165173530579, "learning_rate": 4.1244616070102485e-06, "loss": 0.0603, "step": 39560 }, { "epoch": 5.877023615030447, "grad_norm": 0.819165825843811, "learning_rate": 4.122976384969553e-06, "loss": 0.0544, "step": 39570 }, { "epoch": 5.878508837071142, "grad_norm": 1.707190990447998, "learning_rate": 4.121491162928858e-06, "loss": 0.0638, "step": 39580 }, { "epoch": 5.879994059111837, "grad_norm": 0.5266190767288208, "learning_rate": 4.1200059408881635e-06, "loss": 0.0563, "step": 39590 }, { "epoch": 5.881479281152532, "grad_norm": 1.176369547843933, "learning_rate": 4.118520718847468e-06, "loss": 0.0657, "step": 39600 }, { "epoch": 5.882964503193227, "grad_norm": 1.1126110553741455, "learning_rate": 4.117035496806772e-06, "loss": 0.0436, "step": 39610 }, { "epoch": 5.884449725233923, "grad_norm": 0.798716127872467, "learning_rate": 4.115550274766078e-06, "loss": 0.0571, "step": 39620 }, { "epoch": 5.885934947274618, "grad_norm": 0.43753018975257874, "learning_rate": 4.114065052725383e-06, "loss": 0.0524, "step": 39630 }, { "epoch": 5.8874201693153125, "grad_norm": 0.7865732908248901, "learning_rate": 4.112579830684687e-06, "loss": 0.0595, "step": 39640 }, { "epoch": 5.888905391356007, "grad_norm": 0.4022800028324127, "learning_rate": 4.111094608643993e-06, "loss": 0.0661, "step": 39650 }, { "epoch": 5.890390613396702, "grad_norm": 1.2196767330169678, "learning_rate": 4.109609386603298e-06, "loss": 0.0477, "step": 39660 }, { "epoch": 5.891875835437398, "grad_norm": 0.7821894288063049, "learning_rate": 4.108124164562602e-06, "loss": 0.0499, "step": 39670 }, { "epoch": 5.893361057478093, "grad_norm": 1.9917155504226685, "learning_rate": 4.106638942521908e-06, "loss": 0.0934, "step": 39680 }, { "epoch": 5.894846279518788, "grad_norm": 0.9341873526573181, "learning_rate": 4.105153720481213e-06, "loss": 0.0558, "step": 39690 }, { "epoch": 5.896331501559483, "grad_norm": 0.6423032879829407, "learning_rate": 4.103668498440517e-06, "loss": 0.052, "step": 39700 }, { "epoch": 5.897816723600179, "grad_norm": 0.8528935313224792, "learning_rate": 4.102183276399822e-06, "loss": 0.0638, "step": 39710 }, { "epoch": 5.8993019456408735, "grad_norm": 0.3458503484725952, "learning_rate": 4.100698054359127e-06, "loss": 0.0495, "step": 39720 }, { "epoch": 5.900787167681568, "grad_norm": 0.5680827498435974, "learning_rate": 4.099212832318432e-06, "loss": 0.0576, "step": 39730 }, { "epoch": 5.902272389722263, "grad_norm": 0.38053596019744873, "learning_rate": 4.097727610277737e-06, "loss": 0.0563, "step": 39740 }, { "epoch": 5.903757611762958, "grad_norm": 0.666007936000824, "learning_rate": 4.096242388237042e-06, "loss": 0.0556, "step": 39750 }, { "epoch": 5.905242833803654, "grad_norm": 0.8363524079322815, "learning_rate": 4.0947571661963465e-06, "loss": 0.0613, "step": 39760 }, { "epoch": 5.906728055844349, "grad_norm": 1.3122344017028809, "learning_rate": 4.093271944155652e-06, "loss": 0.0713, "step": 39770 }, { "epoch": 5.908213277885044, "grad_norm": 0.5094563364982605, "learning_rate": 4.091786722114956e-06, "loss": 0.0596, "step": 39780 }, { "epoch": 5.909698499925739, "grad_norm": 1.2744078636169434, "learning_rate": 4.0903015000742615e-06, "loss": 0.0558, "step": 39790 }, { "epoch": 5.911183721966434, "grad_norm": 0.8067665100097656, "learning_rate": 4.088816278033567e-06, "loss": 0.0535, "step": 39800 }, { "epoch": 5.912668944007129, "grad_norm": 0.9969848990440369, "learning_rate": 4.087331055992871e-06, "loss": 0.067, "step": 39810 }, { "epoch": 5.914154166047824, "grad_norm": 0.7039811015129089, "learning_rate": 4.0858458339521765e-06, "loss": 0.0712, "step": 39820 }, { "epoch": 5.915639388088519, "grad_norm": 0.5828683972358704, "learning_rate": 4.084360611911481e-06, "loss": 0.0605, "step": 39830 }, { "epoch": 5.917124610129214, "grad_norm": 1.289592981338501, "learning_rate": 4.082875389870786e-06, "loss": 0.0649, "step": 39840 }, { "epoch": 5.91860983216991, "grad_norm": 0.8193347454071045, "learning_rate": 4.081390167830091e-06, "loss": 0.0717, "step": 39850 }, { "epoch": 5.920095054210605, "grad_norm": 0.9378249049186707, "learning_rate": 4.079904945789396e-06, "loss": 0.0591, "step": 39860 }, { "epoch": 5.9215802762513, "grad_norm": 1.126220941543579, "learning_rate": 4.0784197237487e-06, "loss": 0.0723, "step": 39870 }, { "epoch": 5.923065498291995, "grad_norm": 1.3452033996582031, "learning_rate": 4.076934501708006e-06, "loss": 0.0636, "step": 39880 }, { "epoch": 5.9245507203326895, "grad_norm": 0.8854779601097107, "learning_rate": 4.075449279667311e-06, "loss": 0.074, "step": 39890 }, { "epoch": 5.926035942373385, "grad_norm": 0.6700276136398315, "learning_rate": 4.073964057626615e-06, "loss": 0.0575, "step": 39900 }, { "epoch": 5.92752116441408, "grad_norm": 0.5441814661026001, "learning_rate": 4.07247883558592e-06, "loss": 0.0543, "step": 39910 }, { "epoch": 5.929006386454775, "grad_norm": 0.8362489342689514, "learning_rate": 4.070993613545226e-06, "loss": 0.0563, "step": 39920 }, { "epoch": 5.93049160849547, "grad_norm": 0.7308693528175354, "learning_rate": 4.06950839150453e-06, "loss": 0.0797, "step": 39930 }, { "epoch": 5.931976830536165, "grad_norm": 0.3876124322414398, "learning_rate": 4.068023169463835e-06, "loss": 0.0688, "step": 39940 }, { "epoch": 5.933462052576861, "grad_norm": 0.6457473635673523, "learning_rate": 4.06653794742314e-06, "loss": 0.0607, "step": 39950 }, { "epoch": 5.934947274617556, "grad_norm": 0.6994035243988037, "learning_rate": 4.065052725382445e-06, "loss": 0.055, "step": 39960 }, { "epoch": 5.9364324966582505, "grad_norm": 2.109799861907959, "learning_rate": 4.06356750334175e-06, "loss": 0.0857, "step": 39970 }, { "epoch": 5.937917718698945, "grad_norm": 1.9049144983291626, "learning_rate": 4.062082281301055e-06, "loss": 0.0681, "step": 39980 }, { "epoch": 5.93940294073964, "grad_norm": 1.0914199352264404, "learning_rate": 4.06059705926036e-06, "loss": 0.0597, "step": 39990 }, { "epoch": 5.940888162780336, "grad_norm": 0.5895785689353943, "learning_rate": 4.059111837219665e-06, "loss": 0.0526, "step": 40000 }, { "epoch": 5.942373384821031, "grad_norm": 1.0604206323623657, "learning_rate": 4.057626615178969e-06, "loss": 0.061, "step": 40010 }, { "epoch": 5.943858606861726, "grad_norm": 0.4064539670944214, "learning_rate": 4.0561413931382744e-06, "loss": 0.0541, "step": 40020 }, { "epoch": 5.945343828902421, "grad_norm": 1.6338391304016113, "learning_rate": 4.05465617109758e-06, "loss": 0.0765, "step": 40030 }, { "epoch": 5.946829050943116, "grad_norm": 0.4018402695655823, "learning_rate": 4.053170949056884e-06, "loss": 0.0599, "step": 40040 }, { "epoch": 5.948314272983811, "grad_norm": 0.8363461494445801, "learning_rate": 4.0516857270161894e-06, "loss": 0.0542, "step": 40050 }, { "epoch": 5.949799495024506, "grad_norm": 1.554994821548462, "learning_rate": 4.050200504975495e-06, "loss": 0.0495, "step": 40060 }, { "epoch": 5.951284717065201, "grad_norm": 0.9533831477165222, "learning_rate": 4.048715282934799e-06, "loss": 0.0451, "step": 40070 }, { "epoch": 5.952769939105896, "grad_norm": 0.3587993085384369, "learning_rate": 4.047230060894104e-06, "loss": 0.0463, "step": 40080 }, { "epoch": 5.954255161146591, "grad_norm": 1.0116937160491943, "learning_rate": 4.045744838853409e-06, "loss": 0.0614, "step": 40090 }, { "epoch": 5.955740383187287, "grad_norm": 0.9308059215545654, "learning_rate": 4.044259616812714e-06, "loss": 0.0542, "step": 40100 }, { "epoch": 5.957225605227982, "grad_norm": 1.0829479694366455, "learning_rate": 4.042774394772019e-06, "loss": 0.0552, "step": 40110 }, { "epoch": 5.958710827268677, "grad_norm": 0.3777959644794464, "learning_rate": 4.041289172731324e-06, "loss": 0.0669, "step": 40120 }, { "epoch": 5.9601960493093715, "grad_norm": 1.892651915550232, "learning_rate": 4.039803950690628e-06, "loss": 0.0546, "step": 40130 }, { "epoch": 5.961681271350066, "grad_norm": 0.3805493414402008, "learning_rate": 4.038318728649934e-06, "loss": 0.0494, "step": 40140 }, { "epoch": 5.963166493390762, "grad_norm": 1.2674118280410767, "learning_rate": 4.036833506609239e-06, "loss": 0.0408, "step": 40150 }, { "epoch": 5.964651715431457, "grad_norm": 1.2300618886947632, "learning_rate": 4.035348284568543e-06, "loss": 0.0579, "step": 40160 }, { "epoch": 5.966136937472152, "grad_norm": 1.122583031654358, "learning_rate": 4.033863062527848e-06, "loss": 0.0691, "step": 40170 }, { "epoch": 5.967622159512847, "grad_norm": 0.8715793490409851, "learning_rate": 4.032377840487153e-06, "loss": 0.0592, "step": 40180 }, { "epoch": 5.969107381553542, "grad_norm": 0.9388965368270874, "learning_rate": 4.030892618446458e-06, "loss": 0.0593, "step": 40190 }, { "epoch": 5.970592603594238, "grad_norm": 0.6892751455307007, "learning_rate": 4.029407396405763e-06, "loss": 0.0458, "step": 40200 }, { "epoch": 5.9720778256349325, "grad_norm": 0.6505733132362366, "learning_rate": 4.027922174365068e-06, "loss": 0.0557, "step": 40210 }, { "epoch": 5.973563047675627, "grad_norm": 0.536372184753418, "learning_rate": 4.026436952324373e-06, "loss": 0.0512, "step": 40220 }, { "epoch": 5.975048269716322, "grad_norm": 1.115814208984375, "learning_rate": 4.024951730283678e-06, "loss": 0.0546, "step": 40230 }, { "epoch": 5.976533491757017, "grad_norm": 1.299026608467102, "learning_rate": 4.023466508242982e-06, "loss": 0.0513, "step": 40240 }, { "epoch": 5.978018713797713, "grad_norm": 0.5905170440673828, "learning_rate": 4.0219812862022874e-06, "loss": 0.0584, "step": 40250 }, { "epoch": 5.979503935838408, "grad_norm": 0.8045508861541748, "learning_rate": 4.020496064161593e-06, "loss": 0.0638, "step": 40260 }, { "epoch": 5.980989157879103, "grad_norm": 1.1870166063308716, "learning_rate": 4.019010842120897e-06, "loss": 0.0553, "step": 40270 }, { "epoch": 5.982474379919798, "grad_norm": 0.40031322836875916, "learning_rate": 4.017525620080202e-06, "loss": 0.0481, "step": 40280 }, { "epoch": 5.9839596019604935, "grad_norm": 0.9006701707839966, "learning_rate": 4.016040398039508e-06, "loss": 0.0577, "step": 40290 }, { "epoch": 5.985444824001188, "grad_norm": 0.5803741812705994, "learning_rate": 4.014555175998812e-06, "loss": 0.0607, "step": 40300 }, { "epoch": 5.986930046041883, "grad_norm": 0.7242305874824524, "learning_rate": 4.0130699539581166e-06, "loss": 0.0644, "step": 40310 }, { "epoch": 5.988415268082578, "grad_norm": 1.0055646896362305, "learning_rate": 4.011584731917422e-06, "loss": 0.0649, "step": 40320 }, { "epoch": 5.989900490123273, "grad_norm": 0.3593951463699341, "learning_rate": 4.010099509876727e-06, "loss": 0.0382, "step": 40330 }, { "epoch": 5.991385712163969, "grad_norm": 0.23760199546813965, "learning_rate": 4.0086142878360316e-06, "loss": 0.0543, "step": 40340 }, { "epoch": 5.992870934204664, "grad_norm": 0.9924305081367493, "learning_rate": 4.007129065795337e-06, "loss": 0.0602, "step": 40350 }, { "epoch": 5.994356156245359, "grad_norm": 0.8447095155715942, "learning_rate": 4.005643843754642e-06, "loss": 0.0511, "step": 40360 }, { "epoch": 5.995841378286054, "grad_norm": 1.5200226306915283, "learning_rate": 4.0041586217139466e-06, "loss": 0.0543, "step": 40370 }, { "epoch": 5.9973266003267485, "grad_norm": 0.6162705421447754, "learning_rate": 4.002673399673251e-06, "loss": 0.0462, "step": 40380 }, { "epoch": 5.998811822367444, "grad_norm": 0.6710197329521179, "learning_rate": 4.001188177632556e-06, "loss": 0.0659, "step": 40390 }, { "epoch": 6.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.05773457512259483, "eval_runtime": 212.5572, "eval_samples_per_second": 178.865, "eval_steps_per_second": 5.594, "step": 40398 }, { "epoch": 6.000297044408139, "grad_norm": 0.789023756980896, "learning_rate": 3.9997029555918616e-06, "loss": 0.0558, "step": 40400 }, { "epoch": 6.001782266448834, "grad_norm": 0.887060821056366, "learning_rate": 3.998217733551166e-06, "loss": 0.0514, "step": 40410 }, { "epoch": 6.003267488489529, "grad_norm": 0.6187435388565063, "learning_rate": 3.996732511510471e-06, "loss": 0.0489, "step": 40420 }, { "epoch": 6.004752710530224, "grad_norm": 0.9915494918823242, "learning_rate": 3.995247289469776e-06, "loss": 0.0643, "step": 40430 }, { "epoch": 6.00623793257092, "grad_norm": 0.8194555640220642, "learning_rate": 3.993762067429081e-06, "loss": 0.0729, "step": 40440 }, { "epoch": 6.0077231546116145, "grad_norm": 0.6049264669418335, "learning_rate": 3.992276845388386e-06, "loss": 0.0503, "step": 40450 }, { "epoch": 6.0092083766523094, "grad_norm": 0.9943747520446777, "learning_rate": 3.990791623347691e-06, "loss": 0.0452, "step": 40460 }, { "epoch": 6.010693598693004, "grad_norm": 0.8540377020835876, "learning_rate": 3.989306401306996e-06, "loss": 0.0512, "step": 40470 }, { "epoch": 6.012178820733699, "grad_norm": 0.9365292191505432, "learning_rate": 3.9878211792663e-06, "loss": 0.0633, "step": 40480 }, { "epoch": 6.013664042774395, "grad_norm": 0.8296148180961609, "learning_rate": 3.986335957225606e-06, "loss": 0.0633, "step": 40490 }, { "epoch": 6.01514926481509, "grad_norm": 0.6906760334968567, "learning_rate": 3.98485073518491e-06, "loss": 0.0568, "step": 40500 }, { "epoch": 6.016634486855785, "grad_norm": 0.9065415263175964, "learning_rate": 3.983365513144215e-06, "loss": 0.0665, "step": 40510 }, { "epoch": 6.01811970889648, "grad_norm": 0.38411182165145874, "learning_rate": 3.981880291103521e-06, "loss": 0.056, "step": 40520 }, { "epoch": 6.0196049309371755, "grad_norm": 0.6814888119697571, "learning_rate": 3.980395069062825e-06, "loss": 0.0489, "step": 40530 }, { "epoch": 6.02109015297787, "grad_norm": 1.3805969953536987, "learning_rate": 3.9789098470221295e-06, "loss": 0.0561, "step": 40540 }, { "epoch": 6.022575375018565, "grad_norm": 1.1943320035934448, "learning_rate": 3.977424624981435e-06, "loss": 0.0547, "step": 40550 }, { "epoch": 6.02406059705926, "grad_norm": 0.49964407086372375, "learning_rate": 3.97593940294074e-06, "loss": 0.0422, "step": 40560 }, { "epoch": 6.025545819099955, "grad_norm": 0.6587172150611877, "learning_rate": 3.9744541809000445e-06, "loss": 0.0711, "step": 40570 }, { "epoch": 6.027031041140651, "grad_norm": 1.0862164497375488, "learning_rate": 3.97296895885935e-06, "loss": 0.0494, "step": 40580 }, { "epoch": 6.028516263181346, "grad_norm": 1.1919859647750854, "learning_rate": 3.971483736818655e-06, "loss": 0.0492, "step": 40590 }, { "epoch": 6.030001485222041, "grad_norm": 1.0630143880844116, "learning_rate": 3.9699985147779595e-06, "loss": 0.0666, "step": 40600 }, { "epoch": 6.031486707262736, "grad_norm": 0.7935609221458435, "learning_rate": 3.968513292737264e-06, "loss": 0.0298, "step": 40610 }, { "epoch": 6.0329719293034305, "grad_norm": 0.8281997442245483, "learning_rate": 3.96702807069657e-06, "loss": 0.0451, "step": 40620 }, { "epoch": 6.034457151344126, "grad_norm": 0.31102851033210754, "learning_rate": 3.9655428486558745e-06, "loss": 0.0701, "step": 40630 }, { "epoch": 6.035942373384821, "grad_norm": 0.6675470471382141, "learning_rate": 3.964057626615179e-06, "loss": 0.0577, "step": 40640 }, { "epoch": 6.037427595425516, "grad_norm": 1.168430209159851, "learning_rate": 3.962572404574484e-06, "loss": 0.0434, "step": 40650 }, { "epoch": 6.038912817466211, "grad_norm": 1.6854634284973145, "learning_rate": 3.9610871825337895e-06, "loss": 0.0683, "step": 40660 }, { "epoch": 6.040398039506906, "grad_norm": 0.688122034072876, "learning_rate": 3.959601960493094e-06, "loss": 0.0568, "step": 40670 }, { "epoch": 6.041883261547602, "grad_norm": 0.9928855895996094, "learning_rate": 3.958116738452399e-06, "loss": 0.0485, "step": 40680 }, { "epoch": 6.043368483588297, "grad_norm": 0.8839887380599976, "learning_rate": 3.956631516411704e-06, "loss": 0.0527, "step": 40690 }, { "epoch": 6.0448537056289915, "grad_norm": 2.176649570465088, "learning_rate": 3.955146294371009e-06, "loss": 0.0759, "step": 40700 }, { "epoch": 6.046338927669686, "grad_norm": 0.7272756099700928, "learning_rate": 3.953661072330313e-06, "loss": 0.0462, "step": 40710 }, { "epoch": 6.047824149710381, "grad_norm": 0.5668994784355164, "learning_rate": 3.952175850289619e-06, "loss": 0.0561, "step": 40720 }, { "epoch": 6.049309371751077, "grad_norm": 0.7731595635414124, "learning_rate": 3.950690628248924e-06, "loss": 0.074, "step": 40730 }, { "epoch": 6.050794593791772, "grad_norm": 0.7913956642150879, "learning_rate": 3.949205406208228e-06, "loss": 0.0644, "step": 40740 }, { "epoch": 6.052279815832467, "grad_norm": 0.48246562480926514, "learning_rate": 3.947720184167534e-06, "loss": 0.0449, "step": 40750 }, { "epoch": 6.053765037873162, "grad_norm": 1.6649562120437622, "learning_rate": 3.946234962126838e-06, "loss": 0.0695, "step": 40760 }, { "epoch": 6.055250259913857, "grad_norm": 0.8541234135627747, "learning_rate": 3.944749740086143e-06, "loss": 0.0526, "step": 40770 }, { "epoch": 6.0567354819545525, "grad_norm": 1.0169918537139893, "learning_rate": 3.943264518045448e-06, "loss": 0.0619, "step": 40780 }, { "epoch": 6.058220703995247, "grad_norm": 1.1719691753387451, "learning_rate": 3.941779296004753e-06, "loss": 0.0566, "step": 40790 }, { "epoch": 6.059705926035942, "grad_norm": 0.8269428014755249, "learning_rate": 3.9402940739640575e-06, "loss": 0.0501, "step": 40800 }, { "epoch": 6.061191148076637, "grad_norm": 1.1896958351135254, "learning_rate": 3.938808851923363e-06, "loss": 0.0516, "step": 40810 }, { "epoch": 6.062676370117333, "grad_norm": 1.3691949844360352, "learning_rate": 3.937323629882668e-06, "loss": 0.0591, "step": 40820 }, { "epoch": 6.064161592158028, "grad_norm": 0.9588063359260559, "learning_rate": 3.9358384078419725e-06, "loss": 0.0489, "step": 40830 }, { "epoch": 6.065646814198723, "grad_norm": 1.0517445802688599, "learning_rate": 3.934353185801277e-06, "loss": 0.0603, "step": 40840 }, { "epoch": 6.067132036239418, "grad_norm": 1.123839020729065, "learning_rate": 3.932867963760583e-06, "loss": 0.0402, "step": 40850 }, { "epoch": 6.068617258280113, "grad_norm": 1.0038114786148071, "learning_rate": 3.9313827417198875e-06, "loss": 0.0615, "step": 40860 }, { "epoch": 6.070102480320808, "grad_norm": 0.554094672203064, "learning_rate": 3.929897519679192e-06, "loss": 0.06, "step": 40870 }, { "epoch": 6.071587702361503, "grad_norm": 1.453340768814087, "learning_rate": 3.928412297638497e-06, "loss": 0.0546, "step": 40880 }, { "epoch": 6.073072924402198, "grad_norm": 1.0778160095214844, "learning_rate": 3.9269270755978025e-06, "loss": 0.0594, "step": 40890 }, { "epoch": 6.074558146442893, "grad_norm": 1.0551393032073975, "learning_rate": 3.925441853557107e-06, "loss": 0.052, "step": 40900 }, { "epoch": 6.076043368483588, "grad_norm": 1.485189437866211, "learning_rate": 3.923956631516412e-06, "loss": 0.0466, "step": 40910 }, { "epoch": 6.077528590524284, "grad_norm": 0.8034722208976746, "learning_rate": 3.9224714094757175e-06, "loss": 0.0542, "step": 40920 }, { "epoch": 6.079013812564979, "grad_norm": 0.6016208529472351, "learning_rate": 3.920986187435022e-06, "loss": 0.0586, "step": 40930 }, { "epoch": 6.0804990346056735, "grad_norm": 1.325735092163086, "learning_rate": 3.919500965394326e-06, "loss": 0.0545, "step": 40940 }, { "epoch": 6.081984256646368, "grad_norm": 1.393202304840088, "learning_rate": 3.918015743353632e-06, "loss": 0.0534, "step": 40950 }, { "epoch": 6.083469478687063, "grad_norm": 0.6158031225204468, "learning_rate": 3.916530521312937e-06, "loss": 0.0582, "step": 40960 }, { "epoch": 6.084954700727759, "grad_norm": 0.6937658190727234, "learning_rate": 3.915045299272241e-06, "loss": 0.0434, "step": 40970 }, { "epoch": 6.086439922768454, "grad_norm": 0.5090669989585876, "learning_rate": 3.913560077231547e-06, "loss": 0.0585, "step": 40980 }, { "epoch": 6.087925144809149, "grad_norm": 0.6960013508796692, "learning_rate": 3.912074855190852e-06, "loss": 0.0682, "step": 40990 }, { "epoch": 6.089410366849844, "grad_norm": 1.6729462146759033, "learning_rate": 3.910589633150156e-06, "loss": 0.0846, "step": 41000 }, { "epoch": 6.090895588890539, "grad_norm": 0.41924574971199036, "learning_rate": 3.909104411109461e-06, "loss": 0.0695, "step": 41010 }, { "epoch": 6.0923808109312345, "grad_norm": 0.9897835850715637, "learning_rate": 3.907619189068766e-06, "loss": 0.0571, "step": 41020 }, { "epoch": 6.093866032971929, "grad_norm": 0.7119272947311401, "learning_rate": 3.906133967028071e-06, "loss": 0.0722, "step": 41030 }, { "epoch": 6.095351255012624, "grad_norm": 0.5422357320785522, "learning_rate": 3.904648744987376e-06, "loss": 0.0471, "step": 41040 }, { "epoch": 6.096836477053319, "grad_norm": 0.6966010928153992, "learning_rate": 3.903163522946681e-06, "loss": 0.0579, "step": 41050 }, { "epoch": 6.098321699094015, "grad_norm": 1.064351201057434, "learning_rate": 3.9016783009059855e-06, "loss": 0.0526, "step": 41060 }, { "epoch": 6.09980692113471, "grad_norm": 0.25419992208480835, "learning_rate": 3.900193078865291e-06, "loss": 0.0447, "step": 41070 }, { "epoch": 6.101292143175405, "grad_norm": 1.352564811706543, "learning_rate": 3.898707856824595e-06, "loss": 0.0686, "step": 41080 }, { "epoch": 6.1027773652161, "grad_norm": 0.6406208872795105, "learning_rate": 3.8972226347839005e-06, "loss": 0.05, "step": 41090 }, { "epoch": 6.104262587256795, "grad_norm": 0.680209755897522, "learning_rate": 3.895737412743205e-06, "loss": 0.0462, "step": 41100 }, { "epoch": 6.10574780929749, "grad_norm": 0.8571810126304626, "learning_rate": 3.89425219070251e-06, "loss": 0.0793, "step": 41110 }, { "epoch": 6.107233031338185, "grad_norm": 0.9361696243286133, "learning_rate": 3.8927669686618155e-06, "loss": 0.0585, "step": 41120 }, { "epoch": 6.10871825337888, "grad_norm": 1.5825921297073364, "learning_rate": 3.89128174662112e-06, "loss": 0.0554, "step": 41130 }, { "epoch": 6.110203475419575, "grad_norm": 0.8863343596458435, "learning_rate": 3.889796524580425e-06, "loss": 0.0589, "step": 41140 }, { "epoch": 6.11168869746027, "grad_norm": 1.2165135145187378, "learning_rate": 3.8883113025397305e-06, "loss": 0.0718, "step": 41150 }, { "epoch": 6.113173919500966, "grad_norm": 1.1842724084854126, "learning_rate": 3.886826080499035e-06, "loss": 0.0721, "step": 41160 }, { "epoch": 6.114659141541661, "grad_norm": 0.7539393901824951, "learning_rate": 3.885340858458339e-06, "loss": 0.0548, "step": 41170 }, { "epoch": 6.116144363582356, "grad_norm": 0.5141851902008057, "learning_rate": 3.883855636417645e-06, "loss": 0.036, "step": 41180 }, { "epoch": 6.1176295856230505, "grad_norm": 1.1090915203094482, "learning_rate": 3.88237041437695e-06, "loss": 0.0628, "step": 41190 }, { "epoch": 6.119114807663745, "grad_norm": 0.4115223288536072, "learning_rate": 3.880885192336254e-06, "loss": 0.0512, "step": 41200 }, { "epoch": 6.120600029704441, "grad_norm": 0.7853782176971436, "learning_rate": 3.87939997029556e-06, "loss": 0.0588, "step": 41210 }, { "epoch": 6.122085251745136, "grad_norm": 0.8141002058982849, "learning_rate": 3.877914748254865e-06, "loss": 0.057, "step": 41220 }, { "epoch": 6.123570473785831, "grad_norm": 1.1658971309661865, "learning_rate": 3.876429526214169e-06, "loss": 0.0595, "step": 41230 }, { "epoch": 6.125055695826526, "grad_norm": 0.9855948090553284, "learning_rate": 3.874944304173474e-06, "loss": 0.0437, "step": 41240 }, { "epoch": 6.126540917867221, "grad_norm": 0.8863463401794434, "learning_rate": 3.873459082132779e-06, "loss": 0.0607, "step": 41250 }, { "epoch": 6.128026139907917, "grad_norm": 0.5298382639884949, "learning_rate": 3.871973860092084e-06, "loss": 0.0523, "step": 41260 }, { "epoch": 6.1295113619486115, "grad_norm": 0.23882558941841125, "learning_rate": 3.870488638051389e-06, "loss": 0.052, "step": 41270 }, { "epoch": 6.130996583989306, "grad_norm": 1.4826135635375977, "learning_rate": 3.869003416010694e-06, "loss": 0.0688, "step": 41280 }, { "epoch": 6.132481806030001, "grad_norm": 0.44090694189071655, "learning_rate": 3.867518193969999e-06, "loss": 0.0564, "step": 41290 }, { "epoch": 6.133967028070696, "grad_norm": 1.0146280527114868, "learning_rate": 3.866032971929304e-06, "loss": 0.0625, "step": 41300 }, { "epoch": 6.135452250111392, "grad_norm": 0.3917138874530792, "learning_rate": 3.864547749888608e-06, "loss": 0.0495, "step": 41310 }, { "epoch": 6.136937472152087, "grad_norm": 0.6822810769081116, "learning_rate": 3.8630625278479135e-06, "loss": 0.066, "step": 41320 }, { "epoch": 6.138422694192782, "grad_norm": 0.5941704511642456, "learning_rate": 3.861577305807219e-06, "loss": 0.0604, "step": 41330 }, { "epoch": 6.139907916233477, "grad_norm": 0.5531407594680786, "learning_rate": 3.860092083766523e-06, "loss": 0.073, "step": 41340 }, { "epoch": 6.1413931382741715, "grad_norm": 1.3868801593780518, "learning_rate": 3.8586068617258285e-06, "loss": 0.0564, "step": 41350 }, { "epoch": 6.142878360314867, "grad_norm": 0.8457816243171692, "learning_rate": 3.857121639685133e-06, "loss": 0.04, "step": 41360 }, { "epoch": 6.144363582355562, "grad_norm": 0.671506404876709, "learning_rate": 3.855636417644438e-06, "loss": 0.0342, "step": 41370 }, { "epoch": 6.145848804396257, "grad_norm": 0.5517304539680481, "learning_rate": 3.8541511956037435e-06, "loss": 0.0502, "step": 41380 }, { "epoch": 6.147334026436952, "grad_norm": 0.5606401562690735, "learning_rate": 3.852665973563048e-06, "loss": 0.0605, "step": 41390 }, { "epoch": 6.148819248477648, "grad_norm": 1.4839414358139038, "learning_rate": 3.851180751522353e-06, "loss": 0.0797, "step": 41400 }, { "epoch": 6.150304470518343, "grad_norm": 1.2912324666976929, "learning_rate": 3.849695529481658e-06, "loss": 0.0874, "step": 41410 }, { "epoch": 6.151789692559038, "grad_norm": 1.0865097045898438, "learning_rate": 3.848210307440963e-06, "loss": 0.0436, "step": 41420 }, { "epoch": 6.1532749145997325, "grad_norm": 0.8545838594436646, "learning_rate": 3.846725085400267e-06, "loss": 0.0463, "step": 41430 }, { "epoch": 6.154760136640427, "grad_norm": 0.6500738859176636, "learning_rate": 3.845239863359573e-06, "loss": 0.0564, "step": 41440 }, { "epoch": 6.156245358681123, "grad_norm": 1.0389630794525146, "learning_rate": 3.843754641318878e-06, "loss": 0.0582, "step": 41450 }, { "epoch": 6.157730580721818, "grad_norm": 1.0748189687728882, "learning_rate": 3.842269419278182e-06, "loss": 0.0662, "step": 41460 }, { "epoch": 6.159215802762513, "grad_norm": 1.0013229846954346, "learning_rate": 3.840784197237487e-06, "loss": 0.0622, "step": 41470 }, { "epoch": 6.160701024803208, "grad_norm": 1.26486074924469, "learning_rate": 3.839298975196792e-06, "loss": 0.0456, "step": 41480 }, { "epoch": 6.162186246843903, "grad_norm": 0.8088564276695251, "learning_rate": 3.837813753156097e-06, "loss": 0.0574, "step": 41490 }, { "epoch": 6.163671468884599, "grad_norm": 0.7139611840248108, "learning_rate": 3.836328531115402e-06, "loss": 0.074, "step": 41500 }, { "epoch": 6.1651566909252935, "grad_norm": 0.7177110910415649, "learning_rate": 3.834843309074707e-06, "loss": 0.0488, "step": 41510 }, { "epoch": 6.166641912965988, "grad_norm": 1.611185908317566, "learning_rate": 3.833358087034012e-06, "loss": 0.0572, "step": 41520 }, { "epoch": 6.168127135006683, "grad_norm": 0.7332130074501038, "learning_rate": 3.831872864993317e-06, "loss": 0.0672, "step": 41530 }, { "epoch": 6.169612357047378, "grad_norm": 1.156199336051941, "learning_rate": 3.830387642952621e-06, "loss": 0.0387, "step": 41540 }, { "epoch": 6.171097579088074, "grad_norm": 0.6302946209907532, "learning_rate": 3.828902420911927e-06, "loss": 0.0488, "step": 41550 }, { "epoch": 6.172582801128769, "grad_norm": 0.9586188793182373, "learning_rate": 3.827417198871232e-06, "loss": 0.0664, "step": 41560 }, { "epoch": 6.174068023169464, "grad_norm": 0.8568915128707886, "learning_rate": 3.825931976830536e-06, "loss": 0.0438, "step": 41570 }, { "epoch": 6.175553245210159, "grad_norm": 0.9365776181221008, "learning_rate": 3.8244467547898414e-06, "loss": 0.051, "step": 41580 }, { "epoch": 6.177038467250854, "grad_norm": 0.6824678778648376, "learning_rate": 3.822961532749147e-06, "loss": 0.0568, "step": 41590 }, { "epoch": 6.178523689291549, "grad_norm": 0.9566531777381897, "learning_rate": 3.821476310708451e-06, "loss": 0.0667, "step": 41600 }, { "epoch": 6.180008911332244, "grad_norm": 1.0182305574417114, "learning_rate": 3.8199910886677564e-06, "loss": 0.0707, "step": 41610 }, { "epoch": 6.181494133372939, "grad_norm": 1.1408592462539673, "learning_rate": 3.818505866627061e-06, "loss": 0.074, "step": 41620 }, { "epoch": 6.182979355413634, "grad_norm": 1.2050893306732178, "learning_rate": 3.817020644586366e-06, "loss": 0.0684, "step": 41630 }, { "epoch": 6.18446457745433, "grad_norm": 0.6228426098823547, "learning_rate": 3.815535422545671e-06, "loss": 0.0509, "step": 41640 }, { "epoch": 6.185949799495025, "grad_norm": 0.21518337726593018, "learning_rate": 3.8140502005049754e-06, "loss": 0.0554, "step": 41650 }, { "epoch": 6.18743502153572, "grad_norm": 0.8894339799880981, "learning_rate": 3.812564978464281e-06, "loss": 0.0525, "step": 41660 }, { "epoch": 6.188920243576415, "grad_norm": 0.8132939338684082, "learning_rate": 3.8110797564235856e-06, "loss": 0.0754, "step": 41670 }, { "epoch": 6.1904054656171095, "grad_norm": 1.0716118812561035, "learning_rate": 3.8095945343828904e-06, "loss": 0.0672, "step": 41680 }, { "epoch": 6.191890687657805, "grad_norm": 1.131106972694397, "learning_rate": 3.8081093123421953e-06, "loss": 0.0582, "step": 41690 }, { "epoch": 6.1933759096985, "grad_norm": 1.8588736057281494, "learning_rate": 3.8066240903015006e-06, "loss": 0.0522, "step": 41700 }, { "epoch": 6.194861131739195, "grad_norm": 0.807196319103241, "learning_rate": 3.8051388682608054e-06, "loss": 0.0569, "step": 41710 }, { "epoch": 6.19634635377989, "grad_norm": 0.8074870109558105, "learning_rate": 3.8036536462201103e-06, "loss": 0.0626, "step": 41720 }, { "epoch": 6.197831575820585, "grad_norm": 0.7318360805511475, "learning_rate": 3.8021684241794147e-06, "loss": 0.047, "step": 41730 }, { "epoch": 6.199316797861281, "grad_norm": 0.2228417545557022, "learning_rate": 3.8006832021387204e-06, "loss": 0.0573, "step": 41740 }, { "epoch": 6.2008020199019755, "grad_norm": 0.7176522016525269, "learning_rate": 3.799197980098025e-06, "loss": 0.0484, "step": 41750 }, { "epoch": 6.2022872419426704, "grad_norm": 1.6052658557891846, "learning_rate": 3.7977127580573297e-06, "loss": 0.0629, "step": 41760 }, { "epoch": 6.203772463983365, "grad_norm": 0.9158141016960144, "learning_rate": 3.796227536016635e-06, "loss": 0.0677, "step": 41770 }, { "epoch": 6.20525768602406, "grad_norm": 0.7459770441055298, "learning_rate": 3.79474231397594e-06, "loss": 0.0731, "step": 41780 }, { "epoch": 6.206742908064756, "grad_norm": 1.1536628007888794, "learning_rate": 3.7932570919352447e-06, "loss": 0.0617, "step": 41790 }, { "epoch": 6.208228130105451, "grad_norm": 0.9005663394927979, "learning_rate": 3.7917718698945496e-06, "loss": 0.0525, "step": 41800 }, { "epoch": 6.209713352146146, "grad_norm": 0.64149409532547, "learning_rate": 3.790286647853855e-06, "loss": 0.0592, "step": 41810 }, { "epoch": 6.211198574186841, "grad_norm": 0.7573428153991699, "learning_rate": 3.7888014258131593e-06, "loss": 0.0608, "step": 41820 }, { "epoch": 6.212683796227536, "grad_norm": 0.9015231728553772, "learning_rate": 3.787316203772464e-06, "loss": 0.0783, "step": 41830 }, { "epoch": 6.214169018268231, "grad_norm": 0.5844115018844604, "learning_rate": 3.785830981731769e-06, "loss": 0.0541, "step": 41840 }, { "epoch": 6.215654240308926, "grad_norm": 0.7862566709518433, "learning_rate": 3.7843457596910743e-06, "loss": 0.0405, "step": 41850 }, { "epoch": 6.217139462349621, "grad_norm": 0.7765684723854065, "learning_rate": 3.782860537650379e-06, "loss": 0.0626, "step": 41860 }, { "epoch": 6.218624684390316, "grad_norm": 1.4600820541381836, "learning_rate": 3.781375315609684e-06, "loss": 0.0644, "step": 41870 }, { "epoch": 6.220109906431011, "grad_norm": 0.4247475564479828, "learning_rate": 3.7798900935689884e-06, "loss": 0.0493, "step": 41880 }, { "epoch": 6.221595128471707, "grad_norm": 1.0825570821762085, "learning_rate": 3.778404871528294e-06, "loss": 0.0753, "step": 41890 }, { "epoch": 6.223080350512402, "grad_norm": 0.660193681716919, "learning_rate": 3.7769196494875986e-06, "loss": 0.0658, "step": 41900 }, { "epoch": 6.224565572553097, "grad_norm": 0.6365493535995483, "learning_rate": 3.7754344274469034e-06, "loss": 0.0538, "step": 41910 }, { "epoch": 6.2260507945937915, "grad_norm": 0.7567954659461975, "learning_rate": 3.7739492054062087e-06, "loss": 0.0612, "step": 41920 }, { "epoch": 6.227536016634486, "grad_norm": 0.9321280717849731, "learning_rate": 3.7724639833655136e-06, "loss": 0.0716, "step": 41930 }, { "epoch": 6.229021238675182, "grad_norm": 1.1202092170715332, "learning_rate": 3.7709787613248184e-06, "loss": 0.0496, "step": 41940 }, { "epoch": 6.230506460715877, "grad_norm": 0.5768749713897705, "learning_rate": 3.7694935392841233e-06, "loss": 0.0493, "step": 41950 }, { "epoch": 6.231991682756572, "grad_norm": 0.6443141102790833, "learning_rate": 3.7680083172434285e-06, "loss": 0.0634, "step": 41960 }, { "epoch": 6.233476904797267, "grad_norm": 0.8601978421211243, "learning_rate": 3.766523095202733e-06, "loss": 0.062, "step": 41970 }, { "epoch": 6.234962126837963, "grad_norm": 0.9269450902938843, "learning_rate": 3.765037873162038e-06, "loss": 0.0753, "step": 41980 }, { "epoch": 6.236447348878658, "grad_norm": 0.7924551367759705, "learning_rate": 3.7635526511213427e-06, "loss": 0.0643, "step": 41990 }, { "epoch": 6.2379325709193525, "grad_norm": 0.8593719005584717, "learning_rate": 3.762067429080648e-06, "loss": 0.0726, "step": 42000 }, { "epoch": 6.239417792960047, "grad_norm": 0.5367026329040527, "learning_rate": 3.760582207039953e-06, "loss": 0.0598, "step": 42010 }, { "epoch": 6.240903015000742, "grad_norm": 0.9355032444000244, "learning_rate": 3.7590969849992577e-06, "loss": 0.0626, "step": 42020 }, { "epoch": 6.242388237041438, "grad_norm": 1.0359206199645996, "learning_rate": 3.7576117629585625e-06, "loss": 0.0491, "step": 42030 }, { "epoch": 6.243873459082133, "grad_norm": 0.44826602935791016, "learning_rate": 3.756126540917868e-06, "loss": 0.0636, "step": 42040 }, { "epoch": 6.245358681122828, "grad_norm": 0.7758882641792297, "learning_rate": 3.7546413188771723e-06, "loss": 0.0519, "step": 42050 }, { "epoch": 6.246843903163523, "grad_norm": 1.433592677116394, "learning_rate": 3.753156096836477e-06, "loss": 0.0608, "step": 42060 }, { "epoch": 6.248329125204218, "grad_norm": 0.5968450307846069, "learning_rate": 3.7516708747957824e-06, "loss": 0.054, "step": 42070 }, { "epoch": 6.2498143472449135, "grad_norm": 0.689393162727356, "learning_rate": 3.7501856527550873e-06, "loss": 0.0664, "step": 42080 }, { "epoch": 6.251299569285608, "grad_norm": 0.7492974400520325, "learning_rate": 3.748700430714392e-06, "loss": 0.0659, "step": 42090 }, { "epoch": 6.252784791326303, "grad_norm": 0.3340299129486084, "learning_rate": 3.747215208673697e-06, "loss": 0.0448, "step": 42100 }, { "epoch": 6.254270013366998, "grad_norm": 1.1328703165054321, "learning_rate": 3.7457299866330022e-06, "loss": 0.0377, "step": 42110 }, { "epoch": 6.255755235407693, "grad_norm": 0.6826351881027222, "learning_rate": 3.744244764592307e-06, "loss": 0.0641, "step": 42120 }, { "epoch": 6.257240457448389, "grad_norm": 0.5043858885765076, "learning_rate": 3.7427595425516115e-06, "loss": 0.0525, "step": 42130 }, { "epoch": 6.258725679489084, "grad_norm": 0.7694330811500549, "learning_rate": 3.7412743205109164e-06, "loss": 0.0574, "step": 42140 }, { "epoch": 6.260210901529779, "grad_norm": 0.6579335331916809, "learning_rate": 3.7397890984702217e-06, "loss": 0.0419, "step": 42150 }, { "epoch": 6.261696123570474, "grad_norm": 0.3473973572254181, "learning_rate": 3.7383038764295265e-06, "loss": 0.0327, "step": 42160 }, { "epoch": 6.2631813456111685, "grad_norm": 0.5329101085662842, "learning_rate": 3.7368186543888314e-06, "loss": 0.0743, "step": 42170 }, { "epoch": 6.264666567651864, "grad_norm": 1.2124110460281372, "learning_rate": 3.7353334323481367e-06, "loss": 0.0701, "step": 42180 }, { "epoch": 6.266151789692559, "grad_norm": 1.588112711906433, "learning_rate": 3.7338482103074415e-06, "loss": 0.0664, "step": 42190 }, { "epoch": 6.267637011733254, "grad_norm": 0.6495673656463623, "learning_rate": 3.732362988266746e-06, "loss": 0.0528, "step": 42200 }, { "epoch": 6.269122233773949, "grad_norm": 0.4462796747684479, "learning_rate": 3.730877766226051e-06, "loss": 0.0413, "step": 42210 }, { "epoch": 6.270607455814645, "grad_norm": 0.7272733449935913, "learning_rate": 3.729392544185356e-06, "loss": 0.0767, "step": 42220 }, { "epoch": 6.27209267785534, "grad_norm": 1.4103509187698364, "learning_rate": 3.727907322144661e-06, "loss": 0.0541, "step": 42230 }, { "epoch": 6.2735778998960345, "grad_norm": 0.6002963781356812, "learning_rate": 3.726422100103966e-06, "loss": 0.0602, "step": 42240 }, { "epoch": 6.275063121936729, "grad_norm": 0.6196624636650085, "learning_rate": 3.7249368780632707e-06, "loss": 0.0539, "step": 42250 }, { "epoch": 6.276548343977424, "grad_norm": 0.47498103976249695, "learning_rate": 3.723451656022576e-06, "loss": 0.0648, "step": 42260 }, { "epoch": 6.27803356601812, "grad_norm": 0.428314208984375, "learning_rate": 3.721966433981881e-06, "loss": 0.0481, "step": 42270 }, { "epoch": 6.279518788058815, "grad_norm": 0.5692898631095886, "learning_rate": 3.7204812119411852e-06, "loss": 0.0421, "step": 42280 }, { "epoch": 6.28100401009951, "grad_norm": 2.004471778869629, "learning_rate": 3.71899598990049e-06, "loss": 0.0619, "step": 42290 }, { "epoch": 6.282489232140205, "grad_norm": 0.504784882068634, "learning_rate": 3.7175107678597954e-06, "loss": 0.0467, "step": 42300 }, { "epoch": 6.2839744541809, "grad_norm": 0.5373015403747559, "learning_rate": 3.7160255458191002e-06, "loss": 0.0622, "step": 42310 }, { "epoch": 6.2854596762215955, "grad_norm": 0.6683239340782166, "learning_rate": 3.714540323778405e-06, "loss": 0.0548, "step": 42320 }, { "epoch": 6.28694489826229, "grad_norm": 0.7173686623573303, "learning_rate": 3.7130551017377104e-06, "loss": 0.0595, "step": 42330 }, { "epoch": 6.288430120302985, "grad_norm": 0.7080352902412415, "learning_rate": 3.7115698796970152e-06, "loss": 0.0579, "step": 42340 }, { "epoch": 6.28991534234368, "grad_norm": 0.9369353652000427, "learning_rate": 3.7100846576563197e-06, "loss": 0.0715, "step": 42350 }, { "epoch": 6.291400564384375, "grad_norm": 0.34376099705696106, "learning_rate": 3.7085994356156245e-06, "loss": 0.0415, "step": 42360 }, { "epoch": 6.292885786425071, "grad_norm": 0.711609423160553, "learning_rate": 3.70711421357493e-06, "loss": 0.0469, "step": 42370 }, { "epoch": 6.294371008465766, "grad_norm": 1.4193964004516602, "learning_rate": 3.7056289915342347e-06, "loss": 0.065, "step": 42380 }, { "epoch": 6.295856230506461, "grad_norm": 0.6497589349746704, "learning_rate": 3.7041437694935395e-06, "loss": 0.053, "step": 42390 }, { "epoch": 6.297341452547156, "grad_norm": 1.2740793228149414, "learning_rate": 3.7026585474528444e-06, "loss": 0.0514, "step": 42400 }, { "epoch": 6.2988266745878505, "grad_norm": 0.4431326985359192, "learning_rate": 3.7011733254121496e-06, "loss": 0.0641, "step": 42410 }, { "epoch": 6.300311896628546, "grad_norm": 0.61473149061203, "learning_rate": 3.6996881033714545e-06, "loss": 0.0434, "step": 42420 }, { "epoch": 6.301797118669241, "grad_norm": 0.9502345323562622, "learning_rate": 3.698202881330759e-06, "loss": 0.0632, "step": 42430 }, { "epoch": 6.303282340709936, "grad_norm": 0.5800604820251465, "learning_rate": 3.6967176592900646e-06, "loss": 0.0564, "step": 42440 }, { "epoch": 6.304767562750631, "grad_norm": 0.6123915910720825, "learning_rate": 3.695232437249369e-06, "loss": 0.0532, "step": 42450 }, { "epoch": 6.306252784791326, "grad_norm": 1.1906459331512451, "learning_rate": 3.693747215208674e-06, "loss": 0.062, "step": 42460 }, { "epoch": 6.307738006832022, "grad_norm": 0.4894062876701355, "learning_rate": 3.692261993167979e-06, "loss": 0.0729, "step": 42470 }, { "epoch": 6.309223228872717, "grad_norm": 0.6452047824859619, "learning_rate": 3.690776771127284e-06, "loss": 0.0458, "step": 42480 }, { "epoch": 6.3107084509134115, "grad_norm": 0.8813139200210571, "learning_rate": 3.689291549086589e-06, "loss": 0.052, "step": 42490 }, { "epoch": 6.312193672954106, "grad_norm": 0.7368887066841125, "learning_rate": 3.6878063270458938e-06, "loss": 0.0642, "step": 42500 }, { "epoch": 6.313678894994801, "grad_norm": 0.7231339812278748, "learning_rate": 3.6863211050051982e-06, "loss": 0.0519, "step": 42510 }, { "epoch": 6.315164117035497, "grad_norm": 0.9437358975410461, "learning_rate": 3.6848358829645035e-06, "loss": 0.0581, "step": 42520 }, { "epoch": 6.316649339076192, "grad_norm": 0.8325836658477783, "learning_rate": 3.6833506609238084e-06, "loss": 0.0667, "step": 42530 }, { "epoch": 6.318134561116887, "grad_norm": 1.0628379583358765, "learning_rate": 3.681865438883113e-06, "loss": 0.0598, "step": 42540 }, { "epoch": 6.319619783157582, "grad_norm": 1.2732975482940674, "learning_rate": 3.680380216842418e-06, "loss": 0.067, "step": 42550 }, { "epoch": 6.321105005198277, "grad_norm": 0.9922568202018738, "learning_rate": 3.6788949948017233e-06, "loss": 0.0597, "step": 42560 }, { "epoch": 6.3225902272389725, "grad_norm": 0.9544100761413574, "learning_rate": 3.677409772761028e-06, "loss": 0.0458, "step": 42570 }, { "epoch": 6.324075449279667, "grad_norm": 1.7845814228057861, "learning_rate": 3.6759245507203326e-06, "loss": 0.0558, "step": 42580 }, { "epoch": 6.325560671320362, "grad_norm": 0.8888192772865295, "learning_rate": 3.6744393286796383e-06, "loss": 0.0806, "step": 42590 }, { "epoch": 6.327045893361057, "grad_norm": 0.7353573441505432, "learning_rate": 3.6729541066389428e-06, "loss": 0.0476, "step": 42600 }, { "epoch": 6.328531115401753, "grad_norm": 1.0673481225967407, "learning_rate": 3.6714688845982476e-06, "loss": 0.0604, "step": 42610 }, { "epoch": 6.330016337442448, "grad_norm": 0.9482265710830688, "learning_rate": 3.6699836625575525e-06, "loss": 0.051, "step": 42620 }, { "epoch": 6.331501559483143, "grad_norm": 0.5500361323356628, "learning_rate": 3.6684984405168578e-06, "loss": 0.0568, "step": 42630 }, { "epoch": 6.332986781523838, "grad_norm": 0.8495878577232361, "learning_rate": 3.6670132184761626e-06, "loss": 0.0537, "step": 42640 }, { "epoch": 6.3344720035645325, "grad_norm": 0.6985103487968445, "learning_rate": 3.6655279964354675e-06, "loss": 0.0455, "step": 42650 }, { "epoch": 6.335957225605228, "grad_norm": 0.6907896995544434, "learning_rate": 3.664042774394772e-06, "loss": 0.0501, "step": 42660 }, { "epoch": 6.337442447645923, "grad_norm": 1.1214615106582642, "learning_rate": 3.662557552354077e-06, "loss": 0.0546, "step": 42670 }, { "epoch": 6.338927669686618, "grad_norm": 0.7439780831336975, "learning_rate": 3.661072330313382e-06, "loss": 0.0566, "step": 42680 }, { "epoch": 6.340412891727313, "grad_norm": 0.47073668241500854, "learning_rate": 3.659587108272687e-06, "loss": 0.0488, "step": 42690 }, { "epoch": 6.341898113768008, "grad_norm": 0.8119236826896667, "learning_rate": 3.658101886231992e-06, "loss": 0.0616, "step": 42700 }, { "epoch": 6.343383335808704, "grad_norm": 0.8019691109657288, "learning_rate": 3.656616664191297e-06, "loss": 0.0604, "step": 42710 }, { "epoch": 6.344868557849399, "grad_norm": 0.8019810914993286, "learning_rate": 3.655131442150602e-06, "loss": 0.0516, "step": 42720 }, { "epoch": 6.3463537798900935, "grad_norm": 0.7432687282562256, "learning_rate": 3.6536462201099063e-06, "loss": 0.0644, "step": 42730 }, { "epoch": 6.347839001930788, "grad_norm": 0.9278331398963928, "learning_rate": 3.652160998069212e-06, "loss": 0.0677, "step": 42740 }, { "epoch": 6.349324223971483, "grad_norm": 0.8735617399215698, "learning_rate": 3.6506757760285165e-06, "loss": 0.0416, "step": 42750 }, { "epoch": 6.350809446012179, "grad_norm": 0.627546489238739, "learning_rate": 3.6491905539878213e-06, "loss": 0.056, "step": 42760 }, { "epoch": 6.352294668052874, "grad_norm": 0.3710090219974518, "learning_rate": 3.647705331947126e-06, "loss": 0.0572, "step": 42770 }, { "epoch": 6.353779890093569, "grad_norm": 0.8109626770019531, "learning_rate": 3.6462201099064315e-06, "loss": 0.0519, "step": 42780 }, { "epoch": 6.355265112134264, "grad_norm": 0.7307920455932617, "learning_rate": 3.6447348878657363e-06, "loss": 0.0624, "step": 42790 }, { "epoch": 6.35675033417496, "grad_norm": 0.569338321685791, "learning_rate": 3.643249665825041e-06, "loss": 0.0626, "step": 42800 }, { "epoch": 6.3582355562156545, "grad_norm": 1.0345954895019531, "learning_rate": 3.6417644437843456e-06, "loss": 0.0639, "step": 42810 }, { "epoch": 6.359720778256349, "grad_norm": 0.6668635606765747, "learning_rate": 3.6402792217436513e-06, "loss": 0.0512, "step": 42820 }, { "epoch": 6.361206000297044, "grad_norm": 0.7520941495895386, "learning_rate": 3.6387939997029558e-06, "loss": 0.063, "step": 42830 }, { "epoch": 6.362691222337739, "grad_norm": 0.9604344367980957, "learning_rate": 3.6373087776622606e-06, "loss": 0.0638, "step": 42840 }, { "epoch": 6.364176444378435, "grad_norm": 0.719851016998291, "learning_rate": 3.635823555621566e-06, "loss": 0.0639, "step": 42850 }, { "epoch": 6.36566166641913, "grad_norm": 1.470848798751831, "learning_rate": 3.6343383335808707e-06, "loss": 0.066, "step": 42860 }, { "epoch": 6.367146888459825, "grad_norm": 1.5860182046890259, "learning_rate": 3.6328531115401756e-06, "loss": 0.0548, "step": 42870 }, { "epoch": 6.36863211050052, "grad_norm": 0.7763499617576599, "learning_rate": 3.6313678894994805e-06, "loss": 0.0682, "step": 42880 }, { "epoch": 6.370117332541215, "grad_norm": 0.8643427491188049, "learning_rate": 3.6298826674587857e-06, "loss": 0.0444, "step": 42890 }, { "epoch": 6.37160255458191, "grad_norm": 1.1857274770736694, "learning_rate": 3.62839744541809e-06, "loss": 0.0518, "step": 42900 }, { "epoch": 6.373087776622605, "grad_norm": 0.8132492303848267, "learning_rate": 3.626912223377395e-06, "loss": 0.0492, "step": 42910 }, { "epoch": 6.3745729986633, "grad_norm": 1.3745230436325073, "learning_rate": 3.6254270013367e-06, "loss": 0.057, "step": 42920 }, { "epoch": 6.376058220703995, "grad_norm": 1.691438913345337, "learning_rate": 3.623941779296005e-06, "loss": 0.0536, "step": 42930 }, { "epoch": 6.37754344274469, "grad_norm": 0.6980904936790466, "learning_rate": 3.62245655725531e-06, "loss": 0.0468, "step": 42940 }, { "epoch": 6.379028664785386, "grad_norm": 0.4476390480995178, "learning_rate": 3.620971335214615e-06, "loss": 0.0732, "step": 42950 }, { "epoch": 6.380513886826081, "grad_norm": 0.8817645311355591, "learning_rate": 3.61948611317392e-06, "loss": 0.0513, "step": 42960 }, { "epoch": 6.381999108866776, "grad_norm": 0.406442791223526, "learning_rate": 3.618000891133225e-06, "loss": 0.057, "step": 42970 }, { "epoch": 6.3834843309074705, "grad_norm": 0.8691296577453613, "learning_rate": 3.6165156690925295e-06, "loss": 0.0692, "step": 42980 }, { "epoch": 6.384969552948165, "grad_norm": 1.1780812740325928, "learning_rate": 3.6150304470518343e-06, "loss": 0.0708, "step": 42990 }, { "epoch": 6.386454774988861, "grad_norm": 0.6412712931632996, "learning_rate": 3.6135452250111396e-06, "loss": 0.0525, "step": 43000 }, { "epoch": 6.387939997029556, "grad_norm": 0.9573513865470886, "learning_rate": 3.6120600029704444e-06, "loss": 0.0691, "step": 43010 }, { "epoch": 6.389425219070251, "grad_norm": 0.7389662861824036, "learning_rate": 3.6105747809297493e-06, "loss": 0.0616, "step": 43020 }, { "epoch": 6.390910441110946, "grad_norm": 0.9667379260063171, "learning_rate": 3.609089558889054e-06, "loss": 0.0584, "step": 43030 }, { "epoch": 6.392395663151641, "grad_norm": 0.5876452922821045, "learning_rate": 3.6076043368483594e-06, "loss": 0.0692, "step": 43040 }, { "epoch": 6.3938808851923365, "grad_norm": 1.150445580482483, "learning_rate": 3.606119114807664e-06, "loss": 0.0686, "step": 43050 }, { "epoch": 6.3953661072330314, "grad_norm": 1.0147703886032104, "learning_rate": 3.6046338927669687e-06, "loss": 0.0614, "step": 43060 }, { "epoch": 6.396851329273726, "grad_norm": 1.0354033708572388, "learning_rate": 3.6031486707262736e-06, "loss": 0.0589, "step": 43070 }, { "epoch": 6.398336551314421, "grad_norm": 1.8302271366119385, "learning_rate": 3.601663448685579e-06, "loss": 0.054, "step": 43080 }, { "epoch": 6.399821773355116, "grad_norm": 0.3475869596004486, "learning_rate": 3.6001782266448837e-06, "loss": 0.0575, "step": 43090 }, { "epoch": 6.401306995395812, "grad_norm": 0.732566773891449, "learning_rate": 3.5986930046041886e-06, "loss": 0.0394, "step": 43100 }, { "epoch": 6.402792217436507, "grad_norm": 0.48667097091674805, "learning_rate": 3.597207782563494e-06, "loss": 0.0594, "step": 43110 }, { "epoch": 6.404277439477202, "grad_norm": 0.8790454268455505, "learning_rate": 3.5957225605227987e-06, "loss": 0.0405, "step": 43120 }, { "epoch": 6.405762661517897, "grad_norm": 0.9130986928939819, "learning_rate": 3.594237338482103e-06, "loss": 0.053, "step": 43130 }, { "epoch": 6.4072478835585915, "grad_norm": 1.0321241617202759, "learning_rate": 3.592752116441408e-06, "loss": 0.0586, "step": 43140 }, { "epoch": 6.408733105599287, "grad_norm": 0.9510276913642883, "learning_rate": 3.5912668944007133e-06, "loss": 0.0666, "step": 43150 }, { "epoch": 6.410218327639982, "grad_norm": 0.6249675750732422, "learning_rate": 3.589781672360018e-06, "loss": 0.0569, "step": 43160 }, { "epoch": 6.411703549680677, "grad_norm": 1.0495495796203613, "learning_rate": 3.588296450319323e-06, "loss": 0.0596, "step": 43170 }, { "epoch": 6.413188771721372, "grad_norm": 1.4297109842300415, "learning_rate": 3.586811228278628e-06, "loss": 0.0661, "step": 43180 }, { "epoch": 6.414673993762068, "grad_norm": 1.4268157482147217, "learning_rate": 3.585326006237933e-06, "loss": 0.0631, "step": 43190 }, { "epoch": 6.416159215802763, "grad_norm": 0.49547335505485535, "learning_rate": 3.583840784197238e-06, "loss": 0.0529, "step": 43200 }, { "epoch": 6.417644437843458, "grad_norm": 1.1406906843185425, "learning_rate": 3.5823555621565424e-06, "loss": 0.0807, "step": 43210 }, { "epoch": 6.4191296598841525, "grad_norm": 0.9071478843688965, "learning_rate": 3.5808703401158477e-06, "loss": 0.0688, "step": 43220 }, { "epoch": 6.420614881924847, "grad_norm": 0.9624571204185486, "learning_rate": 3.5793851180751526e-06, "loss": 0.0609, "step": 43230 }, { "epoch": 6.422100103965543, "grad_norm": 0.5686826705932617, "learning_rate": 3.5778998960344574e-06, "loss": 0.0472, "step": 43240 }, { "epoch": 6.423585326006238, "grad_norm": 1.276450753211975, "learning_rate": 3.5764146739937623e-06, "loss": 0.0593, "step": 43250 }, { "epoch": 6.425070548046933, "grad_norm": 1.1901986598968506, "learning_rate": 3.5749294519530676e-06, "loss": 0.0573, "step": 43260 }, { "epoch": 6.426555770087628, "grad_norm": 0.5635876655578613, "learning_rate": 3.5734442299123724e-06, "loss": 0.0494, "step": 43270 }, { "epoch": 6.428040992128323, "grad_norm": 1.0205134153366089, "learning_rate": 3.571959007871677e-06, "loss": 0.0579, "step": 43280 }, { "epoch": 6.429526214169019, "grad_norm": 0.9467157125473022, "learning_rate": 3.5704737858309817e-06, "loss": 0.0696, "step": 43290 }, { "epoch": 6.4310114362097135, "grad_norm": 0.8886051774024963, "learning_rate": 3.568988563790287e-06, "loss": 0.0607, "step": 43300 }, { "epoch": 6.432496658250408, "grad_norm": 0.7653177976608276, "learning_rate": 3.567503341749592e-06, "loss": 0.0823, "step": 43310 }, { "epoch": 6.433981880291103, "grad_norm": 1.416134238243103, "learning_rate": 3.5660181197088967e-06, "loss": 0.0665, "step": 43320 }, { "epoch": 6.435467102331798, "grad_norm": 0.49492040276527405, "learning_rate": 3.5645328976682016e-06, "loss": 0.0612, "step": 43330 }, { "epoch": 6.436952324372494, "grad_norm": 0.8057112693786621, "learning_rate": 3.563047675627507e-06, "loss": 0.0647, "step": 43340 }, { "epoch": 6.438437546413189, "grad_norm": 0.5526387095451355, "learning_rate": 3.5615624535868117e-06, "loss": 0.0738, "step": 43350 }, { "epoch": 6.439922768453884, "grad_norm": 1.0343058109283447, "learning_rate": 3.560077231546116e-06, "loss": 0.0657, "step": 43360 }, { "epoch": 6.441407990494579, "grad_norm": 1.026384949684143, "learning_rate": 3.5585920095054214e-06, "loss": 0.0455, "step": 43370 }, { "epoch": 6.4428932125352745, "grad_norm": 0.7136487364768982, "learning_rate": 3.5571067874647263e-06, "loss": 0.0517, "step": 43380 }, { "epoch": 6.444378434575969, "grad_norm": 1.1231248378753662, "learning_rate": 3.555621565424031e-06, "loss": 0.0539, "step": 43390 }, { "epoch": 6.445863656616664, "grad_norm": 0.7071220278739929, "learning_rate": 3.554136343383336e-06, "loss": 0.0462, "step": 43400 }, { "epoch": 6.447348878657359, "grad_norm": 0.6214123368263245, "learning_rate": 3.5526511213426413e-06, "loss": 0.0631, "step": 43410 }, { "epoch": 6.448834100698054, "grad_norm": 0.8865244388580322, "learning_rate": 3.551165899301946e-06, "loss": 0.0481, "step": 43420 }, { "epoch": 6.45031932273875, "grad_norm": 0.6450020670890808, "learning_rate": 3.5496806772612506e-06, "loss": 0.0357, "step": 43430 }, { "epoch": 6.451804544779445, "grad_norm": 0.4999340772628784, "learning_rate": 3.5481954552205554e-06, "loss": 0.0623, "step": 43440 }, { "epoch": 6.45328976682014, "grad_norm": 0.978721559047699, "learning_rate": 3.5467102331798607e-06, "loss": 0.0814, "step": 43450 }, { "epoch": 6.454774988860835, "grad_norm": 1.1102402210235596, "learning_rate": 3.5452250111391656e-06, "loss": 0.0667, "step": 43460 }, { "epoch": 6.4562602109015295, "grad_norm": 0.7155255675315857, "learning_rate": 3.5437397890984704e-06, "loss": 0.0361, "step": 43470 }, { "epoch": 6.457745432942225, "grad_norm": 0.6756823658943176, "learning_rate": 3.5422545670577753e-06, "loss": 0.0772, "step": 43480 }, { "epoch": 6.45923065498292, "grad_norm": 0.9203720688819885, "learning_rate": 3.5407693450170805e-06, "loss": 0.0585, "step": 43490 }, { "epoch": 6.460715877023615, "grad_norm": 1.1555261611938477, "learning_rate": 3.5392841229763854e-06, "loss": 0.046, "step": 43500 }, { "epoch": 6.46220109906431, "grad_norm": 0.8422235250473022, "learning_rate": 3.53779890093569e-06, "loss": 0.0494, "step": 43510 }, { "epoch": 6.463686321105005, "grad_norm": 0.676811695098877, "learning_rate": 3.536313678894995e-06, "loss": 0.0549, "step": 43520 }, { "epoch": 6.465171543145701, "grad_norm": 2.3225271701812744, "learning_rate": 3.5348284568543e-06, "loss": 0.0959, "step": 43530 }, { "epoch": 6.4666567651863955, "grad_norm": 0.8296643495559692, "learning_rate": 3.533343234813605e-06, "loss": 0.0632, "step": 43540 }, { "epoch": 6.46814198722709, "grad_norm": 1.4344838857650757, "learning_rate": 3.5318580127729097e-06, "loss": 0.0584, "step": 43550 }, { "epoch": 6.469627209267785, "grad_norm": 1.3841466903686523, "learning_rate": 3.530372790732215e-06, "loss": 0.0707, "step": 43560 }, { "epoch": 6.47111243130848, "grad_norm": 0.9099441170692444, "learning_rate": 3.52888756869152e-06, "loss": 0.0615, "step": 43570 }, { "epoch": 6.472597653349176, "grad_norm": 0.8922671675682068, "learning_rate": 3.5274023466508247e-06, "loss": 0.0591, "step": 43580 }, { "epoch": 6.474082875389871, "grad_norm": 1.31600821018219, "learning_rate": 3.525917124610129e-06, "loss": 0.0567, "step": 43590 }, { "epoch": 6.475568097430566, "grad_norm": 0.8927801847457886, "learning_rate": 3.5244319025694344e-06, "loss": 0.0739, "step": 43600 }, { "epoch": 6.477053319471261, "grad_norm": 0.879862368106842, "learning_rate": 3.5229466805287393e-06, "loss": 0.0516, "step": 43610 }, { "epoch": 6.478538541511956, "grad_norm": 0.6488139629364014, "learning_rate": 3.521461458488044e-06, "loss": 0.048, "step": 43620 }, { "epoch": 6.480023763552651, "grad_norm": 0.35967373847961426, "learning_rate": 3.5199762364473494e-06, "loss": 0.0622, "step": 43630 }, { "epoch": 6.481508985593346, "grad_norm": 0.7182832360267639, "learning_rate": 3.5184910144066542e-06, "loss": 0.0615, "step": 43640 }, { "epoch": 6.482994207634041, "grad_norm": 0.6438133716583252, "learning_rate": 3.517005792365959e-06, "loss": 0.0607, "step": 43650 }, { "epoch": 6.484479429674736, "grad_norm": 0.5943735241889954, "learning_rate": 3.5155205703252635e-06, "loss": 0.0517, "step": 43660 }, { "epoch": 6.485964651715431, "grad_norm": 0.5622908473014832, "learning_rate": 3.5140353482845692e-06, "loss": 0.052, "step": 43670 }, { "epoch": 6.487449873756127, "grad_norm": 0.690758466720581, "learning_rate": 3.5125501262438737e-06, "loss": 0.0612, "step": 43680 }, { "epoch": 6.488935095796822, "grad_norm": 0.8912779092788696, "learning_rate": 3.5110649042031785e-06, "loss": 0.0464, "step": 43690 }, { "epoch": 6.490420317837517, "grad_norm": 0.9846697449684143, "learning_rate": 3.5095796821624834e-06, "loss": 0.0592, "step": 43700 }, { "epoch": 6.4919055398782115, "grad_norm": 0.5257307291030884, "learning_rate": 3.5080944601217887e-06, "loss": 0.0703, "step": 43710 }, { "epoch": 6.493390761918906, "grad_norm": 0.32277756929397583, "learning_rate": 3.5066092380810935e-06, "loss": 0.0585, "step": 43720 }, { "epoch": 6.494875983959602, "grad_norm": 0.7821950912475586, "learning_rate": 3.5051240160403984e-06, "loss": 0.0685, "step": 43730 }, { "epoch": 6.496361206000297, "grad_norm": 0.563582181930542, "learning_rate": 3.503638793999703e-06, "loss": 0.0514, "step": 43740 }, { "epoch": 6.497846428040992, "grad_norm": 0.6015027165412903, "learning_rate": 3.502153571959008e-06, "loss": 0.0631, "step": 43750 }, { "epoch": 6.499331650081687, "grad_norm": 0.8591887950897217, "learning_rate": 3.500668349918313e-06, "loss": 0.0703, "step": 43760 }, { "epoch": 6.500816872122383, "grad_norm": 0.7110676765441895, "learning_rate": 3.499183127877618e-06, "loss": 0.0765, "step": 43770 }, { "epoch": 6.502302094163078, "grad_norm": 0.6216291785240173, "learning_rate": 3.497697905836923e-06, "loss": 0.0496, "step": 43780 }, { "epoch": 6.5037873162037725, "grad_norm": 0.9407916069030762, "learning_rate": 3.496212683796228e-06, "loss": 0.0538, "step": 43790 }, { "epoch": 6.505272538244467, "grad_norm": 1.50139582157135, "learning_rate": 3.494727461755533e-06, "loss": 0.0619, "step": 43800 }, { "epoch": 6.506757760285162, "grad_norm": 1.1639320850372314, "learning_rate": 3.4932422397148372e-06, "loss": 0.0467, "step": 43810 }, { "epoch": 6.508242982325858, "grad_norm": 0.931298017501831, "learning_rate": 3.491757017674143e-06, "loss": 0.0692, "step": 43820 }, { "epoch": 6.509728204366553, "grad_norm": 0.8163691163063049, "learning_rate": 3.4902717956334474e-06, "loss": 0.0646, "step": 43830 }, { "epoch": 6.511213426407248, "grad_norm": 1.0349771976470947, "learning_rate": 3.4887865735927522e-06, "loss": 0.062, "step": 43840 }, { "epoch": 6.512698648447943, "grad_norm": 1.071566104888916, "learning_rate": 3.487301351552057e-06, "loss": 0.0657, "step": 43850 }, { "epoch": 6.514183870488638, "grad_norm": 0.7026413679122925, "learning_rate": 3.4858161295113624e-06, "loss": 0.0638, "step": 43860 }, { "epoch": 6.5156690925293335, "grad_norm": 0.6621662378311157, "learning_rate": 3.4843309074706672e-06, "loss": 0.0503, "step": 43870 }, { "epoch": 6.517154314570028, "grad_norm": 0.49411311745643616, "learning_rate": 3.482845685429972e-06, "loss": 0.0443, "step": 43880 }, { "epoch": 6.518639536610723, "grad_norm": 0.7259182333946228, "learning_rate": 3.4813604633892774e-06, "loss": 0.0583, "step": 43890 }, { "epoch": 6.520124758651418, "grad_norm": 0.4303617477416992, "learning_rate": 3.4798752413485822e-06, "loss": 0.0545, "step": 43900 }, { "epoch": 6.521609980692114, "grad_norm": 0.8239855766296387, "learning_rate": 3.4783900193078867e-06, "loss": 0.0581, "step": 43910 }, { "epoch": 6.523095202732809, "grad_norm": 1.0155378580093384, "learning_rate": 3.4769047972671915e-06, "loss": 0.0651, "step": 43920 }, { "epoch": 6.524580424773504, "grad_norm": 0.9577513933181763, "learning_rate": 3.475419575226497e-06, "loss": 0.0641, "step": 43930 }, { "epoch": 6.526065646814199, "grad_norm": 0.8862541317939758, "learning_rate": 3.4739343531858016e-06, "loss": 0.0483, "step": 43940 }, { "epoch": 6.5275508688548936, "grad_norm": 1.088813066482544, "learning_rate": 3.4724491311451065e-06, "loss": 0.0629, "step": 43950 }, { "epoch": 6.529036090895589, "grad_norm": 0.4905775785446167, "learning_rate": 3.4709639091044114e-06, "loss": 0.0428, "step": 43960 }, { "epoch": 6.530521312936284, "grad_norm": 0.9292188286781311, "learning_rate": 3.4694786870637166e-06, "loss": 0.0506, "step": 43970 }, { "epoch": 6.532006534976979, "grad_norm": 1.2144511938095093, "learning_rate": 3.467993465023021e-06, "loss": 0.0699, "step": 43980 }, { "epoch": 6.533491757017674, "grad_norm": 1.1785643100738525, "learning_rate": 3.466508242982326e-06, "loss": 0.0581, "step": 43990 }, { "epoch": 6.534976979058369, "grad_norm": 0.647038459777832, "learning_rate": 3.465023020941631e-06, "loss": 0.066, "step": 44000 }, { "epoch": 6.536462201099065, "grad_norm": 0.5554239749908447, "learning_rate": 3.463537798900936e-06, "loss": 0.0673, "step": 44010 }, { "epoch": 6.53794742313976, "grad_norm": 0.9084454774856567, "learning_rate": 3.462052576860241e-06, "loss": 0.0677, "step": 44020 }, { "epoch": 6.5394326451804545, "grad_norm": 0.7279065251350403, "learning_rate": 3.4605673548195458e-06, "loss": 0.0674, "step": 44030 }, { "epoch": 6.540917867221149, "grad_norm": 0.3517337441444397, "learning_rate": 3.459082132778851e-06, "loss": 0.0519, "step": 44040 }, { "epoch": 6.542403089261844, "grad_norm": 1.2529404163360596, "learning_rate": 3.457596910738156e-06, "loss": 0.0594, "step": 44050 }, { "epoch": 6.54388831130254, "grad_norm": 1.125239610671997, "learning_rate": 3.4561116886974604e-06, "loss": 0.0717, "step": 44060 }, { "epoch": 6.545373533343235, "grad_norm": 1.0038970708847046, "learning_rate": 3.454626466656765e-06, "loss": 0.0566, "step": 44070 }, { "epoch": 6.54685875538393, "grad_norm": 1.224101185798645, "learning_rate": 3.4531412446160705e-06, "loss": 0.0514, "step": 44080 }, { "epoch": 6.548343977424625, "grad_norm": 1.390031099319458, "learning_rate": 3.4516560225753753e-06, "loss": 0.0636, "step": 44090 }, { "epoch": 6.54982919946532, "grad_norm": 1.0642993450164795, "learning_rate": 3.45017080053468e-06, "loss": 0.0786, "step": 44100 }, { "epoch": 6.5513144215060155, "grad_norm": 0.3884289264678955, "learning_rate": 3.448685578493985e-06, "loss": 0.055, "step": 44110 }, { "epoch": 6.55279964354671, "grad_norm": 0.6680036187171936, "learning_rate": 3.4472003564532903e-06, "loss": 0.043, "step": 44120 }, { "epoch": 6.554284865587405, "grad_norm": 0.6033088564872742, "learning_rate": 3.4457151344125948e-06, "loss": 0.0571, "step": 44130 }, { "epoch": 6.5557700876281, "grad_norm": 0.7803447246551514, "learning_rate": 3.4442299123718996e-06, "loss": 0.0646, "step": 44140 }, { "epoch": 6.557255309668795, "grad_norm": 1.9591981172561646, "learning_rate": 3.442744690331205e-06, "loss": 0.0556, "step": 44150 }, { "epoch": 6.558740531709491, "grad_norm": 0.18442347645759583, "learning_rate": 3.4412594682905098e-06, "loss": 0.0426, "step": 44160 }, { "epoch": 6.560225753750186, "grad_norm": 0.8935715556144714, "learning_rate": 3.4397742462498146e-06, "loss": 0.0599, "step": 44170 }, { "epoch": 6.561710975790881, "grad_norm": 0.8119651675224304, "learning_rate": 3.4382890242091195e-06, "loss": 0.063, "step": 44180 }, { "epoch": 6.563196197831576, "grad_norm": 0.9296265840530396, "learning_rate": 3.4368038021684248e-06, "loss": 0.0711, "step": 44190 }, { "epoch": 6.5646814198722705, "grad_norm": 0.2975754737854004, "learning_rate": 3.4353185801277296e-06, "loss": 0.0525, "step": 44200 }, { "epoch": 6.566166641912966, "grad_norm": 0.6989734768867493, "learning_rate": 3.433833358087034e-06, "loss": 0.0506, "step": 44210 }, { "epoch": 6.567651863953661, "grad_norm": 0.19553795456886292, "learning_rate": 3.432348136046339e-06, "loss": 0.0472, "step": 44220 }, { "epoch": 6.569137085994356, "grad_norm": 0.7017039060592651, "learning_rate": 3.430862914005644e-06, "loss": 0.0598, "step": 44230 }, { "epoch": 6.570622308035051, "grad_norm": 1.4003336429595947, "learning_rate": 3.429377691964949e-06, "loss": 0.0527, "step": 44240 }, { "epoch": 6.572107530075746, "grad_norm": 1.4430428743362427, "learning_rate": 3.427892469924254e-06, "loss": 0.0695, "step": 44250 }, { "epoch": 6.573592752116442, "grad_norm": 0.7251480221748352, "learning_rate": 3.4264072478835588e-06, "loss": 0.0559, "step": 44260 }, { "epoch": 6.575077974157137, "grad_norm": 0.3925531208515167, "learning_rate": 3.424922025842864e-06, "loss": 0.0617, "step": 44270 }, { "epoch": 6.5765631961978315, "grad_norm": 0.9270767569541931, "learning_rate": 3.423436803802169e-06, "loss": 0.0668, "step": 44280 }, { "epoch": 6.578048418238526, "grad_norm": 0.6193248629570007, "learning_rate": 3.4219515817614733e-06, "loss": 0.0668, "step": 44290 }, { "epoch": 6.579533640279221, "grad_norm": 1.1179356575012207, "learning_rate": 3.4204663597207786e-06, "loss": 0.0491, "step": 44300 }, { "epoch": 6.581018862319917, "grad_norm": 0.7524288296699524, "learning_rate": 3.4189811376800835e-06, "loss": 0.0485, "step": 44310 }, { "epoch": 6.582504084360612, "grad_norm": 0.6350118517875671, "learning_rate": 3.4174959156393883e-06, "loss": 0.0575, "step": 44320 }, { "epoch": 6.583989306401307, "grad_norm": 0.1729784607887268, "learning_rate": 3.416010693598693e-06, "loss": 0.0454, "step": 44330 }, { "epoch": 6.585474528442002, "grad_norm": 0.2924365997314453, "learning_rate": 3.4145254715579985e-06, "loss": 0.061, "step": 44340 }, { "epoch": 6.5869597504826976, "grad_norm": 0.9202251434326172, "learning_rate": 3.4130402495173033e-06, "loss": 0.0491, "step": 44350 }, { "epoch": 6.5884449725233925, "grad_norm": 0.7880986928939819, "learning_rate": 3.4115550274766078e-06, "loss": 0.0465, "step": 44360 }, { "epoch": 6.589930194564087, "grad_norm": 0.7341892719268799, "learning_rate": 3.4100698054359126e-06, "loss": 0.0741, "step": 44370 }, { "epoch": 6.591415416604782, "grad_norm": 0.5536158084869385, "learning_rate": 3.408584583395218e-06, "loss": 0.0548, "step": 44380 }, { "epoch": 6.592900638645477, "grad_norm": 0.754804253578186, "learning_rate": 3.4070993613545227e-06, "loss": 0.047, "step": 44390 }, { "epoch": 6.594385860686173, "grad_norm": 0.522686243057251, "learning_rate": 3.4056141393138276e-06, "loss": 0.0683, "step": 44400 }, { "epoch": 6.595871082726868, "grad_norm": 1.4597195386886597, "learning_rate": 3.404128917273133e-06, "loss": 0.0595, "step": 44410 }, { "epoch": 6.597356304767563, "grad_norm": 0.75593501329422, "learning_rate": 3.4026436952324377e-06, "loss": 0.0628, "step": 44420 }, { "epoch": 6.598841526808258, "grad_norm": 0.8334012627601624, "learning_rate": 3.4011584731917426e-06, "loss": 0.0488, "step": 44430 }, { "epoch": 6.6003267488489525, "grad_norm": 1.003138780593872, "learning_rate": 3.399673251151047e-06, "loss": 0.0675, "step": 44440 }, { "epoch": 6.601811970889648, "grad_norm": 0.5932856798171997, "learning_rate": 3.3981880291103523e-06, "loss": 0.0432, "step": 44450 }, { "epoch": 6.603297192930343, "grad_norm": 0.6687909364700317, "learning_rate": 3.396702807069657e-06, "loss": 0.068, "step": 44460 }, { "epoch": 6.604782414971038, "grad_norm": 0.2233189046382904, "learning_rate": 3.395217585028962e-06, "loss": 0.0696, "step": 44470 }, { "epoch": 6.606267637011733, "grad_norm": 1.6143152713775635, "learning_rate": 3.393732362988267e-06, "loss": 0.0388, "step": 44480 }, { "epoch": 6.607752859052429, "grad_norm": 1.348990797996521, "learning_rate": 3.392247140947572e-06, "loss": 0.0639, "step": 44490 }, { "epoch": 6.609238081093124, "grad_norm": 0.905087947845459, "learning_rate": 3.390761918906877e-06, "loss": 0.0636, "step": 44500 }, { "epoch": 6.610723303133819, "grad_norm": 0.6363906264305115, "learning_rate": 3.3892766968661815e-06, "loss": 0.0394, "step": 44510 }, { "epoch": 6.6122085251745135, "grad_norm": 0.41277167201042175, "learning_rate": 3.3877914748254863e-06, "loss": 0.0618, "step": 44520 }, { "epoch": 6.613693747215208, "grad_norm": 0.7106294631958008, "learning_rate": 3.3863062527847916e-06, "loss": 0.0444, "step": 44530 }, { "epoch": 6.615178969255904, "grad_norm": 0.21014659106731415, "learning_rate": 3.3848210307440964e-06, "loss": 0.0429, "step": 44540 }, { "epoch": 6.616664191296599, "grad_norm": 1.446640968322754, "learning_rate": 3.3833358087034013e-06, "loss": 0.0565, "step": 44550 }, { "epoch": 6.618149413337294, "grad_norm": 1.2112292051315308, "learning_rate": 3.3818505866627066e-06, "loss": 0.0668, "step": 44560 }, { "epoch": 6.619634635377989, "grad_norm": 0.5960361957550049, "learning_rate": 3.3803653646220114e-06, "loss": 0.0721, "step": 44570 }, { "epoch": 6.621119857418684, "grad_norm": 0.8905211091041565, "learning_rate": 3.3788801425813163e-06, "loss": 0.0709, "step": 44580 }, { "epoch": 6.62260507945938, "grad_norm": 0.4864606559276581, "learning_rate": 3.3773949205406207e-06, "loss": 0.0506, "step": 44590 }, { "epoch": 6.6240903015000745, "grad_norm": 0.5841920375823975, "learning_rate": 3.375909698499926e-06, "loss": 0.0631, "step": 44600 }, { "epoch": 6.625575523540769, "grad_norm": 0.9447734355926514, "learning_rate": 3.374424476459231e-06, "loss": 0.0508, "step": 44610 }, { "epoch": 6.627060745581464, "grad_norm": 0.7786805629730225, "learning_rate": 3.3729392544185357e-06, "loss": 0.0716, "step": 44620 }, { "epoch": 6.628545967622159, "grad_norm": 0.5071942210197449, "learning_rate": 3.3714540323778406e-06, "loss": 0.0617, "step": 44630 }, { "epoch": 6.630031189662855, "grad_norm": 1.5175836086273193, "learning_rate": 3.369968810337146e-06, "loss": 0.0564, "step": 44640 }, { "epoch": 6.63151641170355, "grad_norm": 0.8158230781555176, "learning_rate": 3.3684835882964507e-06, "loss": 0.0646, "step": 44650 }, { "epoch": 6.633001633744245, "grad_norm": 1.2170836925506592, "learning_rate": 3.3669983662557556e-06, "loss": 0.0498, "step": 44660 }, { "epoch": 6.63448685578494, "grad_norm": 0.3966522514820099, "learning_rate": 3.365513144215061e-06, "loss": 0.0596, "step": 44670 }, { "epoch": 6.635972077825635, "grad_norm": 1.9576127529144287, "learning_rate": 3.3640279221743653e-06, "loss": 0.0552, "step": 44680 }, { "epoch": 6.63745729986633, "grad_norm": 1.2230498790740967, "learning_rate": 3.36254270013367e-06, "loss": 0.0606, "step": 44690 }, { "epoch": 6.638942521907025, "grad_norm": 1.4444591999053955, "learning_rate": 3.361057478092975e-06, "loss": 0.0427, "step": 44700 }, { "epoch": 6.64042774394772, "grad_norm": 1.66185462474823, "learning_rate": 3.3595722560522803e-06, "loss": 0.0519, "step": 44710 }, { "epoch": 6.641912965988415, "grad_norm": 0.7028926610946655, "learning_rate": 3.358087034011585e-06, "loss": 0.0565, "step": 44720 }, { "epoch": 6.64339818802911, "grad_norm": 1.442725658416748, "learning_rate": 3.35660181197089e-06, "loss": 0.0613, "step": 44730 }, { "epoch": 6.644883410069806, "grad_norm": 1.138018250465393, "learning_rate": 3.3551165899301944e-06, "loss": 0.0434, "step": 44740 }, { "epoch": 6.646368632110501, "grad_norm": 1.1926655769348145, "learning_rate": 3.3536313678895e-06, "loss": 0.0592, "step": 44750 }, { "epoch": 6.647853854151196, "grad_norm": 0.6647235155105591, "learning_rate": 3.3521461458488046e-06, "loss": 0.0579, "step": 44760 }, { "epoch": 6.6493390761918905, "grad_norm": 0.8624783754348755, "learning_rate": 3.3506609238081094e-06, "loss": 0.0679, "step": 44770 }, { "epoch": 6.650824298232585, "grad_norm": 0.6453647017478943, "learning_rate": 3.3491757017674143e-06, "loss": 0.0566, "step": 44780 }, { "epoch": 6.652309520273281, "grad_norm": 1.101426124572754, "learning_rate": 3.3476904797267196e-06, "loss": 0.0472, "step": 44790 }, { "epoch": 6.653794742313976, "grad_norm": 0.6298049688339233, "learning_rate": 3.3462052576860244e-06, "loss": 0.065, "step": 44800 }, { "epoch": 6.655279964354671, "grad_norm": 0.6735140085220337, "learning_rate": 3.3447200356453293e-06, "loss": 0.0458, "step": 44810 }, { "epoch": 6.656765186395366, "grad_norm": 0.6601940393447876, "learning_rate": 3.3432348136046346e-06, "loss": 0.0698, "step": 44820 }, { "epoch": 6.658250408436061, "grad_norm": 1.033394694328308, "learning_rate": 3.341749591563939e-06, "loss": 0.0547, "step": 44830 }, { "epoch": 6.6597356304767565, "grad_norm": 0.618804395198822, "learning_rate": 3.340264369523244e-06, "loss": 0.0619, "step": 44840 }, { "epoch": 6.661220852517451, "grad_norm": 0.6303835511207581, "learning_rate": 3.3387791474825487e-06, "loss": 0.0563, "step": 44850 }, { "epoch": 6.662706074558146, "grad_norm": 1.1331371068954468, "learning_rate": 3.337293925441854e-06, "loss": 0.0589, "step": 44860 }, { "epoch": 6.664191296598841, "grad_norm": 0.8499082326889038, "learning_rate": 3.335808703401159e-06, "loss": 0.0524, "step": 44870 }, { "epoch": 6.665676518639536, "grad_norm": 0.5605273246765137, "learning_rate": 3.3343234813604637e-06, "loss": 0.055, "step": 44880 }, { "epoch": 6.667161740680232, "grad_norm": 1.4254909753799438, "learning_rate": 3.332838259319768e-06, "loss": 0.048, "step": 44890 }, { "epoch": 6.668646962720927, "grad_norm": 0.7363986968994141, "learning_rate": 3.331353037279074e-06, "loss": 0.0599, "step": 44900 }, { "epoch": 6.670132184761622, "grad_norm": 0.7361230254173279, "learning_rate": 3.3298678152383783e-06, "loss": 0.0585, "step": 44910 }, { "epoch": 6.671617406802317, "grad_norm": 1.6775233745574951, "learning_rate": 3.328382593197683e-06, "loss": 0.0788, "step": 44920 }, { "epoch": 6.673102628843012, "grad_norm": 1.4757988452911377, "learning_rate": 3.326897371156988e-06, "loss": 0.062, "step": 44930 }, { "epoch": 6.674587850883707, "grad_norm": 0.7029728889465332, "learning_rate": 3.3254121491162933e-06, "loss": 0.0613, "step": 44940 }, { "epoch": 6.676073072924402, "grad_norm": 0.9859765768051147, "learning_rate": 3.323926927075598e-06, "loss": 0.0582, "step": 44950 }, { "epoch": 6.677558294965097, "grad_norm": 0.9241379499435425, "learning_rate": 3.322441705034903e-06, "loss": 0.0666, "step": 44960 }, { "epoch": 6.679043517005792, "grad_norm": 0.483398973941803, "learning_rate": 3.3209564829942083e-06, "loss": 0.069, "step": 44970 }, { "epoch": 6.680528739046488, "grad_norm": 0.4294925928115845, "learning_rate": 3.319471260953513e-06, "loss": 0.056, "step": 44980 }, { "epoch": 6.682013961087183, "grad_norm": 0.8365574479103088, "learning_rate": 3.3179860389128176e-06, "loss": 0.0571, "step": 44990 }, { "epoch": 6.683499183127878, "grad_norm": 0.9189231395721436, "learning_rate": 3.3165008168721224e-06, "loss": 0.0488, "step": 45000 }, { "epoch": 6.6849844051685725, "grad_norm": 0.796147882938385, "learning_rate": 3.3150155948314277e-06, "loss": 0.0497, "step": 45010 }, { "epoch": 6.686469627209267, "grad_norm": 0.5429372787475586, "learning_rate": 3.3135303727907325e-06, "loss": 0.0601, "step": 45020 }, { "epoch": 6.687954849249963, "grad_norm": 1.0063883066177368, "learning_rate": 3.3120451507500374e-06, "loss": 0.0622, "step": 45030 }, { "epoch": 6.689440071290658, "grad_norm": 1.2463853359222412, "learning_rate": 3.3105599287093423e-06, "loss": 0.0583, "step": 45040 }, { "epoch": 6.690925293331353, "grad_norm": 1.391730785369873, "learning_rate": 3.3090747066686475e-06, "loss": 0.0651, "step": 45050 }, { "epoch": 6.692410515372048, "grad_norm": 0.46899959444999695, "learning_rate": 3.307589484627952e-06, "loss": 0.057, "step": 45060 }, { "epoch": 6.693895737412744, "grad_norm": 0.9016135931015015, "learning_rate": 3.306104262587257e-06, "loss": 0.0479, "step": 45070 }, { "epoch": 6.695380959453439, "grad_norm": 0.6811698079109192, "learning_rate": 3.304619040546562e-06, "loss": 0.0446, "step": 45080 }, { "epoch": 6.6968661814941335, "grad_norm": 0.9126334190368652, "learning_rate": 3.303133818505867e-06, "loss": 0.068, "step": 45090 }, { "epoch": 6.698351403534828, "grad_norm": 1.4983772039413452, "learning_rate": 3.301648596465172e-06, "loss": 0.0551, "step": 45100 }, { "epoch": 6.699836625575523, "grad_norm": 0.8648777604103088, "learning_rate": 3.3001633744244767e-06, "loss": 0.0319, "step": 45110 }, { "epoch": 6.701321847616219, "grad_norm": 0.8801341652870178, "learning_rate": 3.298678152383782e-06, "loss": 0.0612, "step": 45120 }, { "epoch": 6.702807069656914, "grad_norm": 0.8212476968765259, "learning_rate": 3.297192930343087e-06, "loss": 0.0661, "step": 45130 }, { "epoch": 6.704292291697609, "grad_norm": 1.3855417966842651, "learning_rate": 3.2957077083023913e-06, "loss": 0.0626, "step": 45140 }, { "epoch": 6.705777513738304, "grad_norm": 1.2375926971435547, "learning_rate": 3.294222486261696e-06, "loss": 0.0466, "step": 45150 }, { "epoch": 6.707262735778999, "grad_norm": 1.4005922079086304, "learning_rate": 3.2927372642210014e-06, "loss": 0.0486, "step": 45160 }, { "epoch": 6.7087479578196945, "grad_norm": 0.3769647777080536, "learning_rate": 3.2912520421803062e-06, "loss": 0.0463, "step": 45170 }, { "epoch": 6.710233179860389, "grad_norm": 1.2366719245910645, "learning_rate": 3.289766820139611e-06, "loss": 0.0454, "step": 45180 }, { "epoch": 6.711718401901084, "grad_norm": 0.6258324980735779, "learning_rate": 3.288281598098916e-06, "loss": 0.0584, "step": 45190 }, { "epoch": 6.713203623941779, "grad_norm": 0.7826153635978699, "learning_rate": 3.2867963760582212e-06, "loss": 0.0534, "step": 45200 }, { "epoch": 6.714688845982474, "grad_norm": 0.854837954044342, "learning_rate": 3.2853111540175257e-06, "loss": 0.0491, "step": 45210 }, { "epoch": 6.71617406802317, "grad_norm": 0.3340891897678375, "learning_rate": 3.2838259319768305e-06, "loss": 0.0765, "step": 45220 }, { "epoch": 6.717659290063865, "grad_norm": 0.7108610272407532, "learning_rate": 3.282340709936136e-06, "loss": 0.0465, "step": 45230 }, { "epoch": 6.71914451210456, "grad_norm": 1.0852887630462646, "learning_rate": 3.2808554878954407e-06, "loss": 0.0535, "step": 45240 }, { "epoch": 6.7206297341452546, "grad_norm": 0.7470007538795471, "learning_rate": 3.2793702658547455e-06, "loss": 0.0561, "step": 45250 }, { "epoch": 6.7221149561859495, "grad_norm": 0.2710890471935272, "learning_rate": 3.2778850438140504e-06, "loss": 0.0507, "step": 45260 }, { "epoch": 6.723600178226645, "grad_norm": 0.7607895731925964, "learning_rate": 3.2763998217733557e-06, "loss": 0.0411, "step": 45270 }, { "epoch": 6.72508540026734, "grad_norm": 1.0981948375701904, "learning_rate": 3.2749145997326605e-06, "loss": 0.0525, "step": 45280 }, { "epoch": 6.726570622308035, "grad_norm": 0.8234240412712097, "learning_rate": 3.273429377691965e-06, "loss": 0.065, "step": 45290 }, { "epoch": 6.72805584434873, "grad_norm": 0.6028541922569275, "learning_rate": 3.27194415565127e-06, "loss": 0.0472, "step": 45300 }, { "epoch": 6.729541066389425, "grad_norm": 1.2868266105651855, "learning_rate": 3.270458933610575e-06, "loss": 0.0375, "step": 45310 }, { "epoch": 6.731026288430121, "grad_norm": 1.1159216165542603, "learning_rate": 3.26897371156988e-06, "loss": 0.064, "step": 45320 }, { "epoch": 6.7325115104708155, "grad_norm": 1.5056071281433105, "learning_rate": 3.267488489529185e-06, "loss": 0.0546, "step": 45330 }, { "epoch": 6.73399673251151, "grad_norm": 2.2418577671051025, "learning_rate": 3.26600326748849e-06, "loss": 0.045, "step": 45340 }, { "epoch": 6.735481954552205, "grad_norm": 0.6371803283691406, "learning_rate": 3.264518045447795e-06, "loss": 0.0709, "step": 45350 }, { "epoch": 6.7369671765929, "grad_norm": 0.7394696474075317, "learning_rate": 3.2630328234071e-06, "loss": 0.0668, "step": 45360 }, { "epoch": 6.738452398633596, "grad_norm": 0.636842668056488, "learning_rate": 3.2615476013664042e-06, "loss": 0.0461, "step": 45370 }, { "epoch": 6.739937620674291, "grad_norm": 0.5398241281509399, "learning_rate": 3.2600623793257095e-06, "loss": 0.0529, "step": 45380 }, { "epoch": 6.741422842714986, "grad_norm": 1.4932724237442017, "learning_rate": 3.2585771572850144e-06, "loss": 0.0532, "step": 45390 }, { "epoch": 6.742908064755681, "grad_norm": 0.7000812292098999, "learning_rate": 3.2570919352443192e-06, "loss": 0.0541, "step": 45400 }, { "epoch": 6.744393286796376, "grad_norm": 0.3533134460449219, "learning_rate": 3.255606713203624e-06, "loss": 0.0496, "step": 45410 }, { "epoch": 6.745878508837071, "grad_norm": 1.3116090297698975, "learning_rate": 3.2541214911629294e-06, "loss": 0.058, "step": 45420 }, { "epoch": 6.747363730877766, "grad_norm": 1.1693205833435059, "learning_rate": 3.2526362691222342e-06, "loss": 0.0679, "step": 45430 }, { "epoch": 6.748848952918461, "grad_norm": 0.7300345301628113, "learning_rate": 3.2511510470815387e-06, "loss": 0.052, "step": 45440 }, { "epoch": 6.750334174959156, "grad_norm": 1.4570856094360352, "learning_rate": 3.2496658250408435e-06, "loss": 0.0573, "step": 45450 }, { "epoch": 6.751819396999851, "grad_norm": 0.6028823852539062, "learning_rate": 3.248180603000149e-06, "loss": 0.0514, "step": 45460 }, { "epoch": 6.753304619040547, "grad_norm": 0.6922430396080017, "learning_rate": 3.2466953809594536e-06, "loss": 0.0457, "step": 45470 }, { "epoch": 6.754789841081242, "grad_norm": 1.0826843976974487, "learning_rate": 3.2452101589187585e-06, "loss": 0.0374, "step": 45480 }, { "epoch": 6.756275063121937, "grad_norm": 0.9226403832435608, "learning_rate": 3.2437249368780638e-06, "loss": 0.075, "step": 45490 }, { "epoch": 6.7577602851626315, "grad_norm": 1.2400094270706177, "learning_rate": 3.2422397148373686e-06, "loss": 0.0735, "step": 45500 }, { "epoch": 6.759245507203327, "grad_norm": 0.8410294055938721, "learning_rate": 3.2407544927966735e-06, "loss": 0.0627, "step": 45510 }, { "epoch": 6.760730729244022, "grad_norm": 0.6011391878128052, "learning_rate": 3.239269270755978e-06, "loss": 0.0453, "step": 45520 }, { "epoch": 6.762215951284717, "grad_norm": 1.1391057968139648, "learning_rate": 3.237784048715283e-06, "loss": 0.068, "step": 45530 }, { "epoch": 6.763701173325412, "grad_norm": 0.9182172417640686, "learning_rate": 3.236298826674588e-06, "loss": 0.062, "step": 45540 }, { "epoch": 6.765186395366107, "grad_norm": 0.744276225566864, "learning_rate": 3.234813604633893e-06, "loss": 0.0592, "step": 45550 }, { "epoch": 6.766671617406803, "grad_norm": 0.7632409930229187, "learning_rate": 3.2333283825931978e-06, "loss": 0.0542, "step": 45560 }, { "epoch": 6.768156839447498, "grad_norm": 1.5479755401611328, "learning_rate": 3.231843160552503e-06, "loss": 0.075, "step": 45570 }, { "epoch": 6.7696420614881925, "grad_norm": 0.5727285742759705, "learning_rate": 3.230357938511808e-06, "loss": 0.0546, "step": 45580 }, { "epoch": 6.771127283528887, "grad_norm": 1.4612948894500732, "learning_rate": 3.2288727164711124e-06, "loss": 0.0482, "step": 45590 }, { "epoch": 6.772612505569582, "grad_norm": 1.1288641691207886, "learning_rate": 3.227387494430418e-06, "loss": 0.0623, "step": 45600 }, { "epoch": 6.774097727610278, "grad_norm": 0.662311315536499, "learning_rate": 3.2259022723897225e-06, "loss": 0.0448, "step": 45610 }, { "epoch": 6.775582949650973, "grad_norm": 1.3300342559814453, "learning_rate": 3.2244170503490273e-06, "loss": 0.0463, "step": 45620 }, { "epoch": 6.777068171691668, "grad_norm": 0.4563080966472626, "learning_rate": 3.222931828308332e-06, "loss": 0.0432, "step": 45630 }, { "epoch": 6.778553393732363, "grad_norm": 0.7089258432388306, "learning_rate": 3.2214466062676375e-06, "loss": 0.0524, "step": 45640 }, { "epoch": 6.7800386157730586, "grad_norm": 0.6759585738182068, "learning_rate": 3.2199613842269423e-06, "loss": 0.051, "step": 45650 }, { "epoch": 6.7815238378137535, "grad_norm": 0.4059349000453949, "learning_rate": 3.218476162186247e-06, "loss": 0.073, "step": 45660 }, { "epoch": 6.783009059854448, "grad_norm": 1.0772613286972046, "learning_rate": 3.2169909401455516e-06, "loss": 0.0635, "step": 45670 }, { "epoch": 6.784494281895143, "grad_norm": 0.5796312093734741, "learning_rate": 3.215505718104857e-06, "loss": 0.0508, "step": 45680 }, { "epoch": 6.785979503935838, "grad_norm": 0.342790812253952, "learning_rate": 3.2140204960641618e-06, "loss": 0.0686, "step": 45690 }, { "epoch": 6.787464725976534, "grad_norm": 0.28183823823928833, "learning_rate": 3.2125352740234666e-06, "loss": 0.0377, "step": 45700 }, { "epoch": 6.788949948017229, "grad_norm": 1.015336513519287, "learning_rate": 3.2110500519827715e-06, "loss": 0.0651, "step": 45710 }, { "epoch": 6.790435170057924, "grad_norm": 0.5184191465377808, "learning_rate": 3.2095648299420768e-06, "loss": 0.0566, "step": 45720 }, { "epoch": 6.791920392098619, "grad_norm": 1.2390819787979126, "learning_rate": 3.2080796079013816e-06, "loss": 0.0634, "step": 45730 }, { "epoch": 6.7934056141393135, "grad_norm": 1.3153749704360962, "learning_rate": 3.2065943858606865e-06, "loss": 0.0733, "step": 45740 }, { "epoch": 6.794890836180009, "grad_norm": 0.7628545165061951, "learning_rate": 3.2051091638199918e-06, "loss": 0.0642, "step": 45750 }, { "epoch": 6.796376058220704, "grad_norm": 0.3626285493373871, "learning_rate": 3.203623941779296e-06, "loss": 0.0683, "step": 45760 }, { "epoch": 6.797861280261399, "grad_norm": 1.9119198322296143, "learning_rate": 3.202138719738601e-06, "loss": 0.061, "step": 45770 }, { "epoch": 6.799346502302094, "grad_norm": 3.3886430263519287, "learning_rate": 3.200653497697906e-06, "loss": 0.0658, "step": 45780 }, { "epoch": 6.800831724342789, "grad_norm": 0.7113927006721497, "learning_rate": 3.199168275657211e-06, "loss": 0.0538, "step": 45790 }, { "epoch": 6.802316946383485, "grad_norm": 0.2835240364074707, "learning_rate": 3.197683053616516e-06, "loss": 0.0624, "step": 45800 }, { "epoch": 6.80380216842418, "grad_norm": 1.0122696161270142, "learning_rate": 3.196197831575821e-06, "loss": 0.0578, "step": 45810 }, { "epoch": 6.8052873904648745, "grad_norm": 0.16555657982826233, "learning_rate": 3.1947126095351253e-06, "loss": 0.0491, "step": 45820 }, { "epoch": 6.806772612505569, "grad_norm": 1.048108696937561, "learning_rate": 3.193227387494431e-06, "loss": 0.0622, "step": 45830 }, { "epoch": 6.808257834546264, "grad_norm": 0.842766523361206, "learning_rate": 3.1917421654537355e-06, "loss": 0.0464, "step": 45840 }, { "epoch": 6.80974305658696, "grad_norm": 0.6435438990592957, "learning_rate": 3.1902569434130403e-06, "loss": 0.0426, "step": 45850 }, { "epoch": 6.811228278627655, "grad_norm": 0.6203869581222534, "learning_rate": 3.1887717213723456e-06, "loss": 0.0484, "step": 45860 }, { "epoch": 6.81271350066835, "grad_norm": 0.9314228296279907, "learning_rate": 3.1872864993316505e-06, "loss": 0.0581, "step": 45870 }, { "epoch": 6.814198722709045, "grad_norm": 0.7096196413040161, "learning_rate": 3.1858012772909553e-06, "loss": 0.0488, "step": 45880 }, { "epoch": 6.81568394474974, "grad_norm": 0.7279118299484253, "learning_rate": 3.18431605525026e-06, "loss": 0.0684, "step": 45890 }, { "epoch": 6.8171691667904355, "grad_norm": 0.3500811755657196, "learning_rate": 3.1828308332095655e-06, "loss": 0.0511, "step": 45900 }, { "epoch": 6.81865438883113, "grad_norm": 2.1978440284729004, "learning_rate": 3.18134561116887e-06, "loss": 0.0594, "step": 45910 }, { "epoch": 6.820139610871825, "grad_norm": 0.5409901738166809, "learning_rate": 3.1798603891281747e-06, "loss": 0.0479, "step": 45920 }, { "epoch": 6.82162483291252, "grad_norm": 1.1827361583709717, "learning_rate": 3.1783751670874796e-06, "loss": 0.0754, "step": 45930 }, { "epoch": 6.823110054953215, "grad_norm": 1.3332449197769165, "learning_rate": 3.176889945046785e-06, "loss": 0.0687, "step": 45940 }, { "epoch": 6.824595276993911, "grad_norm": 1.1483596563339233, "learning_rate": 3.1754047230060897e-06, "loss": 0.0816, "step": 45950 }, { "epoch": 6.826080499034606, "grad_norm": 0.9273011684417725, "learning_rate": 3.1739195009653946e-06, "loss": 0.0577, "step": 45960 }, { "epoch": 6.827565721075301, "grad_norm": 1.3494038581848145, "learning_rate": 3.172434278924699e-06, "loss": 0.069, "step": 45970 }, { "epoch": 6.829050943115996, "grad_norm": 1.0662420988082886, "learning_rate": 3.1709490568840047e-06, "loss": 0.0494, "step": 45980 }, { "epoch": 6.8305361651566905, "grad_norm": 0.4860890507698059, "learning_rate": 3.169463834843309e-06, "loss": 0.0428, "step": 45990 }, { "epoch": 6.832021387197386, "grad_norm": 0.5755127668380737, "learning_rate": 3.167978612802614e-06, "loss": 0.0485, "step": 46000 }, { "epoch": 6.833506609238081, "grad_norm": 1.821703553199768, "learning_rate": 3.1664933907619193e-06, "loss": 0.0745, "step": 46010 }, { "epoch": 6.834991831278776, "grad_norm": 1.4857114553451538, "learning_rate": 3.165008168721224e-06, "loss": 0.0549, "step": 46020 }, { "epoch": 6.836477053319471, "grad_norm": 0.24308165907859802, "learning_rate": 3.163522946680529e-06, "loss": 0.0573, "step": 46030 }, { "epoch": 6.837962275360166, "grad_norm": 0.7036371231079102, "learning_rate": 3.162037724639834e-06, "loss": 0.0492, "step": 46040 }, { "epoch": 6.839447497400862, "grad_norm": 1.0023090839385986, "learning_rate": 3.160552502599139e-06, "loss": 0.0555, "step": 46050 }, { "epoch": 6.840932719441557, "grad_norm": 1.0630323886871338, "learning_rate": 3.159067280558444e-06, "loss": 0.0572, "step": 46060 }, { "epoch": 6.8424179414822515, "grad_norm": 0.6267212629318237, "learning_rate": 3.1575820585177484e-06, "loss": 0.058, "step": 46070 }, { "epoch": 6.843903163522946, "grad_norm": 1.072516679763794, "learning_rate": 3.1560968364770533e-06, "loss": 0.0391, "step": 46080 }, { "epoch": 6.845388385563642, "grad_norm": 1.2599564790725708, "learning_rate": 3.1546116144363586e-06, "loss": 0.0442, "step": 46090 }, { "epoch": 6.846873607604337, "grad_norm": 0.9169860482215881, "learning_rate": 3.1531263923956634e-06, "loss": 0.081, "step": 46100 }, { "epoch": 6.848358829645032, "grad_norm": 0.9532322883605957, "learning_rate": 3.1516411703549683e-06, "loss": 0.0626, "step": 46110 }, { "epoch": 6.849844051685727, "grad_norm": 1.1273218393325806, "learning_rate": 3.1501559483142736e-06, "loss": 0.0488, "step": 46120 }, { "epoch": 6.851329273726422, "grad_norm": 0.7113762497901917, "learning_rate": 3.1486707262735784e-06, "loss": 0.0402, "step": 46130 }, { "epoch": 6.8528144957671175, "grad_norm": 0.5954686403274536, "learning_rate": 3.147185504232883e-06, "loss": 0.0543, "step": 46140 }, { "epoch": 6.854299717807812, "grad_norm": 1.0223079919815063, "learning_rate": 3.1457002821921877e-06, "loss": 0.0602, "step": 46150 }, { "epoch": 6.855784939848507, "grad_norm": 0.790874719619751, "learning_rate": 3.144215060151493e-06, "loss": 0.0587, "step": 46160 }, { "epoch": 6.857270161889202, "grad_norm": 1.2213834524154663, "learning_rate": 3.142729838110798e-06, "loss": 0.0795, "step": 46170 }, { "epoch": 6.858755383929897, "grad_norm": 1.4595249891281128, "learning_rate": 3.1412446160701027e-06, "loss": 0.0633, "step": 46180 }, { "epoch": 6.860240605970593, "grad_norm": 1.1774989366531372, "learning_rate": 3.1397593940294076e-06, "loss": 0.0525, "step": 46190 }, { "epoch": 6.861725828011288, "grad_norm": 0.7904123067855835, "learning_rate": 3.138274171988713e-06, "loss": 0.0699, "step": 46200 }, { "epoch": 6.863211050051983, "grad_norm": 0.8630386590957642, "learning_rate": 3.1367889499480177e-06, "loss": 0.0661, "step": 46210 }, { "epoch": 6.864696272092678, "grad_norm": 0.9028981328010559, "learning_rate": 3.135303727907322e-06, "loss": 0.0614, "step": 46220 }, { "epoch": 6.866181494133373, "grad_norm": 0.7527992129325867, "learning_rate": 3.133818505866627e-06, "loss": 0.0571, "step": 46230 }, { "epoch": 6.867666716174068, "grad_norm": 1.0797587633132935, "learning_rate": 3.1323332838259323e-06, "loss": 0.0481, "step": 46240 }, { "epoch": 6.869151938214763, "grad_norm": 0.7128764986991882, "learning_rate": 3.130848061785237e-06, "loss": 0.0694, "step": 46250 }, { "epoch": 6.870637160255458, "grad_norm": 0.7176929712295532, "learning_rate": 3.129362839744542e-06, "loss": 0.0587, "step": 46260 }, { "epoch": 6.872122382296153, "grad_norm": 1.0772123336791992, "learning_rate": 3.1278776177038473e-06, "loss": 0.0671, "step": 46270 }, { "epoch": 6.873607604336849, "grad_norm": 0.9489224553108215, "learning_rate": 3.126392395663152e-06, "loss": 0.0761, "step": 46280 }, { "epoch": 6.875092826377544, "grad_norm": 0.44016531109809875, "learning_rate": 3.1249071736224566e-06, "loss": 0.0462, "step": 46290 }, { "epoch": 6.876578048418239, "grad_norm": 1.233738899230957, "learning_rate": 3.1234219515817614e-06, "loss": 0.0654, "step": 46300 }, { "epoch": 6.8780632704589335, "grad_norm": 1.355086088180542, "learning_rate": 3.1219367295410667e-06, "loss": 0.0656, "step": 46310 }, { "epoch": 6.879548492499628, "grad_norm": 0.967938244342804, "learning_rate": 3.1204515075003716e-06, "loss": 0.0563, "step": 46320 }, { "epoch": 6.881033714540324, "grad_norm": 0.3975263833999634, "learning_rate": 3.1189662854596764e-06, "loss": 0.0569, "step": 46330 }, { "epoch": 6.882518936581019, "grad_norm": 0.5445032715797424, "learning_rate": 3.1174810634189813e-06, "loss": 0.0585, "step": 46340 }, { "epoch": 6.884004158621714, "grad_norm": 1.676100492477417, "learning_rate": 3.1159958413782866e-06, "loss": 0.0737, "step": 46350 }, { "epoch": 6.885489380662409, "grad_norm": 0.5582711100578308, "learning_rate": 3.1145106193375914e-06, "loss": 0.0411, "step": 46360 }, { "epoch": 6.886974602703104, "grad_norm": 0.9410350322723389, "learning_rate": 3.113025397296896e-06, "loss": 0.0392, "step": 46370 }, { "epoch": 6.8884598247438, "grad_norm": 1.3940622806549072, "learning_rate": 3.111540175256201e-06, "loss": 0.0542, "step": 46380 }, { "epoch": 6.8899450467844945, "grad_norm": 1.2696105241775513, "learning_rate": 3.110054953215506e-06, "loss": 0.0529, "step": 46390 }, { "epoch": 6.891430268825189, "grad_norm": 0.5525928735733032, "learning_rate": 3.108569731174811e-06, "loss": 0.0653, "step": 46400 }, { "epoch": 6.892915490865884, "grad_norm": 0.6907713413238525, "learning_rate": 3.1070845091341157e-06, "loss": 0.0557, "step": 46410 }, { "epoch": 6.894400712906579, "grad_norm": 1.9477370977401733, "learning_rate": 3.105599287093421e-06, "loss": 0.0398, "step": 46420 }, { "epoch": 6.895885934947275, "grad_norm": 0.5067228674888611, "learning_rate": 3.104114065052726e-06, "loss": 0.0618, "step": 46430 }, { "epoch": 6.89737115698797, "grad_norm": 0.6338545083999634, "learning_rate": 3.1026288430120307e-06, "loss": 0.0475, "step": 46440 }, { "epoch": 6.898856379028665, "grad_norm": 1.339288592338562, "learning_rate": 3.101143620971335e-06, "loss": 0.0614, "step": 46450 }, { "epoch": 6.90034160106936, "grad_norm": 0.7984129786491394, "learning_rate": 3.0996583989306404e-06, "loss": 0.0688, "step": 46460 }, { "epoch": 6.901826823110055, "grad_norm": 1.0853620767593384, "learning_rate": 3.0981731768899453e-06, "loss": 0.0624, "step": 46470 }, { "epoch": 6.90331204515075, "grad_norm": 0.7857335805892944, "learning_rate": 3.09668795484925e-06, "loss": 0.0731, "step": 46480 }, { "epoch": 6.904797267191445, "grad_norm": 0.6904709935188293, "learning_rate": 3.095202732808555e-06, "loss": 0.0528, "step": 46490 }, { "epoch": 6.90628248923214, "grad_norm": 1.523937702178955, "learning_rate": 3.0937175107678603e-06, "loss": 0.0723, "step": 46500 }, { "epoch": 6.907767711272835, "grad_norm": 0.5652097463607788, "learning_rate": 3.092232288727165e-06, "loss": 0.0645, "step": 46510 }, { "epoch": 6.90925293331353, "grad_norm": 1.3932520151138306, "learning_rate": 3.0907470666864696e-06, "loss": 0.0491, "step": 46520 }, { "epoch": 6.910738155354226, "grad_norm": 0.5495141744613647, "learning_rate": 3.0892618446457753e-06, "loss": 0.0622, "step": 46530 }, { "epoch": 6.912223377394921, "grad_norm": 0.7841812968254089, "learning_rate": 3.0877766226050797e-06, "loss": 0.0606, "step": 46540 }, { "epoch": 6.9137085994356156, "grad_norm": 0.9534539580345154, "learning_rate": 3.0862914005643845e-06, "loss": 0.0517, "step": 46550 }, { "epoch": 6.9151938214763105, "grad_norm": 0.7401015758514404, "learning_rate": 3.0848061785236894e-06, "loss": 0.0598, "step": 46560 }, { "epoch": 6.916679043517005, "grad_norm": 1.01905357837677, "learning_rate": 3.0833209564829947e-06, "loss": 0.0599, "step": 46570 }, { "epoch": 6.918164265557701, "grad_norm": 0.5511347055435181, "learning_rate": 3.0818357344422995e-06, "loss": 0.0563, "step": 46580 }, { "epoch": 6.919649487598396, "grad_norm": 0.8203628063201904, "learning_rate": 3.0803505124016044e-06, "loss": 0.0501, "step": 46590 }, { "epoch": 6.921134709639091, "grad_norm": 0.7230483889579773, "learning_rate": 3.078865290360909e-06, "loss": 0.0636, "step": 46600 }, { "epoch": 6.922619931679786, "grad_norm": 1.0204007625579834, "learning_rate": 3.077380068320214e-06, "loss": 0.046, "step": 46610 }, { "epoch": 6.924105153720481, "grad_norm": 0.5306640267372131, "learning_rate": 3.075894846279519e-06, "loss": 0.0564, "step": 46620 }, { "epoch": 6.9255903757611765, "grad_norm": 1.163796067237854, "learning_rate": 3.074409624238824e-06, "loss": 0.0685, "step": 46630 }, { "epoch": 6.927075597801871, "grad_norm": 0.9207706451416016, "learning_rate": 3.0729244021981287e-06, "loss": 0.0522, "step": 46640 }, { "epoch": 6.928560819842566, "grad_norm": 0.6923567652702332, "learning_rate": 3.071439180157434e-06, "loss": 0.0599, "step": 46650 }, { "epoch": 6.930046041883261, "grad_norm": 0.8811810612678528, "learning_rate": 3.069953958116739e-06, "loss": 0.0657, "step": 46660 }, { "epoch": 6.931531263923956, "grad_norm": 0.8357272148132324, "learning_rate": 3.0684687360760433e-06, "loss": 0.0537, "step": 46670 }, { "epoch": 6.933016485964652, "grad_norm": 1.2306370735168457, "learning_rate": 3.066983514035349e-06, "loss": 0.0692, "step": 46680 }, { "epoch": 6.934501708005347, "grad_norm": 0.9291362166404724, "learning_rate": 3.0654982919946534e-06, "loss": 0.0535, "step": 46690 }, { "epoch": 6.935986930046042, "grad_norm": 0.82795649766922, "learning_rate": 3.0640130699539582e-06, "loss": 0.0533, "step": 46700 }, { "epoch": 6.937472152086737, "grad_norm": 1.0339187383651733, "learning_rate": 3.062527847913263e-06, "loss": 0.0721, "step": 46710 }, { "epoch": 6.938957374127432, "grad_norm": 0.706620991230011, "learning_rate": 3.0610426258725684e-06, "loss": 0.0643, "step": 46720 }, { "epoch": 6.940442596168127, "grad_norm": 1.19780695438385, "learning_rate": 3.0595574038318732e-06, "loss": 0.071, "step": 46730 }, { "epoch": 6.941927818208822, "grad_norm": 0.41239285469055176, "learning_rate": 3.058072181791178e-06, "loss": 0.0618, "step": 46740 }, { "epoch": 6.943413040249517, "grad_norm": 0.7439904808998108, "learning_rate": 3.0565869597504825e-06, "loss": 0.0524, "step": 46750 }, { "epoch": 6.944898262290212, "grad_norm": 1.1534885168075562, "learning_rate": 3.055101737709788e-06, "loss": 0.0511, "step": 46760 }, { "epoch": 6.946383484330908, "grad_norm": 1.2603319883346558, "learning_rate": 3.0536165156690927e-06, "loss": 0.0475, "step": 46770 }, { "epoch": 6.947868706371603, "grad_norm": 0.8110623955726624, "learning_rate": 3.0521312936283975e-06, "loss": 0.0552, "step": 46780 }, { "epoch": 6.949353928412298, "grad_norm": 0.9178988337516785, "learning_rate": 3.050646071587703e-06, "loss": 0.0566, "step": 46790 }, { "epoch": 6.9508391504529925, "grad_norm": 1.1614978313446045, "learning_rate": 3.0491608495470077e-06, "loss": 0.0679, "step": 46800 }, { "epoch": 6.952324372493688, "grad_norm": 1.3334935903549194, "learning_rate": 3.0476756275063125e-06, "loss": 0.0358, "step": 46810 }, { "epoch": 6.953809594534383, "grad_norm": 1.5388178825378418, "learning_rate": 3.0461904054656174e-06, "loss": 0.0738, "step": 46820 }, { "epoch": 6.955294816575078, "grad_norm": 1.34645414352417, "learning_rate": 3.0447051834249227e-06, "loss": 0.0561, "step": 46830 }, { "epoch": 6.956780038615773, "grad_norm": 1.158138632774353, "learning_rate": 3.043219961384227e-06, "loss": 0.0492, "step": 46840 }, { "epoch": 6.958265260656468, "grad_norm": 0.43134617805480957, "learning_rate": 3.041734739343532e-06, "loss": 0.0549, "step": 46850 }, { "epoch": 6.959750482697164, "grad_norm": 0.8437008857727051, "learning_rate": 3.040249517302837e-06, "loss": 0.0746, "step": 46860 }, { "epoch": 6.961235704737859, "grad_norm": 1.0502889156341553, "learning_rate": 3.038764295262142e-06, "loss": 0.0463, "step": 46870 }, { "epoch": 6.9627209267785535, "grad_norm": 0.48640525341033936, "learning_rate": 3.037279073221447e-06, "loss": 0.0591, "step": 46880 }, { "epoch": 6.964206148819248, "grad_norm": 1.5872031450271606, "learning_rate": 3.035793851180752e-06, "loss": 0.0653, "step": 46890 }, { "epoch": 6.965691370859943, "grad_norm": 0.44661468267440796, "learning_rate": 3.0343086291400562e-06, "loss": 0.058, "step": 46900 }, { "epoch": 6.967176592900639, "grad_norm": 0.8914174437522888, "learning_rate": 3.032823407099362e-06, "loss": 0.0555, "step": 46910 }, { "epoch": 6.968661814941334, "grad_norm": 0.38050782680511475, "learning_rate": 3.0313381850586664e-06, "loss": 0.053, "step": 46920 }, { "epoch": 6.970147036982029, "grad_norm": 0.38610634207725525, "learning_rate": 3.0298529630179712e-06, "loss": 0.0738, "step": 46930 }, { "epoch": 6.971632259022724, "grad_norm": 1.3435474634170532, "learning_rate": 3.0283677409772765e-06, "loss": 0.0648, "step": 46940 }, { "epoch": 6.973117481063419, "grad_norm": 1.6155376434326172, "learning_rate": 3.0268825189365814e-06, "loss": 0.0642, "step": 46950 }, { "epoch": 6.9746027031041145, "grad_norm": 0.6376026272773743, "learning_rate": 3.0253972968958862e-06, "loss": 0.0509, "step": 46960 }, { "epoch": 6.976087925144809, "grad_norm": 1.0941927433013916, "learning_rate": 3.023912074855191e-06, "loss": 0.0715, "step": 46970 }, { "epoch": 6.977573147185504, "grad_norm": 1.121254324913025, "learning_rate": 3.0224268528144964e-06, "loss": 0.0567, "step": 46980 }, { "epoch": 6.979058369226199, "grad_norm": 1.4150434732437134, "learning_rate": 3.020941630773801e-06, "loss": 0.0661, "step": 46990 }, { "epoch": 6.980543591266894, "grad_norm": 0.7081685662269592, "learning_rate": 3.0194564087331056e-06, "loss": 0.0726, "step": 47000 }, { "epoch": 6.98202881330759, "grad_norm": 0.5111793279647827, "learning_rate": 3.0179711866924105e-06, "loss": 0.0451, "step": 47010 }, { "epoch": 6.983514035348285, "grad_norm": 1.0652034282684326, "learning_rate": 3.0164859646517158e-06, "loss": 0.055, "step": 47020 }, { "epoch": 6.98499925738898, "grad_norm": 0.7724589109420776, "learning_rate": 3.0150007426110206e-06, "loss": 0.0593, "step": 47030 }, { "epoch": 6.9864844794296745, "grad_norm": 1.0594000816345215, "learning_rate": 3.0135155205703255e-06, "loss": 0.0631, "step": 47040 }, { "epoch": 6.987969701470369, "grad_norm": 0.5354403853416443, "learning_rate": 3.0120302985296308e-06, "loss": 0.0618, "step": 47050 }, { "epoch": 6.989454923511065, "grad_norm": 0.9589335322380066, "learning_rate": 3.0105450764889356e-06, "loss": 0.0534, "step": 47060 }, { "epoch": 6.99094014555176, "grad_norm": 0.591218113899231, "learning_rate": 3.00905985444824e-06, "loss": 0.0506, "step": 47070 }, { "epoch": 6.992425367592455, "grad_norm": 1.093970537185669, "learning_rate": 3.007574632407545e-06, "loss": 0.0632, "step": 47080 }, { "epoch": 6.99391058963315, "grad_norm": 0.9521450996398926, "learning_rate": 3.00608941036685e-06, "loss": 0.0761, "step": 47090 }, { "epoch": 6.995395811673845, "grad_norm": 0.8672487139701843, "learning_rate": 3.004604188326155e-06, "loss": 0.0548, "step": 47100 }, { "epoch": 6.996881033714541, "grad_norm": 1.1248619556427002, "learning_rate": 3.00311896628546e-06, "loss": 0.0533, "step": 47110 }, { "epoch": 6.9983662557552355, "grad_norm": 0.7572886943817139, "learning_rate": 3.0016337442447648e-06, "loss": 0.0534, "step": 47120 }, { "epoch": 6.99985147779593, "grad_norm": 0.904132068157196, "learning_rate": 3.00014852220407e-06, "loss": 0.0623, "step": 47130 }, { "epoch": 7.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.05673398822546005, "eval_runtime": 213.0335, "eval_samples_per_second": 178.465, "eval_steps_per_second": 5.581, "step": 47131 }, { "epoch": 7.001336699836625, "grad_norm": 1.1677008867263794, "learning_rate": 2.998663300163375e-06, "loss": 0.0588, "step": 47140 }, { "epoch": 7.00282192187732, "grad_norm": 1.201698660850525, "learning_rate": 2.9971780781226793e-06, "loss": 0.0475, "step": 47150 }, { "epoch": 7.004307143918016, "grad_norm": 1.3719254732131958, "learning_rate": 2.995692856081984e-06, "loss": 0.0736, "step": 47160 }, { "epoch": 7.005792365958711, "grad_norm": 0.3711611032485962, "learning_rate": 2.9942076340412895e-06, "loss": 0.0631, "step": 47170 }, { "epoch": 7.007277587999406, "grad_norm": 0.5947009325027466, "learning_rate": 2.9927224120005943e-06, "loss": 0.0641, "step": 47180 }, { "epoch": 7.008762810040101, "grad_norm": 0.6703227162361145, "learning_rate": 2.991237189959899e-06, "loss": 0.0587, "step": 47190 }, { "epoch": 7.0102480320807965, "grad_norm": 0.85968416929245, "learning_rate": 2.9897519679192045e-06, "loss": 0.0553, "step": 47200 }, { "epoch": 7.011733254121491, "grad_norm": 0.6204405426979065, "learning_rate": 2.9882667458785093e-06, "loss": 0.0583, "step": 47210 }, { "epoch": 7.013218476162186, "grad_norm": 1.092622995376587, "learning_rate": 2.9867815238378138e-06, "loss": 0.0634, "step": 47220 }, { "epoch": 7.014703698202881, "grad_norm": 0.9893145561218262, "learning_rate": 2.9852963017971186e-06, "loss": 0.0891, "step": 47230 }, { "epoch": 7.016188920243576, "grad_norm": 0.6505508422851562, "learning_rate": 2.983811079756424e-06, "loss": 0.0728, "step": 47240 }, { "epoch": 7.017674142284272, "grad_norm": 2.0147671699523926, "learning_rate": 2.9823258577157288e-06, "loss": 0.0625, "step": 47250 }, { "epoch": 7.019159364324967, "grad_norm": 0.79218989610672, "learning_rate": 2.9808406356750336e-06, "loss": 0.0661, "step": 47260 }, { "epoch": 7.020644586365662, "grad_norm": 0.8125013113021851, "learning_rate": 2.9793554136343385e-06, "loss": 0.0577, "step": 47270 }, { "epoch": 7.022129808406357, "grad_norm": 0.9180750846862793, "learning_rate": 2.9778701915936438e-06, "loss": 0.0676, "step": 47280 }, { "epoch": 7.0236150304470515, "grad_norm": 0.8958239555358887, "learning_rate": 2.9763849695529486e-06, "loss": 0.0623, "step": 47290 }, { "epoch": 7.025100252487747, "grad_norm": 1.1006089448928833, "learning_rate": 2.974899747512253e-06, "loss": 0.0595, "step": 47300 }, { "epoch": 7.026585474528442, "grad_norm": 1.45045006275177, "learning_rate": 2.9734145254715583e-06, "loss": 0.0555, "step": 47310 }, { "epoch": 7.028070696569137, "grad_norm": 1.0360887050628662, "learning_rate": 2.971929303430863e-06, "loss": 0.0668, "step": 47320 }, { "epoch": 7.029555918609832, "grad_norm": 1.3869552612304688, "learning_rate": 2.970444081390168e-06, "loss": 0.0509, "step": 47330 }, { "epoch": 7.031041140650527, "grad_norm": 1.085456132888794, "learning_rate": 2.968958859349473e-06, "loss": 0.0613, "step": 47340 }, { "epoch": 7.032526362691223, "grad_norm": 1.137816071510315, "learning_rate": 2.967473637308778e-06, "loss": 0.0499, "step": 47350 }, { "epoch": 7.034011584731918, "grad_norm": 0.6190617680549622, "learning_rate": 2.965988415268083e-06, "loss": 0.0494, "step": 47360 }, { "epoch": 7.0354968067726125, "grad_norm": 0.7790008187294006, "learning_rate": 2.9645031932273875e-06, "loss": 0.0554, "step": 47370 }, { "epoch": 7.036982028813307, "grad_norm": 0.7529904246330261, "learning_rate": 2.9630179711866923e-06, "loss": 0.047, "step": 47380 }, { "epoch": 7.038467250854002, "grad_norm": 0.9241394400596619, "learning_rate": 2.9615327491459976e-06, "loss": 0.053, "step": 47390 }, { "epoch": 7.039952472894698, "grad_norm": 0.6133353114128113, "learning_rate": 2.9600475271053025e-06, "loss": 0.0574, "step": 47400 }, { "epoch": 7.041437694935393, "grad_norm": 1.049790620803833, "learning_rate": 2.9585623050646073e-06, "loss": 0.0523, "step": 47410 }, { "epoch": 7.042922916976088, "grad_norm": 1.0434339046478271, "learning_rate": 2.957077083023912e-06, "loss": 0.0488, "step": 47420 }, { "epoch": 7.044408139016783, "grad_norm": 0.4983573257923126, "learning_rate": 2.9555918609832175e-06, "loss": 0.0515, "step": 47430 }, { "epoch": 7.045893361057478, "grad_norm": 0.8298611640930176, "learning_rate": 2.9541066389425223e-06, "loss": 0.0521, "step": 47440 }, { "epoch": 7.047378583098173, "grad_norm": 1.118912935256958, "learning_rate": 2.9526214169018267e-06, "loss": 0.0501, "step": 47450 }, { "epoch": 7.048863805138868, "grad_norm": 1.4120548963546753, "learning_rate": 2.951136194861132e-06, "loss": 0.0662, "step": 47460 }, { "epoch": 7.050349027179563, "grad_norm": 0.8734990358352661, "learning_rate": 2.949650972820437e-06, "loss": 0.0572, "step": 47470 }, { "epoch": 7.051834249220258, "grad_norm": 1.285258412361145, "learning_rate": 2.9481657507797417e-06, "loss": 0.0516, "step": 47480 }, { "epoch": 7.053319471260954, "grad_norm": 1.261879324913025, "learning_rate": 2.9466805287390466e-06, "loss": 0.0517, "step": 47490 }, { "epoch": 7.054804693301649, "grad_norm": 1.157348871231079, "learning_rate": 2.945195306698352e-06, "loss": 0.0448, "step": 47500 }, { "epoch": 7.056289915342344, "grad_norm": 1.2821811437606812, "learning_rate": 2.9437100846576567e-06, "loss": 0.0491, "step": 47510 }, { "epoch": 7.057775137383039, "grad_norm": 0.40937334299087524, "learning_rate": 2.9422248626169616e-06, "loss": 0.0564, "step": 47520 }, { "epoch": 7.0592603594237335, "grad_norm": 0.4357692301273346, "learning_rate": 2.940739640576266e-06, "loss": 0.0635, "step": 47530 }, { "epoch": 7.060745581464429, "grad_norm": 1.136781096458435, "learning_rate": 2.9392544185355713e-06, "loss": 0.0522, "step": 47540 }, { "epoch": 7.062230803505124, "grad_norm": 1.0357050895690918, "learning_rate": 2.937769196494876e-06, "loss": 0.0569, "step": 47550 }, { "epoch": 7.063716025545819, "grad_norm": 0.6880903244018555, "learning_rate": 2.936283974454181e-06, "loss": 0.0573, "step": 47560 }, { "epoch": 7.065201247586514, "grad_norm": 0.8587028384208679, "learning_rate": 2.9347987524134863e-06, "loss": 0.0617, "step": 47570 }, { "epoch": 7.066686469627209, "grad_norm": 1.4754736423492432, "learning_rate": 2.933313530372791e-06, "loss": 0.0606, "step": 47580 }, { "epoch": 7.068171691667905, "grad_norm": 1.347342610359192, "learning_rate": 2.931828308332096e-06, "loss": 0.066, "step": 47590 }, { "epoch": 7.0696569137086, "grad_norm": 1.1508538722991943, "learning_rate": 2.9303430862914004e-06, "loss": 0.0544, "step": 47600 }, { "epoch": 7.0711421357492945, "grad_norm": 1.5262290239334106, "learning_rate": 2.928857864250706e-06, "loss": 0.0514, "step": 47610 }, { "epoch": 7.072627357789989, "grad_norm": 0.5285647511482239, "learning_rate": 2.9273726422100106e-06, "loss": 0.0499, "step": 47620 }, { "epoch": 7.074112579830684, "grad_norm": 0.4555477797985077, "learning_rate": 2.9258874201693154e-06, "loss": 0.0359, "step": 47630 }, { "epoch": 7.07559780187138, "grad_norm": 0.7044863700866699, "learning_rate": 2.9244021981286203e-06, "loss": 0.0638, "step": 47640 }, { "epoch": 7.077083023912075, "grad_norm": 1.0085748434066772, "learning_rate": 2.9229169760879256e-06, "loss": 0.0764, "step": 47650 }, { "epoch": 7.07856824595277, "grad_norm": 1.3109335899353027, "learning_rate": 2.9214317540472304e-06, "loss": 0.0607, "step": 47660 }, { "epoch": 7.080053467993465, "grad_norm": 0.44020575284957886, "learning_rate": 2.9199465320065353e-06, "loss": 0.0583, "step": 47670 }, { "epoch": 7.08153869003416, "grad_norm": 0.9954208731651306, "learning_rate": 2.9184613099658397e-06, "loss": 0.0534, "step": 47680 }, { "epoch": 7.0830239120748555, "grad_norm": 0.6965577006340027, "learning_rate": 2.916976087925145e-06, "loss": 0.0697, "step": 47690 }, { "epoch": 7.08450913411555, "grad_norm": 1.5124120712280273, "learning_rate": 2.91549086588445e-06, "loss": 0.0621, "step": 47700 }, { "epoch": 7.085994356156245, "grad_norm": 1.4099786281585693, "learning_rate": 2.9140056438437547e-06, "loss": 0.0685, "step": 47710 }, { "epoch": 7.08747957819694, "grad_norm": 0.9136437773704529, "learning_rate": 2.91252042180306e-06, "loss": 0.063, "step": 47720 }, { "epoch": 7.088964800237635, "grad_norm": 0.8798291087150574, "learning_rate": 2.911035199762365e-06, "loss": 0.0601, "step": 47730 }, { "epoch": 7.090450022278331, "grad_norm": 1.518417239189148, "learning_rate": 2.9095499777216697e-06, "loss": 0.049, "step": 47740 }, { "epoch": 7.091935244319026, "grad_norm": 0.8989495635032654, "learning_rate": 2.908064755680974e-06, "loss": 0.0536, "step": 47750 }, { "epoch": 7.093420466359721, "grad_norm": 1.1232954263687134, "learning_rate": 2.90657953364028e-06, "loss": 0.0645, "step": 47760 }, { "epoch": 7.094905688400416, "grad_norm": 1.2198522090911865, "learning_rate": 2.9050943115995843e-06, "loss": 0.0608, "step": 47770 }, { "epoch": 7.096390910441111, "grad_norm": 0.6304935812950134, "learning_rate": 2.903609089558889e-06, "loss": 0.0599, "step": 47780 }, { "epoch": 7.097876132481806, "grad_norm": 1.277036190032959, "learning_rate": 2.902123867518194e-06, "loss": 0.0537, "step": 47790 }, { "epoch": 7.099361354522501, "grad_norm": 0.8688114285469055, "learning_rate": 2.9006386454774993e-06, "loss": 0.0521, "step": 47800 }, { "epoch": 7.100846576563196, "grad_norm": 1.1619441509246826, "learning_rate": 2.899153423436804e-06, "loss": 0.0802, "step": 47810 }, { "epoch": 7.102331798603891, "grad_norm": 1.2505648136138916, "learning_rate": 2.897668201396109e-06, "loss": 0.0449, "step": 47820 }, { "epoch": 7.103817020644587, "grad_norm": 1.1496368646621704, "learning_rate": 2.8961829793554143e-06, "loss": 0.0743, "step": 47830 }, { "epoch": 7.105302242685282, "grad_norm": 0.9446761608123779, "learning_rate": 2.8946977573147187e-06, "loss": 0.0607, "step": 47840 }, { "epoch": 7.1067874647259766, "grad_norm": 0.35389065742492676, "learning_rate": 2.8932125352740236e-06, "loss": 0.0491, "step": 47850 }, { "epoch": 7.1082726867666715, "grad_norm": 0.4818675220012665, "learning_rate": 2.8917273132333284e-06, "loss": 0.041, "step": 47860 }, { "epoch": 7.109757908807366, "grad_norm": 1.4597423076629639, "learning_rate": 2.8902420911926337e-06, "loss": 0.0574, "step": 47870 }, { "epoch": 7.111243130848062, "grad_norm": 1.8125760555267334, "learning_rate": 2.8887568691519386e-06, "loss": 0.0627, "step": 47880 }, { "epoch": 7.112728352888757, "grad_norm": 0.7303992509841919, "learning_rate": 2.8872716471112434e-06, "loss": 0.0453, "step": 47890 }, { "epoch": 7.114213574929452, "grad_norm": 0.7685091495513916, "learning_rate": 2.8857864250705483e-06, "loss": 0.046, "step": 47900 }, { "epoch": 7.115698796970147, "grad_norm": 1.3156096935272217, "learning_rate": 2.8843012030298536e-06, "loss": 0.0608, "step": 47910 }, { "epoch": 7.117184019010842, "grad_norm": 0.556989848613739, "learning_rate": 2.882815980989158e-06, "loss": 0.0674, "step": 47920 }, { "epoch": 7.1186692410515375, "grad_norm": 0.4582745134830475, "learning_rate": 2.881330758948463e-06, "loss": 0.0505, "step": 47930 }, { "epoch": 7.120154463092232, "grad_norm": 0.7635936141014099, "learning_rate": 2.8798455369077677e-06, "loss": 0.0583, "step": 47940 }, { "epoch": 7.121639685132927, "grad_norm": 1.5454378128051758, "learning_rate": 2.878360314867073e-06, "loss": 0.0627, "step": 47950 }, { "epoch": 7.123124907173622, "grad_norm": 1.238945484161377, "learning_rate": 2.876875092826378e-06, "loss": 0.0666, "step": 47960 }, { "epoch": 7.124610129214317, "grad_norm": 0.886013388633728, "learning_rate": 2.8753898707856827e-06, "loss": 0.0593, "step": 47970 }, { "epoch": 7.126095351255013, "grad_norm": 0.6208621263504028, "learning_rate": 2.873904648744988e-06, "loss": 0.051, "step": 47980 }, { "epoch": 7.127580573295708, "grad_norm": 0.888614296913147, "learning_rate": 2.872419426704293e-06, "loss": 0.0587, "step": 47990 }, { "epoch": 7.129065795336403, "grad_norm": 0.8305061459541321, "learning_rate": 2.8709342046635973e-06, "loss": 0.0458, "step": 48000 }, { "epoch": 7.130551017377098, "grad_norm": 0.8907108306884766, "learning_rate": 2.869448982622902e-06, "loss": 0.0377, "step": 48010 }, { "epoch": 7.132036239417793, "grad_norm": 1.2513445615768433, "learning_rate": 2.8679637605822074e-06, "loss": 0.0579, "step": 48020 }, { "epoch": 7.133521461458488, "grad_norm": 0.8097134828567505, "learning_rate": 2.8664785385415123e-06, "loss": 0.054, "step": 48030 }, { "epoch": 7.135006683499183, "grad_norm": 0.735083281993866, "learning_rate": 2.864993316500817e-06, "loss": 0.0636, "step": 48040 }, { "epoch": 7.136491905539878, "grad_norm": 0.6065291166305542, "learning_rate": 2.863508094460122e-06, "loss": 0.0515, "step": 48050 }, { "epoch": 7.137977127580573, "grad_norm": 0.965104341506958, "learning_rate": 2.8620228724194273e-06, "loss": 0.0582, "step": 48060 }, { "epoch": 7.139462349621269, "grad_norm": 1.0279301404953003, "learning_rate": 2.8605376503787317e-06, "loss": 0.0504, "step": 48070 }, { "epoch": 7.140947571661964, "grad_norm": 0.6441022157669067, "learning_rate": 2.8590524283380365e-06, "loss": 0.0627, "step": 48080 }, { "epoch": 7.142432793702659, "grad_norm": 0.9285979866981506, "learning_rate": 2.8575672062973414e-06, "loss": 0.0516, "step": 48090 }, { "epoch": 7.1439180157433535, "grad_norm": 1.1106793880462646, "learning_rate": 2.8560819842566467e-06, "loss": 0.0791, "step": 48100 }, { "epoch": 7.145403237784048, "grad_norm": 0.6336337327957153, "learning_rate": 2.8545967622159515e-06, "loss": 0.0373, "step": 48110 }, { "epoch": 7.146888459824744, "grad_norm": 0.7305782437324524, "learning_rate": 2.8531115401752564e-06, "loss": 0.0568, "step": 48120 }, { "epoch": 7.148373681865439, "grad_norm": 0.8777934908866882, "learning_rate": 2.8516263181345617e-06, "loss": 0.0588, "step": 48130 }, { "epoch": 7.149858903906134, "grad_norm": 0.6388712525367737, "learning_rate": 2.8501410960938665e-06, "loss": 0.0405, "step": 48140 }, { "epoch": 7.151344125946829, "grad_norm": 0.9644005298614502, "learning_rate": 2.848655874053171e-06, "loss": 0.0517, "step": 48150 }, { "epoch": 7.152829347987524, "grad_norm": 0.5722053050994873, "learning_rate": 2.847170652012476e-06, "loss": 0.0615, "step": 48160 }, { "epoch": 7.15431457002822, "grad_norm": 0.550632119178772, "learning_rate": 2.845685429971781e-06, "loss": 0.0488, "step": 48170 }, { "epoch": 7.1557997920689145, "grad_norm": 0.8180745244026184, "learning_rate": 2.844200207931086e-06, "loss": 0.0569, "step": 48180 }, { "epoch": 7.157285014109609, "grad_norm": 0.5829613208770752, "learning_rate": 2.842714985890391e-06, "loss": 0.0505, "step": 48190 }, { "epoch": 7.158770236150304, "grad_norm": 1.0786627531051636, "learning_rate": 2.8412297638496957e-06, "loss": 0.0454, "step": 48200 }, { "epoch": 7.160255458190999, "grad_norm": 1.1173996925354004, "learning_rate": 2.839744541809001e-06, "loss": 0.0581, "step": 48210 }, { "epoch": 7.161740680231695, "grad_norm": 1.616842269897461, "learning_rate": 2.8382593197683054e-06, "loss": 0.0575, "step": 48220 }, { "epoch": 7.16322590227239, "grad_norm": 0.777524471282959, "learning_rate": 2.8367740977276102e-06, "loss": 0.0623, "step": 48230 }, { "epoch": 7.164711124313085, "grad_norm": 1.2010893821716309, "learning_rate": 2.8352888756869155e-06, "loss": 0.049, "step": 48240 }, { "epoch": 7.16619634635378, "grad_norm": 0.6864365339279175, "learning_rate": 2.8338036536462204e-06, "loss": 0.0428, "step": 48250 }, { "epoch": 7.167681568394475, "grad_norm": 0.8801079392433167, "learning_rate": 2.8323184316055252e-06, "loss": 0.0466, "step": 48260 }, { "epoch": 7.16916679043517, "grad_norm": 1.5361732244491577, "learning_rate": 2.83083320956483e-06, "loss": 0.0508, "step": 48270 }, { "epoch": 7.170652012475865, "grad_norm": 0.971454381942749, "learning_rate": 2.8293479875241354e-06, "loss": 0.055, "step": 48280 }, { "epoch": 7.17213723451656, "grad_norm": 0.7517418265342712, "learning_rate": 2.8278627654834402e-06, "loss": 0.0566, "step": 48290 }, { "epoch": 7.173622456557255, "grad_norm": 0.8058810234069824, "learning_rate": 2.8263775434427447e-06, "loss": 0.0694, "step": 48300 }, { "epoch": 7.17510767859795, "grad_norm": 1.1214815378189087, "learning_rate": 2.8248923214020495e-06, "loss": 0.0699, "step": 48310 }, { "epoch": 7.176592900638646, "grad_norm": 0.5619127750396729, "learning_rate": 2.823407099361355e-06, "loss": 0.0687, "step": 48320 }, { "epoch": 7.178078122679341, "grad_norm": 0.9164327383041382, "learning_rate": 2.8219218773206597e-06, "loss": 0.0538, "step": 48330 }, { "epoch": 7.1795633447200355, "grad_norm": 0.5329567193984985, "learning_rate": 2.8204366552799645e-06, "loss": 0.0601, "step": 48340 }, { "epoch": 7.1810485667607304, "grad_norm": 1.0147507190704346, "learning_rate": 2.8189514332392694e-06, "loss": 0.0596, "step": 48350 }, { "epoch": 7.182533788801426, "grad_norm": 0.6977734565734863, "learning_rate": 2.8174662111985747e-06, "loss": 0.0612, "step": 48360 }, { "epoch": 7.184019010842121, "grad_norm": 0.5618019700050354, "learning_rate": 2.8159809891578795e-06, "loss": 0.0517, "step": 48370 }, { "epoch": 7.185504232882816, "grad_norm": 1.5264066457748413, "learning_rate": 2.814495767117184e-06, "loss": 0.0598, "step": 48380 }, { "epoch": 7.186989454923511, "grad_norm": 0.5172926187515259, "learning_rate": 2.8130105450764892e-06, "loss": 0.0589, "step": 48390 }, { "epoch": 7.188474676964206, "grad_norm": 1.296660304069519, "learning_rate": 2.811525323035794e-06, "loss": 0.0504, "step": 48400 }, { "epoch": 7.189959899004902, "grad_norm": 1.113176703453064, "learning_rate": 2.810040100995099e-06, "loss": 0.0607, "step": 48410 }, { "epoch": 7.1914451210455965, "grad_norm": 0.9029462933540344, "learning_rate": 2.808554878954404e-06, "loss": 0.0646, "step": 48420 }, { "epoch": 7.192930343086291, "grad_norm": 0.4349440336227417, "learning_rate": 2.807069656913709e-06, "loss": 0.058, "step": 48430 }, { "epoch": 7.194415565126986, "grad_norm": 1.4266722202301025, "learning_rate": 2.805584434873014e-06, "loss": 0.0494, "step": 48440 }, { "epoch": 7.195900787167681, "grad_norm": 0.38038304448127747, "learning_rate": 2.8040992128323184e-06, "loss": 0.0596, "step": 48450 }, { "epoch": 7.197386009208377, "grad_norm": 0.9611129760742188, "learning_rate": 2.8026139907916232e-06, "loss": 0.0586, "step": 48460 }, { "epoch": 7.198871231249072, "grad_norm": 0.32369136810302734, "learning_rate": 2.8011287687509285e-06, "loss": 0.0497, "step": 48470 }, { "epoch": 7.200356453289767, "grad_norm": 0.7440458536148071, "learning_rate": 2.7996435467102334e-06, "loss": 0.0554, "step": 48480 }, { "epoch": 7.201841675330462, "grad_norm": 0.6804837584495544, "learning_rate": 2.7981583246695382e-06, "loss": 0.0476, "step": 48490 }, { "epoch": 7.203326897371157, "grad_norm": 0.7779265642166138, "learning_rate": 2.7966731026288435e-06, "loss": 0.0624, "step": 48500 }, { "epoch": 7.204812119411852, "grad_norm": 0.5164976716041565, "learning_rate": 2.7951878805881484e-06, "loss": 0.0582, "step": 48510 }, { "epoch": 7.206297341452547, "grad_norm": 1.5991668701171875, "learning_rate": 2.7937026585474532e-06, "loss": 0.0647, "step": 48520 }, { "epoch": 7.207782563493242, "grad_norm": 0.7785415649414062, "learning_rate": 2.7922174365067576e-06, "loss": 0.0482, "step": 48530 }, { "epoch": 7.209267785533937, "grad_norm": 0.5820591449737549, "learning_rate": 2.790732214466063e-06, "loss": 0.0433, "step": 48540 }, { "epoch": 7.210753007574632, "grad_norm": 0.9165753126144409, "learning_rate": 2.7892469924253678e-06, "loss": 0.0415, "step": 48550 }, { "epoch": 7.212238229615328, "grad_norm": 0.5261610746383667, "learning_rate": 2.7877617703846726e-06, "loss": 0.0451, "step": 48560 }, { "epoch": 7.213723451656023, "grad_norm": 1.694600224494934, "learning_rate": 2.7862765483439775e-06, "loss": 0.069, "step": 48570 }, { "epoch": 7.215208673696718, "grad_norm": 1.0975875854492188, "learning_rate": 2.7847913263032828e-06, "loss": 0.0409, "step": 48580 }, { "epoch": 7.2166938957374125, "grad_norm": 1.1013407707214355, "learning_rate": 2.7833061042625876e-06, "loss": 0.0654, "step": 48590 }, { "epoch": 7.218179117778108, "grad_norm": 1.3050427436828613, "learning_rate": 2.7818208822218925e-06, "loss": 0.052, "step": 48600 }, { "epoch": 7.219664339818803, "grad_norm": 0.5271753668785095, "learning_rate": 2.780335660181197e-06, "loss": 0.045, "step": 48610 }, { "epoch": 7.221149561859498, "grad_norm": 0.8320847749710083, "learning_rate": 2.778850438140502e-06, "loss": 0.0574, "step": 48620 }, { "epoch": 7.222634783900193, "grad_norm": 0.9930746555328369, "learning_rate": 2.777365216099807e-06, "loss": 0.057, "step": 48630 }, { "epoch": 7.224120005940888, "grad_norm": 0.9369096159934998, "learning_rate": 2.775879994059112e-06, "loss": 0.0664, "step": 48640 }, { "epoch": 7.225605227981584, "grad_norm": 1.8006670475006104, "learning_rate": 2.774394772018417e-06, "loss": 0.0698, "step": 48650 }, { "epoch": 7.227090450022279, "grad_norm": 0.9909857511520386, "learning_rate": 2.772909549977722e-06, "loss": 0.0613, "step": 48660 }, { "epoch": 7.2285756720629735, "grad_norm": 0.6781131625175476, "learning_rate": 2.771424327937027e-06, "loss": 0.0426, "step": 48670 }, { "epoch": 7.230060894103668, "grad_norm": 0.7474071383476257, "learning_rate": 2.7699391058963313e-06, "loss": 0.0464, "step": 48680 }, { "epoch": 7.231546116144363, "grad_norm": 0.791830837726593, "learning_rate": 2.768453883855637e-06, "loss": 0.0695, "step": 48690 }, { "epoch": 7.233031338185059, "grad_norm": 1.9448063373565674, "learning_rate": 2.7669686618149415e-06, "loss": 0.0523, "step": 48700 }, { "epoch": 7.234516560225754, "grad_norm": 0.4355827867984772, "learning_rate": 2.7654834397742463e-06, "loss": 0.0551, "step": 48710 }, { "epoch": 7.236001782266449, "grad_norm": 0.8302456140518188, "learning_rate": 2.763998217733551e-06, "loss": 0.0597, "step": 48720 }, { "epoch": 7.237487004307144, "grad_norm": 0.36988481879234314, "learning_rate": 2.7625129956928565e-06, "loss": 0.0431, "step": 48730 }, { "epoch": 7.238972226347839, "grad_norm": 0.8383266925811768, "learning_rate": 2.7610277736521613e-06, "loss": 0.0663, "step": 48740 }, { "epoch": 7.2404574483885344, "grad_norm": 0.5984920263290405, "learning_rate": 2.759542551611466e-06, "loss": 0.0577, "step": 48750 }, { "epoch": 7.241942670429229, "grad_norm": 1.1798092126846313, "learning_rate": 2.7580573295707715e-06, "loss": 0.0457, "step": 48760 }, { "epoch": 7.243427892469924, "grad_norm": 1.5372233390808105, "learning_rate": 2.756572107530076e-06, "loss": 0.0417, "step": 48770 }, { "epoch": 7.244913114510619, "grad_norm": 0.7862002849578857, "learning_rate": 2.7550868854893808e-06, "loss": 0.051, "step": 48780 }, { "epoch": 7.246398336551314, "grad_norm": 1.1187494993209839, "learning_rate": 2.7536016634486856e-06, "loss": 0.0469, "step": 48790 }, { "epoch": 7.24788355859201, "grad_norm": 1.2784112691879272, "learning_rate": 2.752116441407991e-06, "loss": 0.0543, "step": 48800 }, { "epoch": 7.249368780632705, "grad_norm": 0.7661743760108948, "learning_rate": 2.7506312193672958e-06, "loss": 0.0743, "step": 48810 }, { "epoch": 7.2508540026734, "grad_norm": 1.1764309406280518, "learning_rate": 2.7491459973266006e-06, "loss": 0.0626, "step": 48820 }, { "epoch": 7.2523392247140945, "grad_norm": 0.6383646726608276, "learning_rate": 2.747660775285905e-06, "loss": 0.059, "step": 48830 }, { "epoch": 7.253824446754789, "grad_norm": 1.1246609687805176, "learning_rate": 2.7461755532452108e-06, "loss": 0.0557, "step": 48840 }, { "epoch": 7.255309668795485, "grad_norm": 0.9652683734893799, "learning_rate": 2.744690331204515e-06, "loss": 0.0635, "step": 48850 }, { "epoch": 7.25679489083618, "grad_norm": 0.890220046043396, "learning_rate": 2.74320510916382e-06, "loss": 0.0586, "step": 48860 }, { "epoch": 7.258280112876875, "grad_norm": 0.9741783738136292, "learning_rate": 2.741719887123125e-06, "loss": 0.05, "step": 48870 }, { "epoch": 7.25976533491757, "grad_norm": 0.8578696250915527, "learning_rate": 2.74023466508243e-06, "loss": 0.0471, "step": 48880 }, { "epoch": 7.261250556958265, "grad_norm": 0.3909326195716858, "learning_rate": 2.738749443041735e-06, "loss": 0.0516, "step": 48890 }, { "epoch": 7.262735778998961, "grad_norm": 0.4995374083518982, "learning_rate": 2.73726422100104e-06, "loss": 0.0494, "step": 48900 }, { "epoch": 7.2642210010396555, "grad_norm": 1.06190824508667, "learning_rate": 2.735778998960345e-06, "loss": 0.0531, "step": 48910 }, { "epoch": 7.26570622308035, "grad_norm": 0.5144338607788086, "learning_rate": 2.7342937769196496e-06, "loss": 0.0656, "step": 48920 }, { "epoch": 7.267191445121045, "grad_norm": 0.649617612361908, "learning_rate": 2.7328085548789545e-06, "loss": 0.066, "step": 48930 }, { "epoch": 7.26867666716174, "grad_norm": 1.098474383354187, "learning_rate": 2.7313233328382593e-06, "loss": 0.045, "step": 48940 }, { "epoch": 7.270161889202436, "grad_norm": 0.9607246518135071, "learning_rate": 2.7298381107975646e-06, "loss": 0.049, "step": 48950 }, { "epoch": 7.271647111243131, "grad_norm": 0.7913243770599365, "learning_rate": 2.7283528887568695e-06, "loss": 0.0401, "step": 48960 }, { "epoch": 7.273132333283826, "grad_norm": 1.0159389972686768, "learning_rate": 2.7268676667161743e-06, "loss": 0.0648, "step": 48970 }, { "epoch": 7.274617555324521, "grad_norm": 0.6008278131484985, "learning_rate": 2.725382444675479e-06, "loss": 0.0445, "step": 48980 }, { "epoch": 7.2761027773652165, "grad_norm": 0.5866870880126953, "learning_rate": 2.7238972226347845e-06, "loss": 0.063, "step": 48990 }, { "epoch": 7.277587999405911, "grad_norm": 0.5861631035804749, "learning_rate": 2.722412000594089e-06, "loss": 0.0534, "step": 49000 }, { "epoch": 7.279073221446606, "grad_norm": 2.045478582382202, "learning_rate": 2.7209267785533937e-06, "loss": 0.0855, "step": 49010 }, { "epoch": 7.280558443487301, "grad_norm": 0.6939730644226074, "learning_rate": 2.719441556512699e-06, "loss": 0.0627, "step": 49020 }, { "epoch": 7.282043665527996, "grad_norm": 0.5861126780509949, "learning_rate": 2.717956334472004e-06, "loss": 0.049, "step": 49030 }, { "epoch": 7.283528887568692, "grad_norm": 1.1887887716293335, "learning_rate": 2.7164711124313087e-06, "loss": 0.0506, "step": 49040 }, { "epoch": 7.285014109609387, "grad_norm": 0.3314265310764313, "learning_rate": 2.7149858903906136e-06, "loss": 0.0403, "step": 49050 }, { "epoch": 7.286499331650082, "grad_norm": 1.404929518699646, "learning_rate": 2.713500668349919e-06, "loss": 0.0569, "step": 49060 }, { "epoch": 7.287984553690777, "grad_norm": 0.8105013370513916, "learning_rate": 2.7120154463092237e-06, "loss": 0.0618, "step": 49070 }, { "epoch": 7.2894697757314715, "grad_norm": 0.7986222505569458, "learning_rate": 2.710530224268528e-06, "loss": 0.0477, "step": 49080 }, { "epoch": 7.290954997772167, "grad_norm": 1.259995460510254, "learning_rate": 2.709045002227833e-06, "loss": 0.0533, "step": 49090 }, { "epoch": 7.292440219812862, "grad_norm": 0.9592974781990051, "learning_rate": 2.7075597801871383e-06, "loss": 0.0497, "step": 49100 }, { "epoch": 7.293925441853557, "grad_norm": 0.5274348258972168, "learning_rate": 2.706074558146443e-06, "loss": 0.0425, "step": 49110 }, { "epoch": 7.295410663894252, "grad_norm": 0.26715418696403503, "learning_rate": 2.704589336105748e-06, "loss": 0.0707, "step": 49120 }, { "epoch": 7.296895885934947, "grad_norm": 1.076434850692749, "learning_rate": 2.703104114065053e-06, "loss": 0.0803, "step": 49130 }, { "epoch": 7.298381107975643, "grad_norm": 0.31761565804481506, "learning_rate": 2.701618892024358e-06, "loss": 0.0465, "step": 49140 }, { "epoch": 7.299866330016338, "grad_norm": 0.6720213294029236, "learning_rate": 2.7001336699836626e-06, "loss": 0.0476, "step": 49150 }, { "epoch": 7.3013515520570325, "grad_norm": 1.105984091758728, "learning_rate": 2.6986484479429674e-06, "loss": 0.0452, "step": 49160 }, { "epoch": 7.302836774097727, "grad_norm": 1.1170014142990112, "learning_rate": 2.6971632259022727e-06, "loss": 0.057, "step": 49170 }, { "epoch": 7.304321996138423, "grad_norm": 1.4183934926986694, "learning_rate": 2.6956780038615776e-06, "loss": 0.0737, "step": 49180 }, { "epoch": 7.305807218179118, "grad_norm": 0.9773399829864502, "learning_rate": 2.6941927818208824e-06, "loss": 0.0402, "step": 49190 }, { "epoch": 7.307292440219813, "grad_norm": 0.5806409120559692, "learning_rate": 2.6927075597801873e-06, "loss": 0.04, "step": 49200 }, { "epoch": 7.308777662260508, "grad_norm": 0.7879276275634766, "learning_rate": 2.6912223377394926e-06, "loss": 0.0489, "step": 49210 }, { "epoch": 7.310262884301203, "grad_norm": 0.8717116713523865, "learning_rate": 2.6897371156987974e-06, "loss": 0.0683, "step": 49220 }, { "epoch": 7.3117481063418985, "grad_norm": 0.9644998908042908, "learning_rate": 2.688251893658102e-06, "loss": 0.0598, "step": 49230 }, { "epoch": 7.313233328382593, "grad_norm": 0.4911693334579468, "learning_rate": 2.6867666716174067e-06, "loss": 0.0743, "step": 49240 }, { "epoch": 7.314718550423288, "grad_norm": 1.0501363277435303, "learning_rate": 2.685281449576712e-06, "loss": 0.0755, "step": 49250 }, { "epoch": 7.316203772463983, "grad_norm": 0.6555094718933105, "learning_rate": 2.683796227536017e-06, "loss": 0.0685, "step": 49260 }, { "epoch": 7.317688994504678, "grad_norm": 1.0857270956039429, "learning_rate": 2.6823110054953217e-06, "loss": 0.0617, "step": 49270 }, { "epoch": 7.319174216545374, "grad_norm": 1.108014702796936, "learning_rate": 2.680825783454627e-06, "loss": 0.0573, "step": 49280 }, { "epoch": 7.320659438586069, "grad_norm": 0.713036835193634, "learning_rate": 2.679340561413932e-06, "loss": 0.0479, "step": 49290 }, { "epoch": 7.322144660626764, "grad_norm": 0.8778752088546753, "learning_rate": 2.6778553393732363e-06, "loss": 0.0429, "step": 49300 }, { "epoch": 7.323629882667459, "grad_norm": 0.5056967735290527, "learning_rate": 2.676370117332541e-06, "loss": 0.0466, "step": 49310 }, { "epoch": 7.3251151047081535, "grad_norm": 1.1081126928329468, "learning_rate": 2.6748848952918464e-06, "loss": 0.0545, "step": 49320 }, { "epoch": 7.326600326748849, "grad_norm": 0.40196236968040466, "learning_rate": 2.6733996732511513e-06, "loss": 0.0563, "step": 49330 }, { "epoch": 7.328085548789544, "grad_norm": 1.0329406261444092, "learning_rate": 2.671914451210456e-06, "loss": 0.0597, "step": 49340 }, { "epoch": 7.329570770830239, "grad_norm": 1.0409470796585083, "learning_rate": 2.670429229169761e-06, "loss": 0.056, "step": 49350 }, { "epoch": 7.331055992870934, "grad_norm": 1.3227382898330688, "learning_rate": 2.6689440071290663e-06, "loss": 0.0516, "step": 49360 }, { "epoch": 7.332541214911629, "grad_norm": 0.9763649702072144, "learning_rate": 2.667458785088371e-06, "loss": 0.0756, "step": 49370 }, { "epoch": 7.334026436952325, "grad_norm": 0.8181225657463074, "learning_rate": 2.6659735630476756e-06, "loss": 0.0639, "step": 49380 }, { "epoch": 7.33551165899302, "grad_norm": 1.3974295854568481, "learning_rate": 2.6644883410069804e-06, "loss": 0.0633, "step": 49390 }, { "epoch": 7.3369968810337145, "grad_norm": 0.905113935470581, "learning_rate": 2.6630031189662857e-06, "loss": 0.0689, "step": 49400 }, { "epoch": 7.338482103074409, "grad_norm": 0.6330129504203796, "learning_rate": 2.6615178969255906e-06, "loss": 0.0512, "step": 49410 }, { "epoch": 7.339967325115104, "grad_norm": 0.45538121461868286, "learning_rate": 2.6600326748848954e-06, "loss": 0.0585, "step": 49420 }, { "epoch": 7.3414525471558, "grad_norm": 1.3823957443237305, "learning_rate": 2.6585474528442007e-06, "loss": 0.0502, "step": 49430 }, { "epoch": 7.342937769196495, "grad_norm": 1.288536548614502, "learning_rate": 2.6570622308035056e-06, "loss": 0.0524, "step": 49440 }, { "epoch": 7.34442299123719, "grad_norm": 0.4768843948841095, "learning_rate": 2.6555770087628104e-06, "loss": 0.0462, "step": 49450 }, { "epoch": 7.345908213277885, "grad_norm": 0.8534879088401794, "learning_rate": 2.654091786722115e-06, "loss": 0.0392, "step": 49460 }, { "epoch": 7.34739343531858, "grad_norm": 1.0308243036270142, "learning_rate": 2.65260656468142e-06, "loss": 0.046, "step": 49470 }, { "epoch": 7.3488786573592755, "grad_norm": 0.5795043110847473, "learning_rate": 2.651121342640725e-06, "loss": 0.0486, "step": 49480 }, { "epoch": 7.35036387939997, "grad_norm": 0.5937952995300293, "learning_rate": 2.64963612060003e-06, "loss": 0.0654, "step": 49490 }, { "epoch": 7.351849101440665, "grad_norm": 0.793193519115448, "learning_rate": 2.6481508985593347e-06, "loss": 0.0508, "step": 49500 }, { "epoch": 7.35333432348136, "grad_norm": 1.429418921470642, "learning_rate": 2.64666567651864e-06, "loss": 0.0566, "step": 49510 }, { "epoch": 7.354819545522055, "grad_norm": 0.6977571845054626, "learning_rate": 2.645180454477945e-06, "loss": 0.0517, "step": 49520 }, { "epoch": 7.356304767562751, "grad_norm": 0.6724829077720642, "learning_rate": 2.6436952324372493e-06, "loss": 0.0688, "step": 49530 }, { "epoch": 7.357789989603446, "grad_norm": 0.5872288346290588, "learning_rate": 2.642210010396554e-06, "loss": 0.0661, "step": 49540 }, { "epoch": 7.359275211644141, "grad_norm": 0.7089404463768005, "learning_rate": 2.6407247883558594e-06, "loss": 0.0604, "step": 49550 }, { "epoch": 7.360760433684836, "grad_norm": 1.2058686017990112, "learning_rate": 2.6392395663151643e-06, "loss": 0.0555, "step": 49560 }, { "epoch": 7.362245655725531, "grad_norm": 0.7654306888580322, "learning_rate": 2.637754344274469e-06, "loss": 0.0664, "step": 49570 }, { "epoch": 7.363730877766226, "grad_norm": 0.548048734664917, "learning_rate": 2.6362691222337744e-06, "loss": 0.0561, "step": 49580 }, { "epoch": 7.365216099806921, "grad_norm": 0.9300860166549683, "learning_rate": 2.6347839001930793e-06, "loss": 0.0604, "step": 49590 }, { "epoch": 7.366701321847616, "grad_norm": 0.4907565116882324, "learning_rate": 2.633298678152384e-06, "loss": 0.0565, "step": 49600 }, { "epoch": 7.368186543888311, "grad_norm": 0.4754343032836914, "learning_rate": 2.6318134561116885e-06, "loss": 0.0431, "step": 49610 }, { "epoch": 7.369671765929007, "grad_norm": 0.9544875621795654, "learning_rate": 2.630328234070994e-06, "loss": 0.0464, "step": 49620 }, { "epoch": 7.371156987969702, "grad_norm": 1.2123874425888062, "learning_rate": 2.6288430120302987e-06, "loss": 0.0611, "step": 49630 }, { "epoch": 7.3726422100103965, "grad_norm": 0.28933414816856384, "learning_rate": 2.6273577899896035e-06, "loss": 0.0633, "step": 49640 }, { "epoch": 7.3741274320510914, "grad_norm": 1.4906394481658936, "learning_rate": 2.6258725679489084e-06, "loss": 0.0613, "step": 49650 }, { "epoch": 7.375612654091786, "grad_norm": 0.9483470320701599, "learning_rate": 2.6243873459082137e-06, "loss": 0.0454, "step": 49660 }, { "epoch": 7.377097876132482, "grad_norm": 1.8152319192886353, "learning_rate": 2.6229021238675185e-06, "loss": 0.0736, "step": 49670 }, { "epoch": 7.378583098173177, "grad_norm": 0.5155038833618164, "learning_rate": 2.6214169018268234e-06, "loss": 0.05, "step": 49680 }, { "epoch": 7.380068320213872, "grad_norm": 0.7419806718826294, "learning_rate": 2.6199316797861287e-06, "loss": 0.0566, "step": 49690 }, { "epoch": 7.381553542254567, "grad_norm": 1.56878662109375, "learning_rate": 2.618446457745433e-06, "loss": 0.0645, "step": 49700 }, { "epoch": 7.383038764295262, "grad_norm": 0.6593931913375854, "learning_rate": 2.616961235704738e-06, "loss": 0.0588, "step": 49710 }, { "epoch": 7.3845239863359575, "grad_norm": 1.0526626110076904, "learning_rate": 2.615476013664043e-06, "loss": 0.0613, "step": 49720 }, { "epoch": 7.386009208376652, "grad_norm": 1.0721087455749512, "learning_rate": 2.613990791623348e-06, "loss": 0.0665, "step": 49730 }, { "epoch": 7.387494430417347, "grad_norm": 0.5129325985908508, "learning_rate": 2.612505569582653e-06, "loss": 0.0565, "step": 49740 }, { "epoch": 7.388979652458042, "grad_norm": 0.7529094219207764, "learning_rate": 2.611020347541958e-06, "loss": 0.0632, "step": 49750 }, { "epoch": 7.390464874498738, "grad_norm": 0.9041786789894104, "learning_rate": 2.6095351255012622e-06, "loss": 0.0664, "step": 49760 }, { "epoch": 7.391950096539433, "grad_norm": 0.5812846422195435, "learning_rate": 2.608049903460568e-06, "loss": 0.0495, "step": 49770 }, { "epoch": 7.393435318580128, "grad_norm": 1.0239791870117188, "learning_rate": 2.6065646814198724e-06, "loss": 0.0541, "step": 49780 }, { "epoch": 7.394920540620823, "grad_norm": 0.5622916221618652, "learning_rate": 2.6050794593791772e-06, "loss": 0.0532, "step": 49790 }, { "epoch": 7.396405762661518, "grad_norm": 1.1932148933410645, "learning_rate": 2.603594237338482e-06, "loss": 0.0496, "step": 49800 }, { "epoch": 7.397890984702213, "grad_norm": 1.2361640930175781, "learning_rate": 2.6021090152977874e-06, "loss": 0.0689, "step": 49810 }, { "epoch": 7.399376206742908, "grad_norm": 1.5666189193725586, "learning_rate": 2.6006237932570922e-06, "loss": 0.0653, "step": 49820 }, { "epoch": 7.400861428783603, "grad_norm": 0.3307894766330719, "learning_rate": 2.599138571216397e-06, "loss": 0.0508, "step": 49830 }, { "epoch": 7.402346650824298, "grad_norm": 1.0216509103775024, "learning_rate": 2.5976533491757024e-06, "loss": 0.0706, "step": 49840 }, { "epoch": 7.403831872864993, "grad_norm": 0.5628541111946106, "learning_rate": 2.596168127135007e-06, "loss": 0.0453, "step": 49850 }, { "epoch": 7.405317094905689, "grad_norm": 0.6356275677680969, "learning_rate": 2.5946829050943117e-06, "loss": 0.0502, "step": 49860 }, { "epoch": 7.406802316946384, "grad_norm": 0.8141928911209106, "learning_rate": 2.5931976830536165e-06, "loss": 0.0712, "step": 49870 }, { "epoch": 7.408287538987079, "grad_norm": 0.5898461937904358, "learning_rate": 2.591712461012922e-06, "loss": 0.0595, "step": 49880 }, { "epoch": 7.4097727610277735, "grad_norm": 1.0788843631744385, "learning_rate": 2.5902272389722267e-06, "loss": 0.0512, "step": 49890 }, { "epoch": 7.411257983068468, "grad_norm": 0.790094792842865, "learning_rate": 2.5887420169315315e-06, "loss": 0.0583, "step": 49900 }, { "epoch": 7.412743205109164, "grad_norm": 1.4508802890777588, "learning_rate": 2.587256794890836e-06, "loss": 0.0608, "step": 49910 }, { "epoch": 7.414228427149859, "grad_norm": 0.36482828855514526, "learning_rate": 2.5857715728501416e-06, "loss": 0.0665, "step": 49920 }, { "epoch": 7.415713649190554, "grad_norm": 0.8149128556251526, "learning_rate": 2.584286350809446e-06, "loss": 0.0533, "step": 49930 }, { "epoch": 7.417198871231249, "grad_norm": 1.4138531684875488, "learning_rate": 2.582801128768751e-06, "loss": 0.0476, "step": 49940 }, { "epoch": 7.418684093271944, "grad_norm": 1.1289254426956177, "learning_rate": 2.5813159067280562e-06, "loss": 0.0567, "step": 49950 }, { "epoch": 7.42016931531264, "grad_norm": 0.5501538515090942, "learning_rate": 2.579830684687361e-06, "loss": 0.0384, "step": 49960 }, { "epoch": 7.4216545373533345, "grad_norm": 0.5963647961616516, "learning_rate": 2.578345462646666e-06, "loss": 0.0418, "step": 49970 }, { "epoch": 7.423139759394029, "grad_norm": 0.9482192993164062, "learning_rate": 2.576860240605971e-06, "loss": 0.0471, "step": 49980 }, { "epoch": 7.424624981434724, "grad_norm": 0.9947309494018555, "learning_rate": 2.575375018565276e-06, "loss": 0.0615, "step": 49990 }, { "epoch": 7.426110203475419, "grad_norm": 0.7360773682594299, "learning_rate": 2.5738897965245805e-06, "loss": 0.0619, "step": 50000 }, { "epoch": 7.427595425516115, "grad_norm": 1.2429487705230713, "learning_rate": 2.5724045744838854e-06, "loss": 0.0536, "step": 50010 }, { "epoch": 7.42908064755681, "grad_norm": 0.48328685760498047, "learning_rate": 2.5709193524431902e-06, "loss": 0.0524, "step": 50020 }, { "epoch": 7.430565869597505, "grad_norm": 0.6668089628219604, "learning_rate": 2.5694341304024955e-06, "loss": 0.0378, "step": 50030 }, { "epoch": 7.4320510916382, "grad_norm": 0.7315511703491211, "learning_rate": 2.5679489083618004e-06, "loss": 0.0507, "step": 50040 }, { "epoch": 7.433536313678895, "grad_norm": 0.9499955773353577, "learning_rate": 2.566463686321105e-06, "loss": 0.0583, "step": 50050 }, { "epoch": 7.43502153571959, "grad_norm": 1.1920688152313232, "learning_rate": 2.56497846428041e-06, "loss": 0.0585, "step": 50060 }, { "epoch": 7.436506757760285, "grad_norm": 0.6322932243347168, "learning_rate": 2.5634932422397153e-06, "loss": 0.0559, "step": 50070 }, { "epoch": 7.43799197980098, "grad_norm": 0.2330750972032547, "learning_rate": 2.5620080201990198e-06, "loss": 0.0438, "step": 50080 }, { "epoch": 7.439477201841675, "grad_norm": 1.0158408880233765, "learning_rate": 2.5605227981583246e-06, "loss": 0.0465, "step": 50090 }, { "epoch": 7.44096242388237, "grad_norm": 0.9743531942367554, "learning_rate": 2.55903757611763e-06, "loss": 0.0577, "step": 50100 }, { "epoch": 7.442447645923066, "grad_norm": 1.198718786239624, "learning_rate": 2.5575523540769348e-06, "loss": 0.0546, "step": 50110 }, { "epoch": 7.443932867963761, "grad_norm": 0.6434746384620667, "learning_rate": 2.5560671320362396e-06, "loss": 0.0647, "step": 50120 }, { "epoch": 7.4454180900044555, "grad_norm": 2.427293539047241, "learning_rate": 2.5545819099955445e-06, "loss": 0.0567, "step": 50130 }, { "epoch": 7.44690331204515, "grad_norm": 0.7034928202629089, "learning_rate": 2.5530966879548498e-06, "loss": 0.0615, "step": 50140 }, { "epoch": 7.448388534085846, "grad_norm": 0.41088375449180603, "learning_rate": 2.5516114659141546e-06, "loss": 0.044, "step": 50150 }, { "epoch": 7.449873756126541, "grad_norm": 0.7987599968910217, "learning_rate": 2.550126243873459e-06, "loss": 0.0553, "step": 50160 }, { "epoch": 7.451358978167236, "grad_norm": 1.0732996463775635, "learning_rate": 2.548641021832764e-06, "loss": 0.0567, "step": 50170 }, { "epoch": 7.452844200207931, "grad_norm": 0.3790551722049713, "learning_rate": 2.547155799792069e-06, "loss": 0.0524, "step": 50180 }, { "epoch": 7.454329422248626, "grad_norm": 1.484919548034668, "learning_rate": 2.545670577751374e-06, "loss": 0.055, "step": 50190 }, { "epoch": 7.455814644289322, "grad_norm": 0.797602117061615, "learning_rate": 2.544185355710679e-06, "loss": 0.0697, "step": 50200 }, { "epoch": 7.4572998663300165, "grad_norm": 0.8872159123420715, "learning_rate": 2.542700133669984e-06, "loss": 0.0582, "step": 50210 }, { "epoch": 7.458785088370711, "grad_norm": 1.2550395727157593, "learning_rate": 2.541214911629289e-06, "loss": 0.0584, "step": 50220 }, { "epoch": 7.460270310411406, "grad_norm": 0.709187388420105, "learning_rate": 2.5397296895885935e-06, "loss": 0.0453, "step": 50230 }, { "epoch": 7.461755532452101, "grad_norm": 1.2298972606658936, "learning_rate": 2.5382444675478983e-06, "loss": 0.0515, "step": 50240 }, { "epoch": 7.463240754492797, "grad_norm": 0.9435555934906006, "learning_rate": 2.5367592455072036e-06, "loss": 0.0594, "step": 50250 }, { "epoch": 7.464725976533492, "grad_norm": 1.29453706741333, "learning_rate": 2.5352740234665085e-06, "loss": 0.0544, "step": 50260 }, { "epoch": 7.466211198574187, "grad_norm": 0.9184277653694153, "learning_rate": 2.5337888014258133e-06, "loss": 0.0627, "step": 50270 }, { "epoch": 7.467696420614882, "grad_norm": 0.1898077428340912, "learning_rate": 2.532303579385118e-06, "loss": 0.0449, "step": 50280 }, { "epoch": 7.469181642655577, "grad_norm": 0.5328799486160278, "learning_rate": 2.5308183573444235e-06, "loss": 0.0458, "step": 50290 }, { "epoch": 7.470666864696272, "grad_norm": 0.43151047825813293, "learning_rate": 2.5293331353037283e-06, "loss": 0.0567, "step": 50300 }, { "epoch": 7.472152086736967, "grad_norm": 1.438880205154419, "learning_rate": 2.5278479132630328e-06, "loss": 0.0638, "step": 50310 }, { "epoch": 7.473637308777662, "grad_norm": 1.543481469154358, "learning_rate": 2.5263626912223376e-06, "loss": 0.0687, "step": 50320 }, { "epoch": 7.475122530818357, "grad_norm": 0.7402935028076172, "learning_rate": 2.524877469181643e-06, "loss": 0.0657, "step": 50330 }, { "epoch": 7.476607752859053, "grad_norm": 1.9598997831344604, "learning_rate": 2.5233922471409478e-06, "loss": 0.0535, "step": 50340 }, { "epoch": 7.478092974899748, "grad_norm": 0.6482336521148682, "learning_rate": 2.5219070251002526e-06, "loss": 0.0571, "step": 50350 }, { "epoch": 7.479578196940443, "grad_norm": 0.7986770272254944, "learning_rate": 2.520421803059558e-06, "loss": 0.049, "step": 50360 }, { "epoch": 7.481063418981138, "grad_norm": 0.5971524715423584, "learning_rate": 2.5189365810188627e-06, "loss": 0.0464, "step": 50370 }, { "epoch": 7.4825486410218325, "grad_norm": 0.5334088802337646, "learning_rate": 2.517451358978167e-06, "loss": 0.0379, "step": 50380 }, { "epoch": 7.484033863062528, "grad_norm": 0.7337051630020142, "learning_rate": 2.515966136937472e-06, "loss": 0.0442, "step": 50390 }, { "epoch": 7.485519085103223, "grad_norm": 0.6580100655555725, "learning_rate": 2.5144809148967773e-06, "loss": 0.0558, "step": 50400 }, { "epoch": 7.487004307143918, "grad_norm": 2.0155866146087646, "learning_rate": 2.512995692856082e-06, "loss": 0.0524, "step": 50410 }, { "epoch": 7.488489529184613, "grad_norm": 1.34869384765625, "learning_rate": 2.511510470815387e-06, "loss": 0.0498, "step": 50420 }, { "epoch": 7.489974751225308, "grad_norm": 0.701053261756897, "learning_rate": 2.510025248774692e-06, "loss": 0.0629, "step": 50430 }, { "epoch": 7.491459973266004, "grad_norm": 0.890023410320282, "learning_rate": 2.508540026733997e-06, "loss": 0.044, "step": 50440 }, { "epoch": 7.492945195306699, "grad_norm": 1.885815978050232, "learning_rate": 2.507054804693302e-06, "loss": 0.0562, "step": 50450 }, { "epoch": 7.4944304173473935, "grad_norm": 0.8531511425971985, "learning_rate": 2.5055695826526065e-06, "loss": 0.0661, "step": 50460 }, { "epoch": 7.495915639388088, "grad_norm": 1.0763678550720215, "learning_rate": 2.504084360611912e-06, "loss": 0.0709, "step": 50470 }, { "epoch": 7.497400861428783, "grad_norm": 1.0584913492202759, "learning_rate": 2.5025991385712166e-06, "loss": 0.0465, "step": 50480 }, { "epoch": 7.498886083469479, "grad_norm": 0.806760311126709, "learning_rate": 2.5011139165305215e-06, "loss": 0.046, "step": 50490 }, { "epoch": 7.500371305510174, "grad_norm": 0.5623704791069031, "learning_rate": 2.4996286944898267e-06, "loss": 0.066, "step": 50500 }, { "epoch": 7.501856527550869, "grad_norm": 1.3250830173492432, "learning_rate": 2.498143472449131e-06, "loss": 0.0697, "step": 50510 }, { "epoch": 7.503341749591564, "grad_norm": 1.0660487413406372, "learning_rate": 2.4966582504084365e-06, "loss": 0.0576, "step": 50520 }, { "epoch": 7.504826971632259, "grad_norm": 1.622944951057434, "learning_rate": 2.4951730283677413e-06, "loss": 0.0606, "step": 50530 }, { "epoch": 7.506312193672954, "grad_norm": 1.4378278255462646, "learning_rate": 2.493687806327046e-06, "loss": 0.0601, "step": 50540 }, { "epoch": 7.507797415713649, "grad_norm": 0.5110106468200684, "learning_rate": 2.492202584286351e-06, "loss": 0.0478, "step": 50550 }, { "epoch": 7.509282637754344, "grad_norm": 1.3880940675735474, "learning_rate": 2.490717362245656e-06, "loss": 0.0669, "step": 50560 }, { "epoch": 7.510767859795039, "grad_norm": 0.3109069764614105, "learning_rate": 2.4892321402049607e-06, "loss": 0.0534, "step": 50570 }, { "epoch": 7.512253081835734, "grad_norm": 1.1899399757385254, "learning_rate": 2.4877469181642656e-06, "loss": 0.0574, "step": 50580 }, { "epoch": 7.51373830387643, "grad_norm": 0.43168699741363525, "learning_rate": 2.4862616961235704e-06, "loss": 0.0587, "step": 50590 }, { "epoch": 7.515223525917125, "grad_norm": 1.5160905122756958, "learning_rate": 2.4847764740828757e-06, "loss": 0.0699, "step": 50600 }, { "epoch": 7.51670874795782, "grad_norm": 0.7211483120918274, "learning_rate": 2.4832912520421806e-06, "loss": 0.0549, "step": 50610 }, { "epoch": 7.5181939699985145, "grad_norm": 1.1276590824127197, "learning_rate": 2.4818060300014854e-06, "loss": 0.0677, "step": 50620 }, { "epoch": 7.519679192039209, "grad_norm": 0.8371122479438782, "learning_rate": 2.4803208079607903e-06, "loss": 0.05, "step": 50630 }, { "epoch": 7.521164414079905, "grad_norm": 0.7217128872871399, "learning_rate": 2.478835585920095e-06, "loss": 0.0599, "step": 50640 }, { "epoch": 7.5226496361206, "grad_norm": 1.5906058549880981, "learning_rate": 2.4773503638794004e-06, "loss": 0.0576, "step": 50650 }, { "epoch": 7.524134858161295, "grad_norm": 0.8109161257743835, "learning_rate": 2.475865141838705e-06, "loss": 0.0629, "step": 50660 }, { "epoch": 7.52562008020199, "grad_norm": 0.9763447642326355, "learning_rate": 2.47437991979801e-06, "loss": 0.0378, "step": 50670 }, { "epoch": 7.527105302242685, "grad_norm": 1.2344034910202026, "learning_rate": 2.472894697757315e-06, "loss": 0.0531, "step": 50680 }, { "epoch": 7.528590524283381, "grad_norm": 1.221603512763977, "learning_rate": 2.47140947571662e-06, "loss": 0.053, "step": 50690 }, { "epoch": 7.5300757463240755, "grad_norm": 1.376077651977539, "learning_rate": 2.4699242536759247e-06, "loss": 0.049, "step": 50700 }, { "epoch": 7.53156096836477, "grad_norm": 1.7430099248886108, "learning_rate": 2.4684390316352296e-06, "loss": 0.0603, "step": 50710 }, { "epoch": 7.533046190405465, "grad_norm": 0.40242937207221985, "learning_rate": 2.4669538095945344e-06, "loss": 0.0584, "step": 50720 }, { "epoch": 7.53453141244616, "grad_norm": 0.4440084993839264, "learning_rate": 2.4654685875538393e-06, "loss": 0.0441, "step": 50730 }, { "epoch": 7.536016634486856, "grad_norm": 1.1307166814804077, "learning_rate": 2.4639833655131446e-06, "loss": 0.0449, "step": 50740 }, { "epoch": 7.537501856527551, "grad_norm": 1.3973692655563354, "learning_rate": 2.4624981434724494e-06, "loss": 0.0549, "step": 50750 }, { "epoch": 7.538987078568246, "grad_norm": 1.0603303909301758, "learning_rate": 2.4610129214317543e-06, "loss": 0.0487, "step": 50760 }, { "epoch": 7.540472300608941, "grad_norm": 0.7205509543418884, "learning_rate": 2.459527699391059e-06, "loss": 0.0433, "step": 50770 }, { "epoch": 7.5419575226496365, "grad_norm": 0.5332754254341125, "learning_rate": 2.458042477350364e-06, "loss": 0.0485, "step": 50780 }, { "epoch": 7.543442744690331, "grad_norm": 1.2866615056991577, "learning_rate": 2.456557255309669e-06, "loss": 0.0558, "step": 50790 }, { "epoch": 7.544927966731026, "grad_norm": 1.0548276901245117, "learning_rate": 2.455072033268974e-06, "loss": 0.0554, "step": 50800 }, { "epoch": 7.546413188771721, "grad_norm": 1.173340916633606, "learning_rate": 2.4535868112282786e-06, "loss": 0.0651, "step": 50810 }, { "epoch": 7.547898410812416, "grad_norm": 1.104522466659546, "learning_rate": 2.452101589187584e-06, "loss": 0.0596, "step": 50820 }, { "epoch": 7.549383632853112, "grad_norm": 0.3949863314628601, "learning_rate": 2.4506163671468887e-06, "loss": 0.0492, "step": 50830 }, { "epoch": 7.550868854893807, "grad_norm": 1.375347375869751, "learning_rate": 2.4491311451061936e-06, "loss": 0.0569, "step": 50840 }, { "epoch": 7.552354076934502, "grad_norm": 1.8367573022842407, "learning_rate": 2.4476459230654984e-06, "loss": 0.0443, "step": 50850 }, { "epoch": 7.553839298975197, "grad_norm": 1.2803938388824463, "learning_rate": 2.4461607010248033e-06, "loss": 0.0462, "step": 50860 }, { "epoch": 7.555324521015892, "grad_norm": 0.6907781958580017, "learning_rate": 2.4446754789841086e-06, "loss": 0.0558, "step": 50870 }, { "epoch": 7.556809743056587, "grad_norm": 0.6908086538314819, "learning_rate": 2.4431902569434134e-06, "loss": 0.0519, "step": 50880 }, { "epoch": 7.558294965097282, "grad_norm": 0.7062651515007019, "learning_rate": 2.4417050349027183e-06, "loss": 0.0699, "step": 50890 }, { "epoch": 7.559780187137977, "grad_norm": 0.5621799826622009, "learning_rate": 2.440219812862023e-06, "loss": 0.0482, "step": 50900 }, { "epoch": 7.561265409178672, "grad_norm": 1.0808478593826294, "learning_rate": 2.438734590821328e-06, "loss": 0.053, "step": 50910 }, { "epoch": 7.562750631219368, "grad_norm": 0.8863072991371155, "learning_rate": 2.437249368780633e-06, "loss": 0.0522, "step": 50920 }, { "epoch": 7.564235853260063, "grad_norm": 0.433900386095047, "learning_rate": 2.4357641467399377e-06, "loss": 0.0557, "step": 50930 }, { "epoch": 7.5657210753007575, "grad_norm": 0.46487393975257874, "learning_rate": 2.4342789246992426e-06, "loss": 0.0559, "step": 50940 }, { "epoch": 7.5672062973414524, "grad_norm": 1.7626889944076538, "learning_rate": 2.432793702658548e-06, "loss": 0.048, "step": 50950 }, { "epoch": 7.568691519382147, "grad_norm": 1.2852072715759277, "learning_rate": 2.4313084806178523e-06, "loss": 0.0463, "step": 50960 }, { "epoch": 7.570176741422843, "grad_norm": 1.1062737703323364, "learning_rate": 2.4298232585771576e-06, "loss": 0.0449, "step": 50970 }, { "epoch": 7.571661963463538, "grad_norm": 0.741830587387085, "learning_rate": 2.4283380365364624e-06, "loss": 0.0538, "step": 50980 }, { "epoch": 7.573147185504233, "grad_norm": 1.578948736190796, "learning_rate": 2.4268528144957673e-06, "loss": 0.0631, "step": 50990 }, { "epoch": 7.574632407544928, "grad_norm": 0.8625466227531433, "learning_rate": 2.4253675924550725e-06, "loss": 0.0437, "step": 51000 }, { "epoch": 7.576117629585623, "grad_norm": 1.6692250967025757, "learning_rate": 2.423882370414377e-06, "loss": 0.0381, "step": 51010 }, { "epoch": 7.5776028516263185, "grad_norm": 0.9216070771217346, "learning_rate": 2.4223971483736823e-06, "loss": 0.0446, "step": 51020 }, { "epoch": 7.579088073667013, "grad_norm": 1.6464141607284546, "learning_rate": 2.420911926332987e-06, "loss": 0.0638, "step": 51030 }, { "epoch": 7.580573295707708, "grad_norm": 0.4722020626068115, "learning_rate": 2.419426704292292e-06, "loss": 0.0501, "step": 51040 }, { "epoch": 7.582058517748403, "grad_norm": 0.9962632060050964, "learning_rate": 2.417941482251597e-06, "loss": 0.0555, "step": 51050 }, { "epoch": 7.583543739789098, "grad_norm": 0.5598209500312805, "learning_rate": 2.4164562602109017e-06, "loss": 0.0474, "step": 51060 }, { "epoch": 7.585028961829794, "grad_norm": 1.4611873626708984, "learning_rate": 2.4149710381702065e-06, "loss": 0.0639, "step": 51070 }, { "epoch": 7.586514183870489, "grad_norm": 0.9028664231300354, "learning_rate": 2.4134858161295114e-06, "loss": 0.0561, "step": 51080 }, { "epoch": 7.587999405911184, "grad_norm": 0.5143422484397888, "learning_rate": 2.4120005940888163e-06, "loss": 0.0571, "step": 51090 }, { "epoch": 7.589484627951879, "grad_norm": 1.009613275527954, "learning_rate": 2.4105153720481215e-06, "loss": 0.048, "step": 51100 }, { "epoch": 7.5909698499925735, "grad_norm": 0.5486442446708679, "learning_rate": 2.409030150007426e-06, "loss": 0.0601, "step": 51110 }, { "epoch": 7.592455072033269, "grad_norm": 0.6895651817321777, "learning_rate": 2.4075449279667313e-06, "loss": 0.0704, "step": 51120 }, { "epoch": 7.593940294073964, "grad_norm": 1.1183245182037354, "learning_rate": 2.406059705926036e-06, "loss": 0.0511, "step": 51130 }, { "epoch": 7.595425516114659, "grad_norm": 0.41055673360824585, "learning_rate": 2.404574483885341e-06, "loss": 0.0451, "step": 51140 }, { "epoch": 7.596910738155354, "grad_norm": 0.33190709352493286, "learning_rate": 2.4030892618446462e-06, "loss": 0.0369, "step": 51150 }, { "epoch": 7.598395960196049, "grad_norm": 0.44257375597953796, "learning_rate": 2.4016040398039507e-06, "loss": 0.0492, "step": 51160 }, { "epoch": 7.599881182236745, "grad_norm": 1.0603954792022705, "learning_rate": 2.400118817763256e-06, "loss": 0.0397, "step": 51170 }, { "epoch": 7.60136640427744, "grad_norm": 1.061376929283142, "learning_rate": 2.398633595722561e-06, "loss": 0.0646, "step": 51180 }, { "epoch": 7.6028516263181345, "grad_norm": 0.30382776260375977, "learning_rate": 2.3971483736818657e-06, "loss": 0.0471, "step": 51190 }, { "epoch": 7.604336848358829, "grad_norm": 0.6130696535110474, "learning_rate": 2.3956631516411705e-06, "loss": 0.059, "step": 51200 }, { "epoch": 7.605822070399524, "grad_norm": 0.900826096534729, "learning_rate": 2.3941779296004754e-06, "loss": 0.0568, "step": 51210 }, { "epoch": 7.60730729244022, "grad_norm": 1.1771337985992432, "learning_rate": 2.3926927075597802e-06, "loss": 0.0648, "step": 51220 }, { "epoch": 7.608792514480915, "grad_norm": 1.5176537036895752, "learning_rate": 2.3912074855190855e-06, "loss": 0.0504, "step": 51230 }, { "epoch": 7.61027773652161, "grad_norm": 0.759335994720459, "learning_rate": 2.38972226347839e-06, "loss": 0.0348, "step": 51240 }, { "epoch": 7.611762958562305, "grad_norm": 1.2962226867675781, "learning_rate": 2.3882370414376952e-06, "loss": 0.0683, "step": 51250 }, { "epoch": 7.613248180603, "grad_norm": 0.6675172448158264, "learning_rate": 2.386751819397e-06, "loss": 0.0649, "step": 51260 }, { "epoch": 7.6147334026436955, "grad_norm": 0.7499662637710571, "learning_rate": 2.385266597356305e-06, "loss": 0.0605, "step": 51270 }, { "epoch": 7.61621862468439, "grad_norm": 0.8165285587310791, "learning_rate": 2.38378137531561e-06, "loss": 0.0493, "step": 51280 }, { "epoch": 7.617703846725085, "grad_norm": 1.2159758806228638, "learning_rate": 2.3822961532749147e-06, "loss": 0.0489, "step": 51290 }, { "epoch": 7.61918906876578, "grad_norm": 1.0896236896514893, "learning_rate": 2.38081093123422e-06, "loss": 0.0632, "step": 51300 }, { "epoch": 7.620674290806475, "grad_norm": 1.3957602977752686, "learning_rate": 2.3793257091935244e-06, "loss": 0.0464, "step": 51310 }, { "epoch": 7.622159512847171, "grad_norm": 0.7095270156860352, "learning_rate": 2.3778404871528297e-06, "loss": 0.0583, "step": 51320 }, { "epoch": 7.623644734887866, "grad_norm": 0.8131939768791199, "learning_rate": 2.3763552651121345e-06, "loss": 0.0651, "step": 51330 }, { "epoch": 7.625129956928561, "grad_norm": 0.3545960783958435, "learning_rate": 2.3748700430714394e-06, "loss": 0.0441, "step": 51340 }, { "epoch": 7.626615178969256, "grad_norm": 1.155347466468811, "learning_rate": 2.3733848210307442e-06, "loss": 0.0453, "step": 51350 }, { "epoch": 7.628100401009951, "grad_norm": 1.5265934467315674, "learning_rate": 2.371899598990049e-06, "loss": 0.051, "step": 51360 }, { "epoch": 7.629585623050646, "grad_norm": 0.5227043032646179, "learning_rate": 2.370414376949354e-06, "loss": 0.039, "step": 51370 }, { "epoch": 7.631070845091341, "grad_norm": 0.9405038356781006, "learning_rate": 2.3689291549086592e-06, "loss": 0.0394, "step": 51380 }, { "epoch": 7.632556067132036, "grad_norm": 0.7868645787239075, "learning_rate": 2.367443932867964e-06, "loss": 0.0526, "step": 51390 }, { "epoch": 7.634041289172731, "grad_norm": 0.6418449878692627, "learning_rate": 2.365958710827269e-06, "loss": 0.0591, "step": 51400 }, { "epoch": 7.635526511213427, "grad_norm": 0.6941629648208618, "learning_rate": 2.364473488786574e-06, "loss": 0.0659, "step": 51410 }, { "epoch": 7.637011733254122, "grad_norm": 1.0778722763061523, "learning_rate": 2.3629882667458787e-06, "loss": 0.0574, "step": 51420 }, { "epoch": 7.6384969552948165, "grad_norm": 0.9663993716239929, "learning_rate": 2.3615030447051835e-06, "loss": 0.0558, "step": 51430 }, { "epoch": 7.639982177335511, "grad_norm": 0.7751511335372925, "learning_rate": 2.3600178226644884e-06, "loss": 0.0574, "step": 51440 }, { "epoch": 7.641467399376207, "grad_norm": 0.7096276879310608, "learning_rate": 2.3585326006237936e-06, "loss": 0.0555, "step": 51450 }, { "epoch": 7.642952621416902, "grad_norm": 0.23418889939785004, "learning_rate": 2.357047378583098e-06, "loss": 0.0493, "step": 51460 }, { "epoch": 7.644437843457597, "grad_norm": 1.6592222452163696, "learning_rate": 2.3555621565424034e-06, "loss": 0.0617, "step": 51470 }, { "epoch": 7.645923065498292, "grad_norm": 1.0019330978393555, "learning_rate": 2.3540769345017082e-06, "loss": 0.0755, "step": 51480 }, { "epoch": 7.647408287538987, "grad_norm": 0.5399784445762634, "learning_rate": 2.352591712461013e-06, "loss": 0.0481, "step": 51490 }, { "epoch": 7.648893509579683, "grad_norm": 0.8970789909362793, "learning_rate": 2.351106490420318e-06, "loss": 0.0656, "step": 51500 }, { "epoch": 7.6503787316203775, "grad_norm": 0.7304027080535889, "learning_rate": 2.349621268379623e-06, "loss": 0.0467, "step": 51510 }, { "epoch": 7.651863953661072, "grad_norm": 0.49368733167648315, "learning_rate": 2.348136046338928e-06, "loss": 0.0558, "step": 51520 }, { "epoch": 7.653349175701767, "grad_norm": 0.6555716395378113, "learning_rate": 2.346650824298233e-06, "loss": 0.0563, "step": 51530 }, { "epoch": 7.654834397742462, "grad_norm": 1.2767904996871948, "learning_rate": 2.3451656022575378e-06, "loss": 0.0536, "step": 51540 }, { "epoch": 7.656319619783158, "grad_norm": 0.8550059199333191, "learning_rate": 2.3436803802168426e-06, "loss": 0.0714, "step": 51550 }, { "epoch": 7.657804841823853, "grad_norm": 0.6468340754508972, "learning_rate": 2.3421951581761475e-06, "loss": 0.0665, "step": 51560 }, { "epoch": 7.659290063864548, "grad_norm": 0.9511982798576355, "learning_rate": 2.3407099361354524e-06, "loss": 0.0522, "step": 51570 }, { "epoch": 7.660775285905243, "grad_norm": 1.087459921836853, "learning_rate": 2.3392247140947576e-06, "loss": 0.055, "step": 51580 }, { "epoch": 7.662260507945938, "grad_norm": 1.2931932210922241, "learning_rate": 2.337739492054062e-06, "loss": 0.0548, "step": 51590 }, { "epoch": 7.663745729986633, "grad_norm": 1.1031076908111572, "learning_rate": 2.3362542700133673e-06, "loss": 0.0493, "step": 51600 }, { "epoch": 7.665230952027328, "grad_norm": 0.9600025415420532, "learning_rate": 2.334769047972672e-06, "loss": 0.0462, "step": 51610 }, { "epoch": 7.666716174068023, "grad_norm": 0.45130455493927, "learning_rate": 2.333283825931977e-06, "loss": 0.0672, "step": 51620 }, { "epoch": 7.668201396108718, "grad_norm": 1.0152677297592163, "learning_rate": 2.331798603891282e-06, "loss": 0.064, "step": 51630 }, { "epoch": 7.669686618149413, "grad_norm": 0.9798538684844971, "learning_rate": 2.3303133818505868e-06, "loss": 0.045, "step": 51640 }, { "epoch": 7.671171840190109, "grad_norm": 1.3446485996246338, "learning_rate": 2.3288281598098916e-06, "loss": 0.0712, "step": 51650 }, { "epoch": 7.672657062230804, "grad_norm": 0.8345891237258911, "learning_rate": 2.3273429377691965e-06, "loss": 0.0673, "step": 51660 }, { "epoch": 7.674142284271499, "grad_norm": 0.708493173122406, "learning_rate": 2.3258577157285018e-06, "loss": 0.0695, "step": 51670 }, { "epoch": 7.6756275063121935, "grad_norm": 0.6146459579467773, "learning_rate": 2.3243724936878066e-06, "loss": 0.0478, "step": 51680 }, { "epoch": 7.677112728352888, "grad_norm": 0.636083722114563, "learning_rate": 2.3228872716471115e-06, "loss": 0.0622, "step": 51690 }, { "epoch": 7.678597950393584, "grad_norm": 0.8670116066932678, "learning_rate": 2.3214020496064163e-06, "loss": 0.0694, "step": 51700 }, { "epoch": 7.680083172434279, "grad_norm": 1.6330854892730713, "learning_rate": 2.319916827565721e-06, "loss": 0.0374, "step": 51710 }, { "epoch": 7.681568394474974, "grad_norm": 0.9551437497138977, "learning_rate": 2.318431605525026e-06, "loss": 0.0531, "step": 51720 }, { "epoch": 7.683053616515669, "grad_norm": 1.1932165622711182, "learning_rate": 2.3169463834843313e-06, "loss": 0.0606, "step": 51730 }, { "epoch": 7.684538838556364, "grad_norm": 0.9700741767883301, "learning_rate": 2.3154611614436358e-06, "loss": 0.0709, "step": 51740 }, { "epoch": 7.68602406059706, "grad_norm": 0.6711783409118652, "learning_rate": 2.313975939402941e-06, "loss": 0.0566, "step": 51750 }, { "epoch": 7.6875092826377545, "grad_norm": 1.296689748764038, "learning_rate": 2.312490717362246e-06, "loss": 0.0814, "step": 51760 }, { "epoch": 7.688994504678449, "grad_norm": 1.6995421648025513, "learning_rate": 2.3110054953215508e-06, "loss": 0.0648, "step": 51770 }, { "epoch": 7.690479726719144, "grad_norm": 1.0179144144058228, "learning_rate": 2.3095202732808556e-06, "loss": 0.0684, "step": 51780 }, { "epoch": 7.691964948759839, "grad_norm": 0.3230994641780853, "learning_rate": 2.3080350512401605e-06, "loss": 0.063, "step": 51790 }, { "epoch": 7.693450170800535, "grad_norm": 0.6015810966491699, "learning_rate": 2.3065498291994658e-06, "loss": 0.0556, "step": 51800 }, { "epoch": 7.69493539284123, "grad_norm": 0.5402663946151733, "learning_rate": 2.30506460715877e-06, "loss": 0.0477, "step": 51810 }, { "epoch": 7.696420614881925, "grad_norm": 0.5034302473068237, "learning_rate": 2.3035793851180755e-06, "loss": 0.0502, "step": 51820 }, { "epoch": 7.69790583692262, "grad_norm": 1.468308448791504, "learning_rate": 2.3020941630773803e-06, "loss": 0.0629, "step": 51830 }, { "epoch": 7.6993910589633145, "grad_norm": 0.7070731520652771, "learning_rate": 2.300608941036685e-06, "loss": 0.0487, "step": 51840 }, { "epoch": 7.70087628100401, "grad_norm": 0.8511458039283752, "learning_rate": 2.29912371899599e-06, "loss": 0.0618, "step": 51850 }, { "epoch": 7.702361503044705, "grad_norm": 1.0228487253189087, "learning_rate": 2.297638496955295e-06, "loss": 0.0545, "step": 51860 }, { "epoch": 7.7038467250854, "grad_norm": 0.9535961747169495, "learning_rate": 2.2961532749145998e-06, "loss": 0.054, "step": 51870 }, { "epoch": 7.705331947126095, "grad_norm": 0.6639396548271179, "learning_rate": 2.294668052873905e-06, "loss": 0.0702, "step": 51880 }, { "epoch": 7.70681716916679, "grad_norm": 0.8463499546051025, "learning_rate": 2.2931828308332095e-06, "loss": 0.0521, "step": 51890 }, { "epoch": 7.708302391207486, "grad_norm": 0.9757125973701477, "learning_rate": 2.2916976087925147e-06, "loss": 0.0603, "step": 51900 }, { "epoch": 7.709787613248181, "grad_norm": 0.6511328816413879, "learning_rate": 2.2902123867518196e-06, "loss": 0.053, "step": 51910 }, { "epoch": 7.7112728352888755, "grad_norm": 0.5482888221740723, "learning_rate": 2.2887271647111245e-06, "loss": 0.0582, "step": 51920 }, { "epoch": 7.71275805732957, "grad_norm": 0.8813942670822144, "learning_rate": 2.2872419426704297e-06, "loss": 0.0563, "step": 51930 }, { "epoch": 7.714243279370266, "grad_norm": 0.5689736604690552, "learning_rate": 2.285756720629734e-06, "loss": 0.0564, "step": 51940 }, { "epoch": 7.715728501410961, "grad_norm": 0.744314432144165, "learning_rate": 2.2842714985890395e-06, "loss": 0.0476, "step": 51950 }, { "epoch": 7.717213723451656, "grad_norm": 0.7114430665969849, "learning_rate": 2.2827862765483443e-06, "loss": 0.0517, "step": 51960 }, { "epoch": 7.718698945492351, "grad_norm": 0.8304001688957214, "learning_rate": 2.281301054507649e-06, "loss": 0.0509, "step": 51970 }, { "epoch": 7.720184167533046, "grad_norm": 0.5770793557167053, "learning_rate": 2.279815832466954e-06, "loss": 0.0548, "step": 51980 }, { "epoch": 7.721669389573742, "grad_norm": 0.4372476041316986, "learning_rate": 2.278330610426259e-06, "loss": 0.0495, "step": 51990 }, { "epoch": 7.7231546116144365, "grad_norm": 1.424376368522644, "learning_rate": 2.2768453883855637e-06, "loss": 0.0598, "step": 52000 }, { "epoch": 7.724639833655131, "grad_norm": 0.3935073912143707, "learning_rate": 2.2753601663448686e-06, "loss": 0.0638, "step": 52010 }, { "epoch": 7.726125055695826, "grad_norm": 0.4132566750049591, "learning_rate": 2.2738749443041735e-06, "loss": 0.0546, "step": 52020 }, { "epoch": 7.727610277736522, "grad_norm": 0.8765859007835388, "learning_rate": 2.2723897222634787e-06, "loss": 0.0548, "step": 52030 }, { "epoch": 7.729095499777217, "grad_norm": 0.8252843022346497, "learning_rate": 2.270904500222783e-06, "loss": 0.0569, "step": 52040 }, { "epoch": 7.730580721817912, "grad_norm": 1.28359854221344, "learning_rate": 2.2694192781820884e-06, "loss": 0.0654, "step": 52050 }, { "epoch": 7.732065943858607, "grad_norm": 0.8124073147773743, "learning_rate": 2.2679340561413933e-06, "loss": 0.0544, "step": 52060 }, { "epoch": 7.733551165899302, "grad_norm": 0.8168864250183105, "learning_rate": 2.266448834100698e-06, "loss": 0.061, "step": 52070 }, { "epoch": 7.7350363879399975, "grad_norm": 1.260461449623108, "learning_rate": 2.2649636120600034e-06, "loss": 0.057, "step": 52080 }, { "epoch": 7.736521609980692, "grad_norm": 0.668998122215271, "learning_rate": 2.263478390019308e-06, "loss": 0.0472, "step": 52090 }, { "epoch": 7.738006832021387, "grad_norm": 1.221310019493103, "learning_rate": 2.261993167978613e-06, "loss": 0.0428, "step": 52100 }, { "epoch": 7.739492054062082, "grad_norm": 0.6181626319885254, "learning_rate": 2.260507945937918e-06, "loss": 0.067, "step": 52110 }, { "epoch": 7.740977276102777, "grad_norm": 0.6286818981170654, "learning_rate": 2.259022723897223e-06, "loss": 0.0441, "step": 52120 }, { "epoch": 7.742462498143473, "grad_norm": 0.8674864172935486, "learning_rate": 2.2575375018565277e-06, "loss": 0.0601, "step": 52130 }, { "epoch": 7.743947720184168, "grad_norm": 0.7923988103866577, "learning_rate": 2.2560522798158326e-06, "loss": 0.0443, "step": 52140 }, { "epoch": 7.745432942224863, "grad_norm": 1.3793833255767822, "learning_rate": 2.2545670577751374e-06, "loss": 0.0513, "step": 52150 }, { "epoch": 7.746918164265558, "grad_norm": 1.1016931533813477, "learning_rate": 2.2530818357344423e-06, "loss": 0.0556, "step": 52160 }, { "epoch": 7.7484033863062525, "grad_norm": 0.9921779632568359, "learning_rate": 2.251596613693747e-06, "loss": 0.0636, "step": 52170 }, { "epoch": 7.749888608346948, "grad_norm": 0.7895703911781311, "learning_rate": 2.2501113916530524e-06, "loss": 0.0809, "step": 52180 }, { "epoch": 7.751373830387643, "grad_norm": 0.9937105774879456, "learning_rate": 2.2486261696123573e-06, "loss": 0.0633, "step": 52190 }, { "epoch": 7.752859052428338, "grad_norm": 0.8154398202896118, "learning_rate": 2.247140947571662e-06, "loss": 0.0518, "step": 52200 }, { "epoch": 7.754344274469033, "grad_norm": 0.7080826163291931, "learning_rate": 2.245655725530967e-06, "loss": 0.0548, "step": 52210 }, { "epoch": 7.755829496509728, "grad_norm": 0.2519676089286804, "learning_rate": 2.244170503490272e-06, "loss": 0.058, "step": 52220 }, { "epoch": 7.757314718550424, "grad_norm": 0.39050978422164917, "learning_rate": 2.242685281449577e-06, "loss": 0.0608, "step": 52230 }, { "epoch": 7.7587999405911185, "grad_norm": 0.906412661075592, "learning_rate": 2.2412000594088816e-06, "loss": 0.0685, "step": 52240 }, { "epoch": 7.7602851626318134, "grad_norm": 0.9142884612083435, "learning_rate": 2.239714837368187e-06, "loss": 0.0591, "step": 52250 }, { "epoch": 7.761770384672508, "grad_norm": 0.45599719882011414, "learning_rate": 2.2382296153274917e-06, "loss": 0.0588, "step": 52260 }, { "epoch": 7.763255606713203, "grad_norm": 0.7237184643745422, "learning_rate": 2.2367443932867966e-06, "loss": 0.0518, "step": 52270 }, { "epoch": 7.764740828753899, "grad_norm": 1.3857414722442627, "learning_rate": 2.2352591712461014e-06, "loss": 0.0556, "step": 52280 }, { "epoch": 7.766226050794594, "grad_norm": 0.861685037612915, "learning_rate": 2.2337739492054063e-06, "loss": 0.0654, "step": 52290 }, { "epoch": 7.767711272835289, "grad_norm": 0.43121787905693054, "learning_rate": 2.232288727164711e-06, "loss": 0.0532, "step": 52300 }, { "epoch": 7.769196494875984, "grad_norm": 0.9009015560150146, "learning_rate": 2.2308035051240164e-06, "loss": 0.046, "step": 52310 }, { "epoch": 7.770681716916679, "grad_norm": 0.5191366672515869, "learning_rate": 2.2293182830833213e-06, "loss": 0.0521, "step": 52320 }, { "epoch": 7.772166938957374, "grad_norm": 1.2074165344238281, "learning_rate": 2.227833061042626e-06, "loss": 0.0428, "step": 52330 }, { "epoch": 7.773652160998069, "grad_norm": 0.3799234926700592, "learning_rate": 2.226347839001931e-06, "loss": 0.0617, "step": 52340 }, { "epoch": 7.775137383038764, "grad_norm": 0.7373781800270081, "learning_rate": 2.224862616961236e-06, "loss": 0.0699, "step": 52350 }, { "epoch": 7.776622605079459, "grad_norm": 0.8077521920204163, "learning_rate": 2.2233773949205407e-06, "loss": 0.0534, "step": 52360 }, { "epoch": 7.778107827120154, "grad_norm": 0.6925342082977295, "learning_rate": 2.2218921728798456e-06, "loss": 0.0478, "step": 52370 }, { "epoch": 7.77959304916085, "grad_norm": 0.9443783760070801, "learning_rate": 2.220406950839151e-06, "loss": 0.0657, "step": 52380 }, { "epoch": 7.781078271201545, "grad_norm": 0.4062022268772125, "learning_rate": 2.2189217287984553e-06, "loss": 0.0429, "step": 52390 }, { "epoch": 7.78256349324224, "grad_norm": 0.4872238337993622, "learning_rate": 2.2174365067577606e-06, "loss": 0.0584, "step": 52400 }, { "epoch": 7.7840487152829345, "grad_norm": 0.7286458611488342, "learning_rate": 2.2159512847170654e-06, "loss": 0.0433, "step": 52410 }, { "epoch": 7.785533937323629, "grad_norm": 1.2079678773880005, "learning_rate": 2.2144660626763703e-06, "loss": 0.0717, "step": 52420 }, { "epoch": 7.787019159364325, "grad_norm": 0.7324601411819458, "learning_rate": 2.212980840635675e-06, "loss": 0.0735, "step": 52430 }, { "epoch": 7.78850438140502, "grad_norm": 0.6645302176475525, "learning_rate": 2.21149561859498e-06, "loss": 0.0494, "step": 52440 }, { "epoch": 7.789989603445715, "grad_norm": 0.8328620195388794, "learning_rate": 2.2100103965542853e-06, "loss": 0.051, "step": 52450 }, { "epoch": 7.79147482548641, "grad_norm": 0.7536907196044922, "learning_rate": 2.20852517451359e-06, "loss": 0.0669, "step": 52460 }, { "epoch": 7.792960047527105, "grad_norm": 0.7514217495918274, "learning_rate": 2.207039952472895e-06, "loss": 0.0474, "step": 52470 }, { "epoch": 7.794445269567801, "grad_norm": 0.9645404815673828, "learning_rate": 2.2055547304322e-06, "loss": 0.0658, "step": 52480 }, { "epoch": 7.7959304916084955, "grad_norm": 0.726373016834259, "learning_rate": 2.2040695083915047e-06, "loss": 0.0662, "step": 52490 }, { "epoch": 7.79741571364919, "grad_norm": 0.6940188407897949, "learning_rate": 2.2025842863508096e-06, "loss": 0.0476, "step": 52500 }, { "epoch": 7.798900935689885, "grad_norm": 0.8864266276359558, "learning_rate": 2.2010990643101144e-06, "loss": 0.0607, "step": 52510 }, { "epoch": 7.800386157730581, "grad_norm": 0.7016078233718872, "learning_rate": 2.1996138422694193e-06, "loss": 0.0354, "step": 52520 }, { "epoch": 7.801871379771276, "grad_norm": 0.42285242676734924, "learning_rate": 2.1981286202287245e-06, "loss": 0.0515, "step": 52530 }, { "epoch": 7.803356601811971, "grad_norm": 1.1683651208877563, "learning_rate": 2.196643398188029e-06, "loss": 0.0633, "step": 52540 }, { "epoch": 7.804841823852666, "grad_norm": 1.3887553215026855, "learning_rate": 2.1951581761473343e-06, "loss": 0.0682, "step": 52550 }, { "epoch": 7.806327045893361, "grad_norm": 0.7513377070426941, "learning_rate": 2.193672954106639e-06, "loss": 0.0351, "step": 52560 }, { "epoch": 7.8078122679340565, "grad_norm": 0.6102052330970764, "learning_rate": 2.192187732065944e-06, "loss": 0.0512, "step": 52570 }, { "epoch": 7.809297489974751, "grad_norm": 0.46565932035446167, "learning_rate": 2.1907025100252493e-06, "loss": 0.067, "step": 52580 }, { "epoch": 7.810782712015446, "grad_norm": 0.8263485431671143, "learning_rate": 2.1892172879845537e-06, "loss": 0.0484, "step": 52590 }, { "epoch": 7.812267934056141, "grad_norm": 0.8608595132827759, "learning_rate": 2.187732065943859e-06, "loss": 0.0549, "step": 52600 }, { "epoch": 7.813753156096837, "grad_norm": 1.4060004949569702, "learning_rate": 2.186246843903164e-06, "loss": 0.0529, "step": 52610 }, { "epoch": 7.815238378137532, "grad_norm": 0.8970702886581421, "learning_rate": 2.1847616218624687e-06, "loss": 0.0636, "step": 52620 }, { "epoch": 7.816723600178227, "grad_norm": 0.3874322175979614, "learning_rate": 2.1832763998217735e-06, "loss": 0.0545, "step": 52630 }, { "epoch": 7.818208822218922, "grad_norm": 2.098299264907837, "learning_rate": 2.1817911777810784e-06, "loss": 0.0362, "step": 52640 }, { "epoch": 7.819694044259617, "grad_norm": 0.3663277328014374, "learning_rate": 2.1803059557403833e-06, "loss": 0.0573, "step": 52650 }, { "epoch": 7.821179266300312, "grad_norm": 0.8399795889854431, "learning_rate": 2.1788207336996885e-06, "loss": 0.0484, "step": 52660 }, { "epoch": 7.822664488341007, "grad_norm": 1.1129577159881592, "learning_rate": 2.177335511658993e-06, "loss": 0.0467, "step": 52670 }, { "epoch": 7.824149710381702, "grad_norm": 0.6233702898025513, "learning_rate": 2.1758502896182982e-06, "loss": 0.0825, "step": 52680 }, { "epoch": 7.825634932422397, "grad_norm": 1.300802230834961, "learning_rate": 2.174365067577603e-06, "loss": 0.0493, "step": 52690 }, { "epoch": 7.827120154463092, "grad_norm": 0.338610976934433, "learning_rate": 2.172879845536908e-06, "loss": 0.0549, "step": 52700 }, { "epoch": 7.828605376503788, "grad_norm": 0.6561576128005981, "learning_rate": 2.171394623496213e-06, "loss": 0.0544, "step": 52710 }, { "epoch": 7.830090598544483, "grad_norm": 0.48244038224220276, "learning_rate": 2.1699094014555177e-06, "loss": 0.0555, "step": 52720 }, { "epoch": 7.8315758205851775, "grad_norm": 0.2857292890548706, "learning_rate": 2.168424179414823e-06, "loss": 0.0516, "step": 52730 }, { "epoch": 7.833061042625872, "grad_norm": 0.9775968194007874, "learning_rate": 2.1669389573741274e-06, "loss": 0.0556, "step": 52740 }, { "epoch": 7.834546264666567, "grad_norm": 0.8055846691131592, "learning_rate": 2.1654537353334327e-06, "loss": 0.0591, "step": 52750 }, { "epoch": 7.836031486707263, "grad_norm": 1.5750608444213867, "learning_rate": 2.1639685132927375e-06, "loss": 0.0636, "step": 52760 }, { "epoch": 7.837516708747958, "grad_norm": 0.9055830240249634, "learning_rate": 2.1624832912520424e-06, "loss": 0.0478, "step": 52770 }, { "epoch": 7.839001930788653, "grad_norm": 0.4669037461280823, "learning_rate": 2.1609980692113472e-06, "loss": 0.0656, "step": 52780 }, { "epoch": 7.840487152829348, "grad_norm": 1.7319399118423462, "learning_rate": 2.159512847170652e-06, "loss": 0.0762, "step": 52790 }, { "epoch": 7.841972374870043, "grad_norm": 0.9436827898025513, "learning_rate": 2.158027625129957e-06, "loss": 0.0523, "step": 52800 }, { "epoch": 7.8434575969107385, "grad_norm": 1.1199607849121094, "learning_rate": 2.1565424030892622e-06, "loss": 0.0527, "step": 52810 }, { "epoch": 7.844942818951433, "grad_norm": 0.680494487285614, "learning_rate": 2.1550571810485667e-06, "loss": 0.049, "step": 52820 }, { "epoch": 7.846428040992128, "grad_norm": 1.027863621711731, "learning_rate": 2.153571959007872e-06, "loss": 0.0434, "step": 52830 }, { "epoch": 7.847913263032823, "grad_norm": 0.5206725001335144, "learning_rate": 2.152086736967177e-06, "loss": 0.0404, "step": 52840 }, { "epoch": 7.849398485073518, "grad_norm": 0.6419275403022766, "learning_rate": 2.1506015149264817e-06, "loss": 0.0598, "step": 52850 }, { "epoch": 7.850883707114214, "grad_norm": 1.9312747716903687, "learning_rate": 2.1491162928857865e-06, "loss": 0.0482, "step": 52860 }, { "epoch": 7.852368929154909, "grad_norm": 0.9515159130096436, "learning_rate": 2.1476310708450914e-06, "loss": 0.0689, "step": 52870 }, { "epoch": 7.853854151195604, "grad_norm": 0.5467528700828552, "learning_rate": 2.1461458488043967e-06, "loss": 0.07, "step": 52880 }, { "epoch": 7.855339373236299, "grad_norm": 0.9535314440727234, "learning_rate": 2.144660626763701e-06, "loss": 0.0678, "step": 52890 }, { "epoch": 7.8568245952769935, "grad_norm": 0.6750079989433289, "learning_rate": 2.1431754047230064e-06, "loss": 0.0744, "step": 52900 }, { "epoch": 7.858309817317689, "grad_norm": 1.2106472253799438, "learning_rate": 2.1416901826823112e-06, "loss": 0.059, "step": 52910 }, { "epoch": 7.859795039358384, "grad_norm": 1.0857863426208496, "learning_rate": 2.140204960641616e-06, "loss": 0.0547, "step": 52920 }, { "epoch": 7.861280261399079, "grad_norm": 1.1065659523010254, "learning_rate": 2.138719738600921e-06, "loss": 0.0556, "step": 52930 }, { "epoch": 7.862765483439774, "grad_norm": 1.1433991193771362, "learning_rate": 2.137234516560226e-06, "loss": 0.0623, "step": 52940 }, { "epoch": 7.864250705480469, "grad_norm": 0.4376066327095032, "learning_rate": 2.1357492945195307e-06, "loss": 0.0546, "step": 52950 }, { "epoch": 7.865735927521165, "grad_norm": 0.9114493727684021, "learning_rate": 2.134264072478836e-06, "loss": 0.0547, "step": 52960 }, { "epoch": 7.86722114956186, "grad_norm": 0.5998684167861938, "learning_rate": 2.132778850438141e-06, "loss": 0.045, "step": 52970 }, { "epoch": 7.8687063716025545, "grad_norm": 0.5542938113212585, "learning_rate": 2.1312936283974456e-06, "loss": 0.0506, "step": 52980 }, { "epoch": 7.870191593643249, "grad_norm": 0.6076810956001282, "learning_rate": 2.1298084063567505e-06, "loss": 0.0437, "step": 52990 }, { "epoch": 7.871676815683944, "grad_norm": 0.691862165927887, "learning_rate": 2.1283231843160554e-06, "loss": 0.0686, "step": 53000 }, { "epoch": 7.87316203772464, "grad_norm": 1.2608790397644043, "learning_rate": 2.1268379622753606e-06, "loss": 0.0612, "step": 53010 }, { "epoch": 7.874647259765335, "grad_norm": 0.6385157704353333, "learning_rate": 2.125352740234665e-06, "loss": 0.0643, "step": 53020 }, { "epoch": 7.87613248180603, "grad_norm": 1.4922287464141846, "learning_rate": 2.1238675181939704e-06, "loss": 0.0688, "step": 53030 }, { "epoch": 7.877617703846725, "grad_norm": 0.8557190299034119, "learning_rate": 2.122382296153275e-06, "loss": 0.0517, "step": 53040 }, { "epoch": 7.87910292588742, "grad_norm": 0.8984439373016357, "learning_rate": 2.12089707411258e-06, "loss": 0.0448, "step": 53050 }, { "epoch": 7.8805881479281155, "grad_norm": 0.6511837244033813, "learning_rate": 2.119411852071885e-06, "loss": 0.0536, "step": 53060 }, { "epoch": 7.88207336996881, "grad_norm": 0.7384523153305054, "learning_rate": 2.1179266300311898e-06, "loss": 0.0609, "step": 53070 }, { "epoch": 7.883558592009505, "grad_norm": 0.6342982649803162, "learning_rate": 2.1164414079904946e-06, "loss": 0.0501, "step": 53080 }, { "epoch": 7.8850438140502, "grad_norm": 1.1879791021347046, "learning_rate": 2.1149561859497995e-06, "loss": 0.051, "step": 53090 }, { "epoch": 7.886529036090896, "grad_norm": 1.1492778062820435, "learning_rate": 2.1134709639091048e-06, "loss": 0.0661, "step": 53100 }, { "epoch": 7.888014258131591, "grad_norm": 1.4980379343032837, "learning_rate": 2.1119857418684096e-06, "loss": 0.0498, "step": 53110 }, { "epoch": 7.889499480172286, "grad_norm": 1.5005109310150146, "learning_rate": 2.1105005198277145e-06, "loss": 0.046, "step": 53120 }, { "epoch": 7.890984702212981, "grad_norm": 0.8864477872848511, "learning_rate": 2.1090152977870193e-06, "loss": 0.0581, "step": 53130 }, { "epoch": 7.8924699242536756, "grad_norm": 0.39365842938423157, "learning_rate": 2.107530075746324e-06, "loss": 0.0362, "step": 53140 }, { "epoch": 7.893955146294371, "grad_norm": 0.6498399376869202, "learning_rate": 2.106044853705629e-06, "loss": 0.0612, "step": 53150 }, { "epoch": 7.895440368335066, "grad_norm": 1.087868094444275, "learning_rate": 2.1045596316649343e-06, "loss": 0.0404, "step": 53160 }, { "epoch": 7.896925590375761, "grad_norm": 0.6875630617141724, "learning_rate": 2.1030744096242388e-06, "loss": 0.0685, "step": 53170 }, { "epoch": 7.898410812416456, "grad_norm": 1.0016955137252808, "learning_rate": 2.101589187583544e-06, "loss": 0.058, "step": 53180 }, { "epoch": 7.899896034457152, "grad_norm": 0.6489076614379883, "learning_rate": 2.100103965542849e-06, "loss": 0.056, "step": 53190 }, { "epoch": 7.901381256497847, "grad_norm": 0.7610294818878174, "learning_rate": 2.0986187435021538e-06, "loss": 0.0646, "step": 53200 }, { "epoch": 7.902866478538542, "grad_norm": 0.771050214767456, "learning_rate": 2.0971335214614586e-06, "loss": 0.0504, "step": 53210 }, { "epoch": 7.9043517005792365, "grad_norm": 1.212473750114441, "learning_rate": 2.0956482994207635e-06, "loss": 0.0522, "step": 53220 }, { "epoch": 7.905836922619931, "grad_norm": 0.8780522346496582, "learning_rate": 2.0941630773800683e-06, "loss": 0.0585, "step": 53230 }, { "epoch": 7.907322144660627, "grad_norm": 1.4317315816879272, "learning_rate": 2.092677855339373e-06, "loss": 0.0556, "step": 53240 }, { "epoch": 7.908807366701322, "grad_norm": 0.8244507312774658, "learning_rate": 2.0911926332986785e-06, "loss": 0.0405, "step": 53250 }, { "epoch": 7.910292588742017, "grad_norm": 1.3808245658874512, "learning_rate": 2.0897074112579833e-06, "loss": 0.0592, "step": 53260 }, { "epoch": 7.911777810782712, "grad_norm": 1.1323333978652954, "learning_rate": 2.088222189217288e-06, "loss": 0.0574, "step": 53270 }, { "epoch": 7.913263032823407, "grad_norm": 0.7280920147895813, "learning_rate": 2.086736967176593e-06, "loss": 0.0584, "step": 53280 }, { "epoch": 7.914748254864103, "grad_norm": 1.3350197076797485, "learning_rate": 2.085251745135898e-06, "loss": 0.0549, "step": 53290 }, { "epoch": 7.9162334769047975, "grad_norm": 0.5780650973320007, "learning_rate": 2.0837665230952028e-06, "loss": 0.0705, "step": 53300 }, { "epoch": 7.917718698945492, "grad_norm": 0.8865066170692444, "learning_rate": 2.082281301054508e-06, "loss": 0.0623, "step": 53310 }, { "epoch": 7.919203920986187, "grad_norm": 0.5735422372817993, "learning_rate": 2.0807960790138125e-06, "loss": 0.0536, "step": 53320 }, { "epoch": 7.920689143026882, "grad_norm": 0.9639255404472351, "learning_rate": 2.0793108569731178e-06, "loss": 0.0496, "step": 53330 }, { "epoch": 7.922174365067578, "grad_norm": 1.7265294790267944, "learning_rate": 2.0778256349324226e-06, "loss": 0.055, "step": 53340 }, { "epoch": 7.923659587108273, "grad_norm": 0.5997419953346252, "learning_rate": 2.0763404128917275e-06, "loss": 0.0474, "step": 53350 }, { "epoch": 7.925144809148968, "grad_norm": 0.8460346460342407, "learning_rate": 2.0748551908510323e-06, "loss": 0.0474, "step": 53360 }, { "epoch": 7.926630031189663, "grad_norm": 1.1648005247116089, "learning_rate": 2.073369968810337e-06, "loss": 0.073, "step": 53370 }, { "epoch": 7.928115253230358, "grad_norm": 0.7186694145202637, "learning_rate": 2.0718847467696425e-06, "loss": 0.0489, "step": 53380 }, { "epoch": 7.929600475271053, "grad_norm": 0.8861649632453918, "learning_rate": 2.0703995247289473e-06, "loss": 0.0594, "step": 53390 }, { "epoch": 7.931085697311748, "grad_norm": 0.42687320709228516, "learning_rate": 2.068914302688252e-06, "loss": 0.0817, "step": 53400 }, { "epoch": 7.932570919352443, "grad_norm": 0.4874408543109894, "learning_rate": 2.067429080647557e-06, "loss": 0.0587, "step": 53410 }, { "epoch": 7.934056141393138, "grad_norm": 0.8394566774368286, "learning_rate": 2.065943858606862e-06, "loss": 0.0536, "step": 53420 }, { "epoch": 7.935541363433833, "grad_norm": 0.8180386424064636, "learning_rate": 2.0644586365661667e-06, "loss": 0.0621, "step": 53430 }, { "epoch": 7.937026585474529, "grad_norm": 0.6095578670501709, "learning_rate": 2.0629734145254716e-06, "loss": 0.0527, "step": 53440 }, { "epoch": 7.938511807515224, "grad_norm": 1.1461389064788818, "learning_rate": 2.0614881924847765e-06, "loss": 0.0654, "step": 53450 }, { "epoch": 7.939997029555919, "grad_norm": 1.306644082069397, "learning_rate": 2.0600029704440817e-06, "loss": 0.077, "step": 53460 }, { "epoch": 7.9414822515966135, "grad_norm": 0.6092720627784729, "learning_rate": 2.058517748403386e-06, "loss": 0.0553, "step": 53470 }, { "epoch": 7.942967473637308, "grad_norm": 0.5493023991584778, "learning_rate": 2.0570325263626915e-06, "loss": 0.0502, "step": 53480 }, { "epoch": 7.944452695678004, "grad_norm": 1.4455081224441528, "learning_rate": 2.0555473043219963e-06, "loss": 0.0646, "step": 53490 }, { "epoch": 7.945937917718699, "grad_norm": 0.392264187335968, "learning_rate": 2.054062082281301e-06, "loss": 0.0629, "step": 53500 }, { "epoch": 7.947423139759394, "grad_norm": 0.8776362538337708, "learning_rate": 2.0525768602406065e-06, "loss": 0.0575, "step": 53510 }, { "epoch": 7.948908361800089, "grad_norm": 1.0791746377944946, "learning_rate": 2.051091638199911e-06, "loss": 0.047, "step": 53520 }, { "epoch": 7.950393583840784, "grad_norm": 0.6361451148986816, "learning_rate": 2.049606416159216e-06, "loss": 0.0642, "step": 53530 }, { "epoch": 7.9518788058814796, "grad_norm": 0.7630805373191833, "learning_rate": 2.048121194118521e-06, "loss": 0.0581, "step": 53540 }, { "epoch": 7.9533640279221745, "grad_norm": 1.1073448657989502, "learning_rate": 2.046635972077826e-06, "loss": 0.0448, "step": 53550 }, { "epoch": 7.954849249962869, "grad_norm": 1.3098224401474, "learning_rate": 2.0451507500371307e-06, "loss": 0.0615, "step": 53560 }, { "epoch": 7.956334472003564, "grad_norm": 0.7846435308456421, "learning_rate": 2.0436655279964356e-06, "loss": 0.0545, "step": 53570 }, { "epoch": 7.957819694044259, "grad_norm": 0.8411532044410706, "learning_rate": 2.0421803059557404e-06, "loss": 0.0473, "step": 53580 }, { "epoch": 7.959304916084955, "grad_norm": 1.306350827217102, "learning_rate": 2.0406950839150453e-06, "loss": 0.0683, "step": 53590 }, { "epoch": 7.96079013812565, "grad_norm": 0.5132039189338684, "learning_rate": 2.03920986187435e-06, "loss": 0.0565, "step": 53600 }, { "epoch": 7.962275360166345, "grad_norm": 0.5860397815704346, "learning_rate": 2.0377246398336554e-06, "loss": 0.051, "step": 53610 }, { "epoch": 7.96376058220704, "grad_norm": 1.0573729276657104, "learning_rate": 2.03623941779296e-06, "loss": 0.0645, "step": 53620 }, { "epoch": 7.9652458042477345, "grad_norm": 1.315981388092041, "learning_rate": 2.034754195752265e-06, "loss": 0.0467, "step": 53630 }, { "epoch": 7.96673102628843, "grad_norm": 0.9606397747993469, "learning_rate": 2.03326897371157e-06, "loss": 0.0536, "step": 53640 }, { "epoch": 7.968216248329125, "grad_norm": 0.4387343227863312, "learning_rate": 2.031783751670875e-06, "loss": 0.0627, "step": 53650 }, { "epoch": 7.96970147036982, "grad_norm": 1.525460124015808, "learning_rate": 2.03029852963018e-06, "loss": 0.076, "step": 53660 }, { "epoch": 7.971186692410515, "grad_norm": 0.7140664458274841, "learning_rate": 2.0288133075894846e-06, "loss": 0.0503, "step": 53670 }, { "epoch": 7.972671914451211, "grad_norm": 1.0060763359069824, "learning_rate": 2.02732808554879e-06, "loss": 0.0628, "step": 53680 }, { "epoch": 7.974157136491906, "grad_norm": 1.8016340732574463, "learning_rate": 2.0258428635080947e-06, "loss": 0.0574, "step": 53690 }, { "epoch": 7.975642358532601, "grad_norm": 0.6302917003631592, "learning_rate": 2.0243576414673996e-06, "loss": 0.062, "step": 53700 }, { "epoch": 7.9771275805732955, "grad_norm": 0.5254745483398438, "learning_rate": 2.0228724194267044e-06, "loss": 0.0535, "step": 53710 }, { "epoch": 7.97861280261399, "grad_norm": 0.3416903614997864, "learning_rate": 2.0213871973860093e-06, "loss": 0.0521, "step": 53720 }, { "epoch": 7.980098024654686, "grad_norm": 0.40586283802986145, "learning_rate": 2.019901975345314e-06, "loss": 0.0724, "step": 53730 }, { "epoch": 7.981583246695381, "grad_norm": 1.3886269330978394, "learning_rate": 2.0184167533046194e-06, "loss": 0.0574, "step": 53740 }, { "epoch": 7.983068468736076, "grad_norm": 1.389870524406433, "learning_rate": 2.016931531263924e-06, "loss": 0.0622, "step": 53750 }, { "epoch": 7.984553690776771, "grad_norm": 0.6103036403656006, "learning_rate": 2.015446309223229e-06, "loss": 0.0473, "step": 53760 }, { "epoch": 7.986038912817467, "grad_norm": 0.42124733328819275, "learning_rate": 2.013961087182534e-06, "loss": 0.0668, "step": 53770 }, { "epoch": 7.987524134858162, "grad_norm": 0.7079092264175415, "learning_rate": 2.012475865141839e-06, "loss": 0.052, "step": 53780 }, { "epoch": 7.9890093568988565, "grad_norm": 1.561933159828186, "learning_rate": 2.0109906431011437e-06, "loss": 0.0752, "step": 53790 }, { "epoch": 7.990494578939551, "grad_norm": 0.95597904920578, "learning_rate": 2.0095054210604486e-06, "loss": 0.0502, "step": 53800 }, { "epoch": 7.991979800980246, "grad_norm": 0.5312618017196655, "learning_rate": 2.008020199019754e-06, "loss": 0.0446, "step": 53810 }, { "epoch": 7.993465023020942, "grad_norm": 0.8646672368049622, "learning_rate": 2.0065349769790583e-06, "loss": 0.0554, "step": 53820 }, { "epoch": 7.994950245061637, "grad_norm": 0.9731066823005676, "learning_rate": 2.0050497549383636e-06, "loss": 0.0663, "step": 53830 }, { "epoch": 7.996435467102332, "grad_norm": 0.6797077059745789, "learning_rate": 2.0035645328976684e-06, "loss": 0.05, "step": 53840 }, { "epoch": 7.997920689143027, "grad_norm": 0.8420076966285706, "learning_rate": 2.0020793108569733e-06, "loss": 0.0582, "step": 53850 }, { "epoch": 7.999405911183722, "grad_norm": 0.8660672307014465, "learning_rate": 2.000594088816278e-06, "loss": 0.0575, "step": 53860 }, { "epoch": 8.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.05548640340566635, "eval_runtime": 214.1959, "eval_samples_per_second": 177.496, "eval_steps_per_second": 5.551, "step": 53864 }, { "epoch": 8.000891133224417, "grad_norm": 1.6648470163345337, "learning_rate": 1.999108866775583e-06, "loss": 0.0673, "step": 53870 }, { "epoch": 8.002376355265111, "grad_norm": 0.6469563245773315, "learning_rate": 1.997623644734888e-06, "loss": 0.0668, "step": 53880 }, { "epoch": 8.003861577305807, "grad_norm": 1.2048108577728271, "learning_rate": 1.996138422694193e-06, "loss": 0.0616, "step": 53890 }, { "epoch": 8.005346799346503, "grad_norm": 0.7064803838729858, "learning_rate": 1.994653200653498e-06, "loss": 0.0461, "step": 53900 }, { "epoch": 8.006832021387197, "grad_norm": 1.7400462627410889, "learning_rate": 1.993167978612803e-06, "loss": 0.0562, "step": 53910 }, { "epoch": 8.008317243427893, "grad_norm": 0.8522458076477051, "learning_rate": 1.9916827565721077e-06, "loss": 0.0434, "step": 53920 }, { "epoch": 8.009802465468587, "grad_norm": 0.41414836049079895, "learning_rate": 1.9901975345314126e-06, "loss": 0.0457, "step": 53930 }, { "epoch": 8.011287687509283, "grad_norm": 1.1489344835281372, "learning_rate": 1.9887123124907174e-06, "loss": 0.0547, "step": 53940 }, { "epoch": 8.012772909549978, "grad_norm": 0.36201971769332886, "learning_rate": 1.9872270904500223e-06, "loss": 0.0544, "step": 53950 }, { "epoch": 8.014258131590672, "grad_norm": 0.940636157989502, "learning_rate": 1.9857418684093276e-06, "loss": 0.0553, "step": 53960 }, { "epoch": 8.015743353631368, "grad_norm": 0.7558504343032837, "learning_rate": 1.984256646368632e-06, "loss": 0.0613, "step": 53970 }, { "epoch": 8.017228575672062, "grad_norm": 0.6963912844657898, "learning_rate": 1.9827714243279373e-06, "loss": 0.0521, "step": 53980 }, { "epoch": 8.018713797712758, "grad_norm": 1.4509975910186768, "learning_rate": 1.981286202287242e-06, "loss": 0.0622, "step": 53990 }, { "epoch": 8.020199019753454, "grad_norm": 1.1569229364395142, "learning_rate": 1.979800980246547e-06, "loss": 0.054, "step": 54000 }, { "epoch": 8.021684241794148, "grad_norm": 0.5657662749290466, "learning_rate": 1.978315758205852e-06, "loss": 0.0515, "step": 54010 }, { "epoch": 8.023169463834844, "grad_norm": 0.42393553256988525, "learning_rate": 1.9768305361651567e-06, "loss": 0.0565, "step": 54020 }, { "epoch": 8.024654685875538, "grad_norm": 0.5691762566566467, "learning_rate": 1.975345314124462e-06, "loss": 0.0533, "step": 54030 }, { "epoch": 8.026139907916233, "grad_norm": 0.9995362758636475, "learning_rate": 1.973860092083767e-06, "loss": 0.0596, "step": 54040 }, { "epoch": 8.02762512995693, "grad_norm": 0.9841630458831787, "learning_rate": 1.9723748700430717e-06, "loss": 0.0577, "step": 54050 }, { "epoch": 8.029110351997623, "grad_norm": 0.9374364614486694, "learning_rate": 1.9708896480023765e-06, "loss": 0.0669, "step": 54060 }, { "epoch": 8.030595574038319, "grad_norm": 0.5411103367805481, "learning_rate": 1.9694044259616814e-06, "loss": 0.0609, "step": 54070 }, { "epoch": 8.032080796079013, "grad_norm": 1.2413806915283203, "learning_rate": 1.9679192039209863e-06, "loss": 0.0416, "step": 54080 }, { "epoch": 8.033566018119709, "grad_norm": 0.8985723257064819, "learning_rate": 1.9664339818802915e-06, "loss": 0.0606, "step": 54090 }, { "epoch": 8.035051240160405, "grad_norm": 0.7435330152511597, "learning_rate": 1.964948759839596e-06, "loss": 0.0462, "step": 54100 }, { "epoch": 8.036536462201099, "grad_norm": 0.7745005488395691, "learning_rate": 1.9634635377989013e-06, "loss": 0.0541, "step": 54110 }, { "epoch": 8.038021684241794, "grad_norm": 1.5749375820159912, "learning_rate": 1.961978315758206e-06, "loss": 0.0704, "step": 54120 }, { "epoch": 8.039506906282488, "grad_norm": 1.094252109527588, "learning_rate": 1.960493093717511e-06, "loss": 0.0847, "step": 54130 }, { "epoch": 8.040992128323184, "grad_norm": 0.38332584500312805, "learning_rate": 1.959007871676816e-06, "loss": 0.0425, "step": 54140 }, { "epoch": 8.04247735036388, "grad_norm": 0.9757881760597229, "learning_rate": 1.9575226496361207e-06, "loss": 0.0611, "step": 54150 }, { "epoch": 8.043962572404574, "grad_norm": 0.6582308411598206, "learning_rate": 1.956037427595426e-06, "loss": 0.044, "step": 54160 }, { "epoch": 8.04544779444527, "grad_norm": 1.326470971107483, "learning_rate": 1.9545522055547304e-06, "loss": 0.0569, "step": 54170 }, { "epoch": 8.046933016485964, "grad_norm": 1.4490509033203125, "learning_rate": 1.9530669835140357e-06, "loss": 0.0623, "step": 54180 }, { "epoch": 8.04841823852666, "grad_norm": 0.7956948280334473, "learning_rate": 1.9515817614733405e-06, "loss": 0.0505, "step": 54190 }, { "epoch": 8.049903460567355, "grad_norm": 0.8142948746681213, "learning_rate": 1.9500965394326454e-06, "loss": 0.0596, "step": 54200 }, { "epoch": 8.05138868260805, "grad_norm": 0.8596924543380737, "learning_rate": 1.9486113173919502e-06, "loss": 0.0393, "step": 54210 }, { "epoch": 8.052873904648745, "grad_norm": 0.9736611247062683, "learning_rate": 1.947126095351255e-06, "loss": 0.0543, "step": 54220 }, { "epoch": 8.05435912668944, "grad_norm": 1.1759473085403442, "learning_rate": 1.94564087331056e-06, "loss": 0.048, "step": 54230 }, { "epoch": 8.055844348730135, "grad_norm": 0.7489706873893738, "learning_rate": 1.9441556512698652e-06, "loss": 0.0539, "step": 54240 }, { "epoch": 8.05732957077083, "grad_norm": 0.7064876556396484, "learning_rate": 1.9426704292291697e-06, "loss": 0.0752, "step": 54250 }, { "epoch": 8.058814792811525, "grad_norm": 1.0841573476791382, "learning_rate": 1.941185207188475e-06, "loss": 0.051, "step": 54260 }, { "epoch": 8.06030001485222, "grad_norm": 0.6828540563583374, "learning_rate": 1.93969998514778e-06, "loss": 0.0469, "step": 54270 }, { "epoch": 8.061785236892916, "grad_norm": 0.9682624936103821, "learning_rate": 1.9382147631070847e-06, "loss": 0.0491, "step": 54280 }, { "epoch": 8.06327045893361, "grad_norm": 0.630560040473938, "learning_rate": 1.9367295410663895e-06, "loss": 0.0502, "step": 54290 }, { "epoch": 8.064755680974306, "grad_norm": 0.7623802423477173, "learning_rate": 1.9352443190256944e-06, "loss": 0.0487, "step": 54300 }, { "epoch": 8.066240903015, "grad_norm": 0.6193575263023376, "learning_rate": 1.9337590969849997e-06, "loss": 0.0449, "step": 54310 }, { "epoch": 8.067726125055696, "grad_norm": 0.35763445496559143, "learning_rate": 1.932273874944304e-06, "loss": 0.0441, "step": 54320 }, { "epoch": 8.069211347096392, "grad_norm": 0.6113510131835938, "learning_rate": 1.9307886529036094e-06, "loss": 0.0609, "step": 54330 }, { "epoch": 8.070696569137086, "grad_norm": 0.9819941520690918, "learning_rate": 1.9293034308629142e-06, "loss": 0.0749, "step": 54340 }, { "epoch": 8.072181791177782, "grad_norm": 1.105443000793457, "learning_rate": 1.927818208822219e-06, "loss": 0.0552, "step": 54350 }, { "epoch": 8.073667013218476, "grad_norm": 1.36037278175354, "learning_rate": 1.926332986781524e-06, "loss": 0.0667, "step": 54360 }, { "epoch": 8.075152235259171, "grad_norm": 0.5551352500915527, "learning_rate": 1.924847764740829e-06, "loss": 0.0636, "step": 54370 }, { "epoch": 8.076637457299867, "grad_norm": 1.235351324081421, "learning_rate": 1.9233625427001337e-06, "loss": 0.0578, "step": 54380 }, { "epoch": 8.078122679340561, "grad_norm": 0.669603168964386, "learning_rate": 1.921877320659439e-06, "loss": 0.0491, "step": 54390 }, { "epoch": 8.079607901381257, "grad_norm": 0.5650709271430969, "learning_rate": 1.9203920986187434e-06, "loss": 0.0635, "step": 54400 }, { "epoch": 8.081093123421951, "grad_norm": 1.0669199228286743, "learning_rate": 1.9189068765780487e-06, "loss": 0.0924, "step": 54410 }, { "epoch": 8.082578345462647, "grad_norm": 0.9767273664474487, "learning_rate": 1.9174216545373535e-06, "loss": 0.0671, "step": 54420 }, { "epoch": 8.084063567503343, "grad_norm": 1.03615403175354, "learning_rate": 1.9159364324966584e-06, "loss": 0.0466, "step": 54430 }, { "epoch": 8.085548789544037, "grad_norm": 0.7880136370658875, "learning_rate": 1.9144512104559636e-06, "loss": 0.0654, "step": 54440 }, { "epoch": 8.087034011584732, "grad_norm": 1.0869358777999878, "learning_rate": 1.912965988415268e-06, "loss": 0.0612, "step": 54450 }, { "epoch": 8.088519233625426, "grad_norm": 1.2350759506225586, "learning_rate": 1.9114807663745734e-06, "loss": 0.0502, "step": 54460 }, { "epoch": 8.090004455666122, "grad_norm": 1.341839075088501, "learning_rate": 1.9099955443338782e-06, "loss": 0.0443, "step": 54470 }, { "epoch": 8.091489677706818, "grad_norm": 0.43474024534225464, "learning_rate": 1.908510322293183e-06, "loss": 0.041, "step": 54480 }, { "epoch": 8.092974899747512, "grad_norm": 1.368962287902832, "learning_rate": 1.9070251002524877e-06, "loss": 0.066, "step": 54490 }, { "epoch": 8.094460121788208, "grad_norm": 1.0121878385543823, "learning_rate": 1.9055398782117928e-06, "loss": 0.069, "step": 54500 }, { "epoch": 8.095945343828902, "grad_norm": 0.4158135652542114, "learning_rate": 1.9040546561710976e-06, "loss": 0.0453, "step": 54510 }, { "epoch": 8.097430565869598, "grad_norm": 0.7408210039138794, "learning_rate": 1.9025694341304027e-06, "loss": 0.0662, "step": 54520 }, { "epoch": 8.098915787910293, "grad_norm": 0.5974799990653992, "learning_rate": 1.9010842120897074e-06, "loss": 0.0491, "step": 54530 }, { "epoch": 8.100401009950987, "grad_norm": 1.0753979682922363, "learning_rate": 1.8995989900490124e-06, "loss": 0.0648, "step": 54540 }, { "epoch": 8.101886231991683, "grad_norm": 0.9504005908966064, "learning_rate": 1.8981137680083175e-06, "loss": 0.0495, "step": 54550 }, { "epoch": 8.103371454032377, "grad_norm": 0.9773847460746765, "learning_rate": 1.8966285459676224e-06, "loss": 0.0656, "step": 54560 }, { "epoch": 8.104856676073073, "grad_norm": 0.8386691808700562, "learning_rate": 1.8951433239269274e-06, "loss": 0.0468, "step": 54570 }, { "epoch": 8.106341898113769, "grad_norm": 0.9314102530479431, "learning_rate": 1.893658101886232e-06, "loss": 0.0618, "step": 54580 }, { "epoch": 8.107827120154463, "grad_norm": 0.8424334526062012, "learning_rate": 1.8921728798455371e-06, "loss": 0.0651, "step": 54590 }, { "epoch": 8.109312342195159, "grad_norm": 1.1387025117874146, "learning_rate": 1.890687657804842e-06, "loss": 0.0633, "step": 54600 }, { "epoch": 8.110797564235853, "grad_norm": 1.1821174621582031, "learning_rate": 1.889202435764147e-06, "loss": 0.0672, "step": 54610 }, { "epoch": 8.112282786276548, "grad_norm": 0.7393181920051575, "learning_rate": 1.8877172137234517e-06, "loss": 0.0564, "step": 54620 }, { "epoch": 8.113768008317244, "grad_norm": 0.8846901655197144, "learning_rate": 1.8862319916827568e-06, "loss": 0.0624, "step": 54630 }, { "epoch": 8.115253230357938, "grad_norm": 0.6640163660049438, "learning_rate": 1.8847467696420616e-06, "loss": 0.0529, "step": 54640 }, { "epoch": 8.116738452398634, "grad_norm": 1.2793275117874146, "learning_rate": 1.8832615476013665e-06, "loss": 0.0632, "step": 54650 }, { "epoch": 8.118223674439328, "grad_norm": 0.901630163192749, "learning_rate": 1.8817763255606713e-06, "loss": 0.0518, "step": 54660 }, { "epoch": 8.119708896480024, "grad_norm": 0.7542802095413208, "learning_rate": 1.8802911035199764e-06, "loss": 0.0547, "step": 54670 }, { "epoch": 8.12119411852072, "grad_norm": 0.851569652557373, "learning_rate": 1.8788058814792813e-06, "loss": 0.0511, "step": 54680 }, { "epoch": 8.122679340561413, "grad_norm": 1.1253145933151245, "learning_rate": 1.8773206594385861e-06, "loss": 0.0523, "step": 54690 }, { "epoch": 8.12416456260211, "grad_norm": 0.4665616750717163, "learning_rate": 1.8758354373978912e-06, "loss": 0.0569, "step": 54700 }, { "epoch": 8.125649784642803, "grad_norm": 1.1059377193450928, "learning_rate": 1.874350215357196e-06, "loss": 0.0588, "step": 54710 }, { "epoch": 8.127135006683499, "grad_norm": 0.6096968650817871, "learning_rate": 1.8728649933165011e-06, "loss": 0.0698, "step": 54720 }, { "epoch": 8.128620228724195, "grad_norm": 1.5796551704406738, "learning_rate": 1.8713797712758058e-06, "loss": 0.0657, "step": 54730 }, { "epoch": 8.130105450764889, "grad_norm": 0.34364181756973267, "learning_rate": 1.8698945492351108e-06, "loss": 0.0465, "step": 54740 }, { "epoch": 8.131590672805585, "grad_norm": 0.48883184790611267, "learning_rate": 1.8684093271944157e-06, "loss": 0.0468, "step": 54750 }, { "epoch": 8.133075894846279, "grad_norm": 1.1846975088119507, "learning_rate": 1.8669241051537208e-06, "loss": 0.0745, "step": 54760 }, { "epoch": 8.134561116886974, "grad_norm": 0.8794407248497009, "learning_rate": 1.8654388831130254e-06, "loss": 0.0448, "step": 54770 }, { "epoch": 8.13604633892767, "grad_norm": 0.44761931896209717, "learning_rate": 1.8639536610723305e-06, "loss": 0.0586, "step": 54780 }, { "epoch": 8.137531560968364, "grad_norm": 1.2459882497787476, "learning_rate": 1.8624684390316353e-06, "loss": 0.0445, "step": 54790 }, { "epoch": 8.13901678300906, "grad_norm": 1.1183502674102783, "learning_rate": 1.8609832169909404e-06, "loss": 0.0768, "step": 54800 }, { "epoch": 8.140502005049754, "grad_norm": 0.39576178789138794, "learning_rate": 1.859497994950245e-06, "loss": 0.0579, "step": 54810 }, { "epoch": 8.14198722709045, "grad_norm": 1.019203782081604, "learning_rate": 1.8580127729095501e-06, "loss": 0.0515, "step": 54820 }, { "epoch": 8.143472449131146, "grad_norm": 1.2380660772323608, "learning_rate": 1.8565275508688552e-06, "loss": 0.054, "step": 54830 }, { "epoch": 8.14495767117184, "grad_norm": 1.7992037534713745, "learning_rate": 1.8550423288281598e-06, "loss": 0.0564, "step": 54840 }, { "epoch": 8.146442893212535, "grad_norm": 0.9479325413703918, "learning_rate": 1.853557106787465e-06, "loss": 0.0535, "step": 54850 }, { "epoch": 8.14792811525323, "grad_norm": 0.8101366758346558, "learning_rate": 1.8520718847467698e-06, "loss": 0.0534, "step": 54860 }, { "epoch": 8.149413337293925, "grad_norm": 0.4467363655567169, "learning_rate": 1.8505866627060748e-06, "loss": 0.0601, "step": 54870 }, { "epoch": 8.150898559334621, "grad_norm": 0.7251670956611633, "learning_rate": 1.8491014406653795e-06, "loss": 0.0597, "step": 54880 }, { "epoch": 8.152383781375315, "grad_norm": 0.28151366114616394, "learning_rate": 1.8476162186246845e-06, "loss": 0.0508, "step": 54890 }, { "epoch": 8.15386900341601, "grad_norm": 0.4550783038139343, "learning_rate": 1.8461309965839894e-06, "loss": 0.0586, "step": 54900 }, { "epoch": 8.155354225456707, "grad_norm": 0.6677719354629517, "learning_rate": 1.8446457745432945e-06, "loss": 0.0546, "step": 54910 }, { "epoch": 8.1568394474974, "grad_norm": 1.1175285577774048, "learning_rate": 1.8431605525025991e-06, "loss": 0.0489, "step": 54920 }, { "epoch": 8.158324669538096, "grad_norm": 0.46331268548965454, "learning_rate": 1.8416753304619042e-06, "loss": 0.0525, "step": 54930 }, { "epoch": 8.15980989157879, "grad_norm": 1.2049654722213745, "learning_rate": 1.840190108421209e-06, "loss": 0.0496, "step": 54940 }, { "epoch": 8.161295113619486, "grad_norm": 0.4104573130607605, "learning_rate": 1.838704886380514e-06, "loss": 0.0521, "step": 54950 }, { "epoch": 8.162780335660182, "grad_norm": 0.787076473236084, "learning_rate": 1.8372196643398192e-06, "loss": 0.0641, "step": 54960 }, { "epoch": 8.164265557700876, "grad_norm": 0.6896853446960449, "learning_rate": 1.8357344422991238e-06, "loss": 0.0499, "step": 54970 }, { "epoch": 8.165750779741572, "grad_norm": 1.139472246170044, "learning_rate": 1.8342492202584289e-06, "loss": 0.0451, "step": 54980 }, { "epoch": 8.167236001782266, "grad_norm": 1.1083751916885376, "learning_rate": 1.8327639982177337e-06, "loss": 0.0552, "step": 54990 }, { "epoch": 8.168721223822962, "grad_norm": 1.0357023477554321, "learning_rate": 1.8312787761770386e-06, "loss": 0.0471, "step": 55000 }, { "epoch": 8.170206445863657, "grad_norm": 0.9517379999160767, "learning_rate": 1.8297935541363435e-06, "loss": 0.055, "step": 55010 }, { "epoch": 8.171691667904351, "grad_norm": 0.9328690767288208, "learning_rate": 1.8283083320956485e-06, "loss": 0.0599, "step": 55020 }, { "epoch": 8.173176889945047, "grad_norm": 0.6500536799430847, "learning_rate": 1.8268231100549532e-06, "loss": 0.0545, "step": 55030 }, { "epoch": 8.174662111985741, "grad_norm": 0.9316378235816956, "learning_rate": 1.8253378880142582e-06, "loss": 0.0348, "step": 55040 }, { "epoch": 8.176147334026437, "grad_norm": 0.9480088353157043, "learning_rate": 1.823852665973563e-06, "loss": 0.0593, "step": 55050 }, { "epoch": 8.177632556067133, "grad_norm": 0.7086759209632874, "learning_rate": 1.8223674439328682e-06, "loss": 0.0699, "step": 55060 }, { "epoch": 8.179117778107827, "grad_norm": 1.6740468740463257, "learning_rate": 1.8208822218921728e-06, "loss": 0.0744, "step": 55070 }, { "epoch": 8.180603000148523, "grad_norm": 1.1423155069351196, "learning_rate": 1.8193969998514779e-06, "loss": 0.0532, "step": 55080 }, { "epoch": 8.182088222189217, "grad_norm": 0.4345645606517792, "learning_rate": 1.817911777810783e-06, "loss": 0.0519, "step": 55090 }, { "epoch": 8.183573444229912, "grad_norm": 0.9073742032051086, "learning_rate": 1.8164265557700878e-06, "loss": 0.0505, "step": 55100 }, { "epoch": 8.185058666270608, "grad_norm": 0.9864577054977417, "learning_rate": 1.8149413337293929e-06, "loss": 0.0687, "step": 55110 }, { "epoch": 8.186543888311302, "grad_norm": 0.9961680769920349, "learning_rate": 1.8134561116886975e-06, "loss": 0.0511, "step": 55120 }, { "epoch": 8.188029110351998, "grad_norm": 0.4023669958114624, "learning_rate": 1.8119708896480026e-06, "loss": 0.0399, "step": 55130 }, { "epoch": 8.189514332392692, "grad_norm": 0.8669695854187012, "learning_rate": 1.8104856676073074e-06, "loss": 0.0545, "step": 55140 }, { "epoch": 8.190999554433388, "grad_norm": 0.6370580792427063, "learning_rate": 1.8090004455666125e-06, "loss": 0.0474, "step": 55150 }, { "epoch": 8.192484776474084, "grad_norm": 0.6495532393455505, "learning_rate": 1.8075152235259172e-06, "loss": 0.0597, "step": 55160 }, { "epoch": 8.193969998514778, "grad_norm": 0.6302558779716492, "learning_rate": 1.8060300014852222e-06, "loss": 0.0511, "step": 55170 }, { "epoch": 8.195455220555473, "grad_norm": 0.9278355836868286, "learning_rate": 1.804544779444527e-06, "loss": 0.0486, "step": 55180 }, { "epoch": 8.196940442596167, "grad_norm": 0.4719637334346771, "learning_rate": 1.803059557403832e-06, "loss": 0.0473, "step": 55190 }, { "epoch": 8.198425664636863, "grad_norm": 1.1095895767211914, "learning_rate": 1.8015743353631368e-06, "loss": 0.0593, "step": 55200 }, { "epoch": 8.199910886677559, "grad_norm": 0.5811156034469604, "learning_rate": 1.8000891133224419e-06, "loss": 0.039, "step": 55210 }, { "epoch": 8.201396108718253, "grad_norm": 1.1787195205688477, "learning_rate": 1.798603891281747e-06, "loss": 0.055, "step": 55220 }, { "epoch": 8.202881330758949, "grad_norm": 0.4531806409358978, "learning_rate": 1.7971186692410516e-06, "loss": 0.049, "step": 55230 }, { "epoch": 8.204366552799643, "grad_norm": 0.6535776257514954, "learning_rate": 1.7956334472003566e-06, "loss": 0.0562, "step": 55240 }, { "epoch": 8.205851774840339, "grad_norm": 0.8606031537055969, "learning_rate": 1.7941482251596615e-06, "loss": 0.0706, "step": 55250 }, { "epoch": 8.207336996881034, "grad_norm": 0.8530101776123047, "learning_rate": 1.7926630031189666e-06, "loss": 0.0366, "step": 55260 }, { "epoch": 8.208822218921728, "grad_norm": 0.9330909848213196, "learning_rate": 1.7911777810782712e-06, "loss": 0.0609, "step": 55270 }, { "epoch": 8.210307440962424, "grad_norm": 1.5024702548980713, "learning_rate": 1.7896925590375763e-06, "loss": 0.0607, "step": 55280 }, { "epoch": 8.211792663003118, "grad_norm": 0.6589770317077637, "learning_rate": 1.7882073369968811e-06, "loss": 0.0546, "step": 55290 }, { "epoch": 8.213277885043814, "grad_norm": 1.931390643119812, "learning_rate": 1.7867221149561862e-06, "loss": 0.0496, "step": 55300 }, { "epoch": 8.21476310708451, "grad_norm": 0.75163334608078, "learning_rate": 1.7852368929154909e-06, "loss": 0.0659, "step": 55310 }, { "epoch": 8.216248329125204, "grad_norm": 0.9942273497581482, "learning_rate": 1.783751670874796e-06, "loss": 0.0528, "step": 55320 }, { "epoch": 8.2177335511659, "grad_norm": 0.8498992323875427, "learning_rate": 1.7822664488341008e-06, "loss": 0.0437, "step": 55330 }, { "epoch": 8.219218773206594, "grad_norm": 1.12864089012146, "learning_rate": 1.7807812267934059e-06, "loss": 0.0459, "step": 55340 }, { "epoch": 8.22070399524729, "grad_norm": 0.8504659533500671, "learning_rate": 1.7792960047527107e-06, "loss": 0.0767, "step": 55350 }, { "epoch": 8.222189217287985, "grad_norm": 0.7916573882102966, "learning_rate": 1.7778107827120156e-06, "loss": 0.0492, "step": 55360 }, { "epoch": 8.22367443932868, "grad_norm": 1.0378488302230835, "learning_rate": 1.7763255606713206e-06, "loss": 0.0788, "step": 55370 }, { "epoch": 8.225159661369375, "grad_norm": 0.593513011932373, "learning_rate": 1.7748403386306253e-06, "loss": 0.0596, "step": 55380 }, { "epoch": 8.226644883410069, "grad_norm": 1.3286253213882446, "learning_rate": 1.7733551165899303e-06, "loss": 0.0503, "step": 55390 }, { "epoch": 8.228130105450765, "grad_norm": 0.4538455009460449, "learning_rate": 1.7718698945492352e-06, "loss": 0.0629, "step": 55400 }, { "epoch": 8.22961532749146, "grad_norm": 1.29067063331604, "learning_rate": 1.7703846725085403e-06, "loss": 0.073, "step": 55410 }, { "epoch": 8.231100549532155, "grad_norm": 0.9755465388298035, "learning_rate": 1.768899450467845e-06, "loss": 0.054, "step": 55420 }, { "epoch": 8.23258577157285, "grad_norm": 1.3098180294036865, "learning_rate": 1.76741422842715e-06, "loss": 0.0396, "step": 55430 }, { "epoch": 8.234070993613546, "grad_norm": 1.1818701028823853, "learning_rate": 1.7659290063864548e-06, "loss": 0.0721, "step": 55440 }, { "epoch": 8.23555621565424, "grad_norm": 1.2597570419311523, "learning_rate": 1.76444378434576e-06, "loss": 0.0554, "step": 55450 }, { "epoch": 8.237041437694936, "grad_norm": 0.5721325874328613, "learning_rate": 1.7629585623050646e-06, "loss": 0.0576, "step": 55460 }, { "epoch": 8.23852665973563, "grad_norm": 0.9122353196144104, "learning_rate": 1.7614733402643696e-06, "loss": 0.0557, "step": 55470 }, { "epoch": 8.240011881776326, "grad_norm": 0.48903775215148926, "learning_rate": 1.7599881182236747e-06, "loss": 0.0766, "step": 55480 }, { "epoch": 8.241497103817021, "grad_norm": 0.7196714282035828, "learning_rate": 1.7585028961829796e-06, "loss": 0.056, "step": 55490 }, { "epoch": 8.242982325857716, "grad_norm": 0.540144681930542, "learning_rate": 1.7570176741422846e-06, "loss": 0.0431, "step": 55500 }, { "epoch": 8.244467547898411, "grad_norm": 0.8926095366477966, "learning_rate": 1.7555324521015893e-06, "loss": 0.0587, "step": 55510 }, { "epoch": 8.245952769939105, "grad_norm": 0.639405369758606, "learning_rate": 1.7540472300608943e-06, "loss": 0.0561, "step": 55520 }, { "epoch": 8.247437991979801, "grad_norm": 0.5663328170776367, "learning_rate": 1.7525620080201992e-06, "loss": 0.0424, "step": 55530 }, { "epoch": 8.248923214020497, "grad_norm": 0.5691666603088379, "learning_rate": 1.751076785979504e-06, "loss": 0.059, "step": 55540 }, { "epoch": 8.250408436061191, "grad_norm": 0.7660884857177734, "learning_rate": 1.749591563938809e-06, "loss": 0.0563, "step": 55550 }, { "epoch": 8.251893658101887, "grad_norm": 0.3192717730998993, "learning_rate": 1.748106341898114e-06, "loss": 0.0473, "step": 55560 }, { "epoch": 8.25337888014258, "grad_norm": 1.3229044675827026, "learning_rate": 1.7466211198574186e-06, "loss": 0.0577, "step": 55570 }, { "epoch": 8.254864102183276, "grad_norm": 0.6860600709915161, "learning_rate": 1.7451358978167237e-06, "loss": 0.0531, "step": 55580 }, { "epoch": 8.256349324223972, "grad_norm": 0.7781535387039185, "learning_rate": 1.7436506757760285e-06, "loss": 0.041, "step": 55590 }, { "epoch": 8.257834546264666, "grad_norm": 0.48318976163864136, "learning_rate": 1.7421654537353336e-06, "loss": 0.0456, "step": 55600 }, { "epoch": 8.259319768305362, "grad_norm": 0.8603309392929077, "learning_rate": 1.7406802316946387e-06, "loss": 0.0511, "step": 55610 }, { "epoch": 8.260804990346056, "grad_norm": 0.7285515666007996, "learning_rate": 1.7391950096539433e-06, "loss": 0.0522, "step": 55620 }, { "epoch": 8.262290212386752, "grad_norm": 0.8494543433189392, "learning_rate": 1.7377097876132484e-06, "loss": 0.0366, "step": 55630 }, { "epoch": 8.263775434427448, "grad_norm": 1.2635679244995117, "learning_rate": 1.7362245655725533e-06, "loss": 0.044, "step": 55640 }, { "epoch": 8.265260656468142, "grad_norm": 0.6081938147544861, "learning_rate": 1.7347393435318583e-06, "loss": 0.0546, "step": 55650 }, { "epoch": 8.266745878508837, "grad_norm": 0.6587321162223816, "learning_rate": 1.733254121491163e-06, "loss": 0.0521, "step": 55660 }, { "epoch": 8.268231100549531, "grad_norm": 0.6453005075454712, "learning_rate": 1.731768899450468e-06, "loss": 0.0547, "step": 55670 }, { "epoch": 8.269716322590227, "grad_norm": 1.6176621913909912, "learning_rate": 1.7302836774097729e-06, "loss": 0.069, "step": 55680 }, { "epoch": 8.271201544630923, "grad_norm": 1.0043728351593018, "learning_rate": 1.728798455369078e-06, "loss": 0.0437, "step": 55690 }, { "epoch": 8.272686766671617, "grad_norm": 1.269233226776123, "learning_rate": 1.7273132333283826e-06, "loss": 0.0562, "step": 55700 }, { "epoch": 8.274171988712313, "grad_norm": 0.8003224730491638, "learning_rate": 1.7258280112876877e-06, "loss": 0.0646, "step": 55710 }, { "epoch": 8.275657210753007, "grad_norm": 0.9215367436408997, "learning_rate": 1.7243427892469925e-06, "loss": 0.0642, "step": 55720 }, { "epoch": 8.277142432793703, "grad_norm": 0.4948424696922302, "learning_rate": 1.7228575672062974e-06, "loss": 0.058, "step": 55730 }, { "epoch": 8.278627654834398, "grad_norm": 1.7762740850448608, "learning_rate": 1.7213723451656025e-06, "loss": 0.0432, "step": 55740 }, { "epoch": 8.280112876875092, "grad_norm": 1.6269477605819702, "learning_rate": 1.7198871231249073e-06, "loss": 0.0699, "step": 55750 }, { "epoch": 8.281598098915788, "grad_norm": 0.7696530222892761, "learning_rate": 1.7184019010842124e-06, "loss": 0.0647, "step": 55760 }, { "epoch": 8.283083320956482, "grad_norm": 0.37772563099861145, "learning_rate": 1.716916679043517e-06, "loss": 0.0564, "step": 55770 }, { "epoch": 8.284568542997178, "grad_norm": 0.7547775506973267, "learning_rate": 1.715431457002822e-06, "loss": 0.0755, "step": 55780 }, { "epoch": 8.286053765037874, "grad_norm": 0.5472797751426697, "learning_rate": 1.713946234962127e-06, "loss": 0.0546, "step": 55790 }, { "epoch": 8.287538987078568, "grad_norm": 1.0821386575698853, "learning_rate": 1.712461012921432e-06, "loss": 0.0702, "step": 55800 }, { "epoch": 8.289024209119264, "grad_norm": 1.0084789991378784, "learning_rate": 1.7109757908807367e-06, "loss": 0.0675, "step": 55810 }, { "epoch": 8.290509431159958, "grad_norm": 1.4478343725204468, "learning_rate": 1.7094905688400417e-06, "loss": 0.058, "step": 55820 }, { "epoch": 8.291994653200653, "grad_norm": 0.6397274732589722, "learning_rate": 1.7080053467993466e-06, "loss": 0.0507, "step": 55830 }, { "epoch": 8.29347987524135, "grad_norm": 0.4168252944946289, "learning_rate": 1.7065201247586517e-06, "loss": 0.0406, "step": 55840 }, { "epoch": 8.294965097282043, "grad_norm": 0.8522523641586304, "learning_rate": 1.7050349027179563e-06, "loss": 0.0347, "step": 55850 }, { "epoch": 8.296450319322739, "grad_norm": 1.1035298109054565, "learning_rate": 1.7035496806772614e-06, "loss": 0.0654, "step": 55860 }, { "epoch": 8.297935541363433, "grad_norm": 0.6701522469520569, "learning_rate": 1.7020644586365664e-06, "loss": 0.0434, "step": 55870 }, { "epoch": 8.299420763404129, "grad_norm": 0.8941202759742737, "learning_rate": 1.7005792365958713e-06, "loss": 0.0644, "step": 55880 }, { "epoch": 8.300905985444825, "grad_norm": 1.1706655025482178, "learning_rate": 1.6990940145551762e-06, "loss": 0.075, "step": 55890 }, { "epoch": 8.302391207485519, "grad_norm": 0.5262462496757507, "learning_rate": 1.697608792514481e-06, "loss": 0.0556, "step": 55900 }, { "epoch": 8.303876429526214, "grad_norm": 0.9723024368286133, "learning_rate": 1.696123570473786e-06, "loss": 0.0491, "step": 55910 }, { "epoch": 8.305361651566908, "grad_norm": 0.891448438167572, "learning_rate": 1.6946383484330907e-06, "loss": 0.0596, "step": 55920 }, { "epoch": 8.306846873607604, "grad_norm": 0.42021748423576355, "learning_rate": 1.6931531263923958e-06, "loss": 0.0434, "step": 55930 }, { "epoch": 8.3083320956483, "grad_norm": 0.6452431678771973, "learning_rate": 1.6916679043517007e-06, "loss": 0.0706, "step": 55940 }, { "epoch": 8.309817317688994, "grad_norm": 1.2964681386947632, "learning_rate": 1.6901826823110057e-06, "loss": 0.0352, "step": 55950 }, { "epoch": 8.31130253972969, "grad_norm": 1.099612832069397, "learning_rate": 1.6886974602703104e-06, "loss": 0.064, "step": 55960 }, { "epoch": 8.312787761770384, "grad_norm": 0.3394363522529602, "learning_rate": 1.6872122382296154e-06, "loss": 0.0485, "step": 55970 }, { "epoch": 8.31427298381108, "grad_norm": 0.8450482487678528, "learning_rate": 1.6857270161889203e-06, "loss": 0.0496, "step": 55980 }, { "epoch": 8.315758205851775, "grad_norm": 0.8660183548927307, "learning_rate": 1.6842417941482254e-06, "loss": 0.0617, "step": 55990 }, { "epoch": 8.31724342789247, "grad_norm": 0.7594779133796692, "learning_rate": 1.6827565721075304e-06, "loss": 0.0501, "step": 56000 }, { "epoch": 8.318728649933165, "grad_norm": 1.148614525794983, "learning_rate": 1.681271350066835e-06, "loss": 0.0708, "step": 56010 }, { "epoch": 8.32021387197386, "grad_norm": 0.5654726624488831, "learning_rate": 1.6797861280261401e-06, "loss": 0.0454, "step": 56020 }, { "epoch": 8.321699094014555, "grad_norm": 1.0416905879974365, "learning_rate": 1.678300905985445e-06, "loss": 0.0397, "step": 56030 }, { "epoch": 8.32318431605525, "grad_norm": 1.6517877578735352, "learning_rate": 1.67681568394475e-06, "loss": 0.0628, "step": 56040 }, { "epoch": 8.324669538095945, "grad_norm": 2.160710334777832, "learning_rate": 1.6753304619040547e-06, "loss": 0.049, "step": 56050 }, { "epoch": 8.32615476013664, "grad_norm": 0.4089074432849884, "learning_rate": 1.6738452398633598e-06, "loss": 0.048, "step": 56060 }, { "epoch": 8.327639982177336, "grad_norm": 0.6569660305976868, "learning_rate": 1.6723600178226646e-06, "loss": 0.0579, "step": 56070 }, { "epoch": 8.32912520421803, "grad_norm": 0.9492629766464233, "learning_rate": 1.6708747957819695e-06, "loss": 0.0647, "step": 56080 }, { "epoch": 8.330610426258726, "grad_norm": 0.38269171118736267, "learning_rate": 1.6693895737412744e-06, "loss": 0.0542, "step": 56090 }, { "epoch": 8.33209564829942, "grad_norm": 1.2882914543151855, "learning_rate": 1.6679043517005794e-06, "loss": 0.0472, "step": 56100 }, { "epoch": 8.333580870340116, "grad_norm": 0.41408294439315796, "learning_rate": 1.666419129659884e-06, "loss": 0.0406, "step": 56110 }, { "epoch": 8.335066092380812, "grad_norm": 1.3629785776138306, "learning_rate": 1.6649339076191891e-06, "loss": 0.083, "step": 56120 }, { "epoch": 8.336551314421506, "grad_norm": 0.6229886412620544, "learning_rate": 1.663448685578494e-06, "loss": 0.0517, "step": 56130 }, { "epoch": 8.338036536462202, "grad_norm": 0.6783386468887329, "learning_rate": 1.661963463537799e-06, "loss": 0.0568, "step": 56140 }, { "epoch": 8.339521758502896, "grad_norm": 0.3790055513381958, "learning_rate": 1.6604782414971041e-06, "loss": 0.061, "step": 56150 }, { "epoch": 8.341006980543591, "grad_norm": 1.6844587326049805, "learning_rate": 1.6589930194564088e-06, "loss": 0.0845, "step": 56160 }, { "epoch": 8.342492202584287, "grad_norm": 1.0809643268585205, "learning_rate": 1.6575077974157138e-06, "loss": 0.0368, "step": 56170 }, { "epoch": 8.343977424624981, "grad_norm": 1.7007912397384644, "learning_rate": 1.6560225753750187e-06, "loss": 0.0574, "step": 56180 }, { "epoch": 8.345462646665677, "grad_norm": 2.82881760597229, "learning_rate": 1.6545373533343238e-06, "loss": 0.0514, "step": 56190 }, { "epoch": 8.346947868706371, "grad_norm": 2.6078879833221436, "learning_rate": 1.6530521312936284e-06, "loss": 0.0427, "step": 56200 }, { "epoch": 8.348433090747067, "grad_norm": 0.698557436466217, "learning_rate": 1.6515669092529335e-06, "loss": 0.0621, "step": 56210 }, { "epoch": 8.349918312787763, "grad_norm": 1.1243953704833984, "learning_rate": 1.6500816872122383e-06, "loss": 0.0439, "step": 56220 }, { "epoch": 8.351403534828457, "grad_norm": 0.25133031606674194, "learning_rate": 1.6485964651715434e-06, "loss": 0.032, "step": 56230 }, { "epoch": 8.352888756869152, "grad_norm": 1.685579776763916, "learning_rate": 1.647111243130848e-06, "loss": 0.0684, "step": 56240 }, { "epoch": 8.354373978909846, "grad_norm": 1.0357993841171265, "learning_rate": 1.6456260210901531e-06, "loss": 0.0545, "step": 56250 }, { "epoch": 8.355859200950542, "grad_norm": 1.7972323894500732, "learning_rate": 1.644140799049458e-06, "loss": 0.0631, "step": 56260 }, { "epoch": 8.357344422991238, "grad_norm": 1.4118080139160156, "learning_rate": 1.6426555770087628e-06, "loss": 0.0523, "step": 56270 }, { "epoch": 8.358829645031932, "grad_norm": 1.0385082960128784, "learning_rate": 1.641170354968068e-06, "loss": 0.0469, "step": 56280 }, { "epoch": 8.360314867072628, "grad_norm": 1.1347720623016357, "learning_rate": 1.6396851329273728e-06, "loss": 0.0495, "step": 56290 }, { "epoch": 8.361800089113322, "grad_norm": 0.974901556968689, "learning_rate": 1.6381999108866778e-06, "loss": 0.0637, "step": 56300 }, { "epoch": 8.363285311154018, "grad_norm": 0.9575043320655823, "learning_rate": 1.6367146888459825e-06, "loss": 0.0521, "step": 56310 }, { "epoch": 8.364770533194713, "grad_norm": 1.842950701713562, "learning_rate": 1.6352294668052875e-06, "loss": 0.0655, "step": 56320 }, { "epoch": 8.366255755235407, "grad_norm": 0.8532291054725647, "learning_rate": 1.6337442447645924e-06, "loss": 0.0523, "step": 56330 }, { "epoch": 8.367740977276103, "grad_norm": 0.7238110899925232, "learning_rate": 1.6322590227238975e-06, "loss": 0.0535, "step": 56340 }, { "epoch": 8.369226199316797, "grad_norm": 0.788982093334198, "learning_rate": 1.6307738006832021e-06, "loss": 0.0579, "step": 56350 }, { "epoch": 8.370711421357493, "grad_norm": 0.8190876245498657, "learning_rate": 1.6292885786425072e-06, "loss": 0.065, "step": 56360 }, { "epoch": 8.372196643398189, "grad_norm": 1.094773292541504, "learning_rate": 1.627803356601812e-06, "loss": 0.0561, "step": 56370 }, { "epoch": 8.373681865438883, "grad_norm": 0.4097306430339813, "learning_rate": 1.6263181345611171e-06, "loss": 0.0474, "step": 56380 }, { "epoch": 8.375167087479578, "grad_norm": 0.6835191249847412, "learning_rate": 1.6248329125204218e-06, "loss": 0.0723, "step": 56390 }, { "epoch": 8.376652309520273, "grad_norm": 0.9203733205795288, "learning_rate": 1.6233476904797268e-06, "loss": 0.0501, "step": 56400 }, { "epoch": 8.378137531560968, "grad_norm": 1.1533256769180298, "learning_rate": 1.6218624684390319e-06, "loss": 0.0488, "step": 56410 }, { "epoch": 8.379622753601664, "grad_norm": 1.44968843460083, "learning_rate": 1.6203772463983367e-06, "loss": 0.0511, "step": 56420 }, { "epoch": 8.381107975642358, "grad_norm": 0.7023779153823853, "learning_rate": 1.6188920243576416e-06, "loss": 0.0582, "step": 56430 }, { "epoch": 8.382593197683054, "grad_norm": 1.1704336404800415, "learning_rate": 1.6174068023169465e-06, "loss": 0.0632, "step": 56440 }, { "epoch": 8.384078419723748, "grad_norm": 0.7215237617492676, "learning_rate": 1.6159215802762515e-06, "loss": 0.0427, "step": 56450 }, { "epoch": 8.385563641764444, "grad_norm": 0.4235459864139557, "learning_rate": 1.6144363582355562e-06, "loss": 0.0477, "step": 56460 }, { "epoch": 8.38704886380514, "grad_norm": 0.7092357277870178, "learning_rate": 1.6129511361948612e-06, "loss": 0.0368, "step": 56470 }, { "epoch": 8.388534085845833, "grad_norm": 1.170106291770935, "learning_rate": 1.611465914154166e-06, "loss": 0.06, "step": 56480 }, { "epoch": 8.39001930788653, "grad_norm": 0.5858461260795593, "learning_rate": 1.6099806921134712e-06, "loss": 0.0452, "step": 56490 }, { "epoch": 8.391504529927223, "grad_norm": 0.7332956790924072, "learning_rate": 1.6084954700727758e-06, "loss": 0.0569, "step": 56500 }, { "epoch": 8.392989751967919, "grad_norm": 1.0417643785476685, "learning_rate": 1.6070102480320809e-06, "loss": 0.0525, "step": 56510 }, { "epoch": 8.394474974008615, "grad_norm": 0.4032362103462219, "learning_rate": 1.6055250259913857e-06, "loss": 0.0394, "step": 56520 }, { "epoch": 8.395960196049309, "grad_norm": 0.6877124309539795, "learning_rate": 1.6040398039506908e-06, "loss": 0.0587, "step": 56530 }, { "epoch": 8.397445418090005, "grad_norm": 1.4561941623687744, "learning_rate": 1.6025545819099959e-06, "loss": 0.0642, "step": 56540 }, { "epoch": 8.3989306401307, "grad_norm": 1.2329953908920288, "learning_rate": 1.6010693598693005e-06, "loss": 0.0599, "step": 56550 }, { "epoch": 8.400415862171394, "grad_norm": 1.5175464153289795, "learning_rate": 1.5995841378286056e-06, "loss": 0.0496, "step": 56560 }, { "epoch": 8.40190108421209, "grad_norm": 1.3104963302612305, "learning_rate": 1.5980989157879104e-06, "loss": 0.0663, "step": 56570 }, { "epoch": 8.403386306252784, "grad_norm": 0.6927348971366882, "learning_rate": 1.5966136937472155e-06, "loss": 0.0593, "step": 56580 }, { "epoch": 8.40487152829348, "grad_norm": 0.9610560536384583, "learning_rate": 1.5951284717065202e-06, "loss": 0.0584, "step": 56590 }, { "epoch": 8.406356750334176, "grad_norm": 0.5008347630500793, "learning_rate": 1.5936432496658252e-06, "loss": 0.0458, "step": 56600 }, { "epoch": 8.40784197237487, "grad_norm": 0.6039450764656067, "learning_rate": 1.59215802762513e-06, "loss": 0.0431, "step": 56610 }, { "epoch": 8.409327194415566, "grad_norm": 1.2651342153549194, "learning_rate": 1.590672805584435e-06, "loss": 0.0573, "step": 56620 }, { "epoch": 8.41081241645626, "grad_norm": 0.8857653737068176, "learning_rate": 1.5891875835437398e-06, "loss": 0.0553, "step": 56630 }, { "epoch": 8.412297638496955, "grad_norm": 0.8885740041732788, "learning_rate": 1.5877023615030449e-06, "loss": 0.0598, "step": 56640 }, { "epoch": 8.413782860537651, "grad_norm": 0.7766587138175964, "learning_rate": 1.5862171394623495e-06, "loss": 0.046, "step": 56650 }, { "epoch": 8.415268082578345, "grad_norm": 1.0298758745193481, "learning_rate": 1.5847319174216546e-06, "loss": 0.0372, "step": 56660 }, { "epoch": 8.416753304619041, "grad_norm": 1.0436177253723145, "learning_rate": 1.5832466953809597e-06, "loss": 0.0554, "step": 56670 }, { "epoch": 8.418238526659735, "grad_norm": 0.6761430501937866, "learning_rate": 1.5817614733402645e-06, "loss": 0.0533, "step": 56680 }, { "epoch": 8.41972374870043, "grad_norm": 1.1792784929275513, "learning_rate": 1.5802762512995696e-06, "loss": 0.0554, "step": 56690 }, { "epoch": 8.421208970741127, "grad_norm": 0.33128201961517334, "learning_rate": 1.5787910292588742e-06, "loss": 0.0416, "step": 56700 }, { "epoch": 8.42269419278182, "grad_norm": 0.54185551404953, "learning_rate": 1.5773058072181793e-06, "loss": 0.0586, "step": 56710 }, { "epoch": 8.424179414822516, "grad_norm": 0.5396039485931396, "learning_rate": 1.5758205851774841e-06, "loss": 0.0536, "step": 56720 }, { "epoch": 8.42566463686321, "grad_norm": 1.5792487859725952, "learning_rate": 1.5743353631367892e-06, "loss": 0.0562, "step": 56730 }, { "epoch": 8.427149858903906, "grad_norm": 1.8107473850250244, "learning_rate": 1.5728501410960939e-06, "loss": 0.0599, "step": 56740 }, { "epoch": 8.428635080944602, "grad_norm": 0.57285076379776, "learning_rate": 1.571364919055399e-06, "loss": 0.0394, "step": 56750 }, { "epoch": 8.430120302985296, "grad_norm": 0.35481494665145874, "learning_rate": 1.5698796970147038e-06, "loss": 0.0527, "step": 56760 }, { "epoch": 8.431605525025992, "grad_norm": 1.0526456832885742, "learning_rate": 1.5683944749740089e-06, "loss": 0.0565, "step": 56770 }, { "epoch": 8.433090747066686, "grad_norm": 1.1689788103103638, "learning_rate": 1.5669092529333135e-06, "loss": 0.0538, "step": 56780 }, { "epoch": 8.434575969107382, "grad_norm": 1.2657625675201416, "learning_rate": 1.5654240308926186e-06, "loss": 0.0546, "step": 56790 }, { "epoch": 8.436061191148077, "grad_norm": 0.6141216158866882, "learning_rate": 1.5639388088519236e-06, "loss": 0.0446, "step": 56800 }, { "epoch": 8.437546413188771, "grad_norm": 0.4884616732597351, "learning_rate": 1.5624535868112283e-06, "loss": 0.0537, "step": 56810 }, { "epoch": 8.439031635229467, "grad_norm": 1.2065073251724243, "learning_rate": 1.5609683647705334e-06, "loss": 0.0582, "step": 56820 }, { "epoch": 8.440516857270161, "grad_norm": 0.9194113612174988, "learning_rate": 1.5594831427298382e-06, "loss": 0.0549, "step": 56830 }, { "epoch": 8.442002079310857, "grad_norm": 0.5705419182777405, "learning_rate": 1.5579979206891433e-06, "loss": 0.0528, "step": 56840 }, { "epoch": 8.443487301351553, "grad_norm": 0.7145352363586426, "learning_rate": 1.556512698648448e-06, "loss": 0.0435, "step": 56850 }, { "epoch": 8.444972523392247, "grad_norm": 1.5799872875213623, "learning_rate": 1.555027476607753e-06, "loss": 0.0694, "step": 56860 }, { "epoch": 8.446457745432943, "grad_norm": 1.1314457654953003, "learning_rate": 1.5535422545670579e-06, "loss": 0.0475, "step": 56870 }, { "epoch": 8.447942967473637, "grad_norm": 1.1472721099853516, "learning_rate": 1.552057032526363e-06, "loss": 0.0459, "step": 56880 }, { "epoch": 8.449428189514332, "grad_norm": 0.9981784224510193, "learning_rate": 1.5505718104856676e-06, "loss": 0.0546, "step": 56890 }, { "epoch": 8.450913411555028, "grad_norm": 1.7208770513534546, "learning_rate": 1.5490865884449726e-06, "loss": 0.0666, "step": 56900 }, { "epoch": 8.452398633595722, "grad_norm": 1.0905085802078247, "learning_rate": 1.5476013664042775e-06, "loss": 0.0598, "step": 56910 }, { "epoch": 8.453883855636418, "grad_norm": 0.7236934304237366, "learning_rate": 1.5461161443635826e-06, "loss": 0.0615, "step": 56920 }, { "epoch": 8.455369077677112, "grad_norm": 0.36417078971862793, "learning_rate": 1.5446309223228876e-06, "loss": 0.0572, "step": 56930 }, { "epoch": 8.456854299717808, "grad_norm": 0.8872111439704895, "learning_rate": 1.5431457002821923e-06, "loss": 0.0505, "step": 56940 }, { "epoch": 8.458339521758504, "grad_norm": 1.3937108516693115, "learning_rate": 1.5416604782414973e-06, "loss": 0.086, "step": 56950 }, { "epoch": 8.459824743799198, "grad_norm": 0.8593041896820068, "learning_rate": 1.5401752562008022e-06, "loss": 0.0507, "step": 56960 }, { "epoch": 8.461309965839893, "grad_norm": 1.7641494274139404, "learning_rate": 1.538690034160107e-06, "loss": 0.0508, "step": 56970 }, { "epoch": 8.462795187880587, "grad_norm": 0.31947317719459534, "learning_rate": 1.537204812119412e-06, "loss": 0.0379, "step": 56980 }, { "epoch": 8.464280409921283, "grad_norm": 1.2858604192733765, "learning_rate": 1.535719590078717e-06, "loss": 0.0535, "step": 56990 }, { "epoch": 8.465765631961979, "grad_norm": 1.130893588066101, "learning_rate": 1.5342343680380216e-06, "loss": 0.062, "step": 57000 }, { "epoch": 8.467250854002673, "grad_norm": 0.2929428517818451, "learning_rate": 1.5327491459973267e-06, "loss": 0.0638, "step": 57010 }, { "epoch": 8.468736076043369, "grad_norm": 0.8168018460273743, "learning_rate": 1.5312639239566316e-06, "loss": 0.0658, "step": 57020 }, { "epoch": 8.470221298084063, "grad_norm": 1.2483536005020142, "learning_rate": 1.5297787019159366e-06, "loss": 0.0662, "step": 57030 }, { "epoch": 8.471706520124759, "grad_norm": 1.250025987625122, "learning_rate": 1.5282934798752413e-06, "loss": 0.039, "step": 57040 }, { "epoch": 8.473191742165454, "grad_norm": 1.2861472368240356, "learning_rate": 1.5268082578345463e-06, "loss": 0.0725, "step": 57050 }, { "epoch": 8.474676964206148, "grad_norm": 0.44773104786872864, "learning_rate": 1.5253230357938514e-06, "loss": 0.0683, "step": 57060 }, { "epoch": 8.476162186246844, "grad_norm": 1.063197135925293, "learning_rate": 1.5238378137531563e-06, "loss": 0.0525, "step": 57070 }, { "epoch": 8.477647408287538, "grad_norm": 0.8567971587181091, "learning_rate": 1.5223525917124613e-06, "loss": 0.044, "step": 57080 }, { "epoch": 8.479132630328234, "grad_norm": 0.8655422925949097, "learning_rate": 1.520867369671766e-06, "loss": 0.0687, "step": 57090 }, { "epoch": 8.48061785236893, "grad_norm": 0.5591942071914673, "learning_rate": 1.519382147631071e-06, "loss": 0.0754, "step": 57100 }, { "epoch": 8.482103074409624, "grad_norm": 1.0601160526275635, "learning_rate": 1.517896925590376e-06, "loss": 0.0545, "step": 57110 }, { "epoch": 8.48358829645032, "grad_norm": 0.7717120051383972, "learning_rate": 1.516411703549681e-06, "loss": 0.0542, "step": 57120 }, { "epoch": 8.485073518491014, "grad_norm": 1.1000733375549316, "learning_rate": 1.5149264815089856e-06, "loss": 0.043, "step": 57130 }, { "epoch": 8.48655874053171, "grad_norm": 0.4473594129085541, "learning_rate": 1.5134412594682907e-06, "loss": 0.0568, "step": 57140 }, { "epoch": 8.488043962572405, "grad_norm": 0.5959775447845459, "learning_rate": 1.5119560374275955e-06, "loss": 0.0698, "step": 57150 }, { "epoch": 8.4895291846131, "grad_norm": 1.5816816091537476, "learning_rate": 1.5104708153869004e-06, "loss": 0.0762, "step": 57160 }, { "epoch": 8.491014406653795, "grad_norm": 1.4168264865875244, "learning_rate": 1.5089855933462053e-06, "loss": 0.0441, "step": 57170 }, { "epoch": 8.492499628694489, "grad_norm": 1.0785027742385864, "learning_rate": 1.5075003713055103e-06, "loss": 0.0722, "step": 57180 }, { "epoch": 8.493984850735185, "grad_norm": 0.5428130030632019, "learning_rate": 1.5060151492648154e-06, "loss": 0.0474, "step": 57190 }, { "epoch": 8.49547007277588, "grad_norm": 0.9714481830596924, "learning_rate": 1.50452992722412e-06, "loss": 0.0634, "step": 57200 }, { "epoch": 8.496955294816575, "grad_norm": 0.5792000889778137, "learning_rate": 1.503044705183425e-06, "loss": 0.0602, "step": 57210 }, { "epoch": 8.49844051685727, "grad_norm": 0.6629564166069031, "learning_rate": 1.50155948314273e-06, "loss": 0.0398, "step": 57220 }, { "epoch": 8.499925738897966, "grad_norm": 0.5812675356864929, "learning_rate": 1.500074261102035e-06, "loss": 0.0447, "step": 57230 }, { "epoch": 8.50141096093866, "grad_norm": 0.9888134598731995, "learning_rate": 1.4985890390613397e-06, "loss": 0.0476, "step": 57240 }, { "epoch": 8.502896182979356, "grad_norm": 0.4237917363643646, "learning_rate": 1.4971038170206447e-06, "loss": 0.0344, "step": 57250 }, { "epoch": 8.50438140502005, "grad_norm": 0.7772937417030334, "learning_rate": 1.4956185949799496e-06, "loss": 0.0541, "step": 57260 }, { "epoch": 8.505866627060746, "grad_norm": 0.8068942427635193, "learning_rate": 1.4941333729392547e-06, "loss": 0.0669, "step": 57270 }, { "epoch": 8.507351849101441, "grad_norm": 0.7912581562995911, "learning_rate": 1.4926481508985593e-06, "loss": 0.0602, "step": 57280 }, { "epoch": 8.508837071142135, "grad_norm": 0.6579568386077881, "learning_rate": 1.4911629288578644e-06, "loss": 0.05, "step": 57290 }, { "epoch": 8.510322293182831, "grad_norm": 0.6232265830039978, "learning_rate": 1.4896777068171692e-06, "loss": 0.068, "step": 57300 }, { "epoch": 8.511807515223525, "grad_norm": 0.5684225559234619, "learning_rate": 1.4881924847764743e-06, "loss": 0.0505, "step": 57310 }, { "epoch": 8.513292737264221, "grad_norm": 0.5152314305305481, "learning_rate": 1.4867072627357792e-06, "loss": 0.0527, "step": 57320 }, { "epoch": 8.514777959304917, "grad_norm": 0.5281168818473816, "learning_rate": 1.485222040695084e-06, "loss": 0.0537, "step": 57330 }, { "epoch": 8.516263181345611, "grad_norm": 0.5841067433357239, "learning_rate": 1.483736818654389e-06, "loss": 0.0479, "step": 57340 }, { "epoch": 8.517748403386307, "grad_norm": 0.749219536781311, "learning_rate": 1.4822515966136937e-06, "loss": 0.0793, "step": 57350 }, { "epoch": 8.519233625427, "grad_norm": 1.2122151851654053, "learning_rate": 1.4807663745729988e-06, "loss": 0.0633, "step": 57360 }, { "epoch": 8.520718847467696, "grad_norm": 1.5486130714416504, "learning_rate": 1.4792811525323037e-06, "loss": 0.0534, "step": 57370 }, { "epoch": 8.522204069508392, "grad_norm": 1.1606239080429077, "learning_rate": 1.4777959304916087e-06, "loss": 0.0619, "step": 57380 }, { "epoch": 8.523689291549086, "grad_norm": 0.9954538345336914, "learning_rate": 1.4763107084509134e-06, "loss": 0.0636, "step": 57390 }, { "epoch": 8.525174513589782, "grad_norm": 1.7831127643585205, "learning_rate": 1.4748254864102184e-06, "loss": 0.0512, "step": 57400 }, { "epoch": 8.526659735630476, "grad_norm": 0.38579750061035156, "learning_rate": 1.4733402643695233e-06, "loss": 0.051, "step": 57410 }, { "epoch": 8.528144957671172, "grad_norm": 0.3082354664802551, "learning_rate": 1.4718550423288284e-06, "loss": 0.0714, "step": 57420 }, { "epoch": 8.529630179711868, "grad_norm": 0.7812128663063049, "learning_rate": 1.470369820288133e-06, "loss": 0.0494, "step": 57430 }, { "epoch": 8.531115401752562, "grad_norm": 1.4353504180908203, "learning_rate": 1.468884598247438e-06, "loss": 0.0738, "step": 57440 }, { "epoch": 8.532600623793257, "grad_norm": 1.3545862436294556, "learning_rate": 1.4673993762067432e-06, "loss": 0.0522, "step": 57450 }, { "epoch": 8.534085845833951, "grad_norm": 0.9643948674201965, "learning_rate": 1.465914154166048e-06, "loss": 0.0581, "step": 57460 }, { "epoch": 8.535571067874647, "grad_norm": 1.1293238401412964, "learning_rate": 1.464428932125353e-06, "loss": 0.0691, "step": 57470 }, { "epoch": 8.537056289915343, "grad_norm": 0.47619926929473877, "learning_rate": 1.4629437100846577e-06, "loss": 0.0608, "step": 57480 }, { "epoch": 8.538541511956037, "grad_norm": 0.8765807747840881, "learning_rate": 1.4614584880439628e-06, "loss": 0.0547, "step": 57490 }, { "epoch": 8.540026733996733, "grad_norm": 0.8261600732803345, "learning_rate": 1.4599732660032676e-06, "loss": 0.0584, "step": 57500 }, { "epoch": 8.541511956037427, "grad_norm": 0.9158366918563843, "learning_rate": 1.4584880439625725e-06, "loss": 0.0456, "step": 57510 }, { "epoch": 8.542997178078123, "grad_norm": 0.6802687048912048, "learning_rate": 1.4570028219218774e-06, "loss": 0.0497, "step": 57520 }, { "epoch": 8.544482400118818, "grad_norm": 0.918070375919342, "learning_rate": 1.4555175998811824e-06, "loss": 0.0501, "step": 57530 }, { "epoch": 8.545967622159512, "grad_norm": 0.9905110597610474, "learning_rate": 1.454032377840487e-06, "loss": 0.0571, "step": 57540 }, { "epoch": 8.547452844200208, "grad_norm": 1.13737154006958, "learning_rate": 1.4525471557997921e-06, "loss": 0.053, "step": 57550 }, { "epoch": 8.548938066240902, "grad_norm": 0.25579169392585754, "learning_rate": 1.451061933759097e-06, "loss": 0.0352, "step": 57560 }, { "epoch": 8.550423288281598, "grad_norm": 0.6201579570770264, "learning_rate": 1.449576711718402e-06, "loss": 0.0457, "step": 57570 }, { "epoch": 8.551908510322294, "grad_norm": 0.6442703604698181, "learning_rate": 1.4480914896777071e-06, "loss": 0.0494, "step": 57580 }, { "epoch": 8.553393732362988, "grad_norm": 1.298711895942688, "learning_rate": 1.4466062676370118e-06, "loss": 0.0549, "step": 57590 }, { "epoch": 8.554878954403684, "grad_norm": 0.8238638639450073, "learning_rate": 1.4451210455963169e-06, "loss": 0.0481, "step": 57600 }, { "epoch": 8.556364176444378, "grad_norm": 1.0968530178070068, "learning_rate": 1.4436358235556217e-06, "loss": 0.0399, "step": 57610 }, { "epoch": 8.557849398485073, "grad_norm": 0.766776442527771, "learning_rate": 1.4421506015149268e-06, "loss": 0.0616, "step": 57620 }, { "epoch": 8.55933462052577, "grad_norm": 0.8537440299987793, "learning_rate": 1.4406653794742314e-06, "loss": 0.0636, "step": 57630 }, { "epoch": 8.560819842566463, "grad_norm": 1.3083012104034424, "learning_rate": 1.4391801574335365e-06, "loss": 0.0513, "step": 57640 }, { "epoch": 8.562305064607159, "grad_norm": 0.84224933385849, "learning_rate": 1.4376949353928413e-06, "loss": 0.0525, "step": 57650 }, { "epoch": 8.563790286647853, "grad_norm": 1.0603967905044556, "learning_rate": 1.4362097133521464e-06, "loss": 0.0418, "step": 57660 }, { "epoch": 8.565275508688549, "grad_norm": 1.2050191164016724, "learning_rate": 1.434724491311451e-06, "loss": 0.0483, "step": 57670 }, { "epoch": 8.566760730729245, "grad_norm": 0.6597033739089966, "learning_rate": 1.4332392692707561e-06, "loss": 0.069, "step": 57680 }, { "epoch": 8.568245952769939, "grad_norm": 1.193673849105835, "learning_rate": 1.431754047230061e-06, "loss": 0.0613, "step": 57690 }, { "epoch": 8.569731174810634, "grad_norm": 0.638843834400177, "learning_rate": 1.4302688251893658e-06, "loss": 0.0574, "step": 57700 }, { "epoch": 8.57121639685133, "grad_norm": 0.8168714046478271, "learning_rate": 1.4287836031486707e-06, "loss": 0.0407, "step": 57710 }, { "epoch": 8.572701618892024, "grad_norm": 0.19827646017074585, "learning_rate": 1.4272983811079758e-06, "loss": 0.0345, "step": 57720 }, { "epoch": 8.57418684093272, "grad_norm": 1.014280080795288, "learning_rate": 1.4258131590672808e-06, "loss": 0.0606, "step": 57730 }, { "epoch": 8.575672062973414, "grad_norm": 0.2834644913673401, "learning_rate": 1.4243279370265855e-06, "loss": 0.0444, "step": 57740 }, { "epoch": 8.57715728501411, "grad_norm": 1.5880845785140991, "learning_rate": 1.4228427149858906e-06, "loss": 0.0573, "step": 57750 }, { "epoch": 8.578642507054806, "grad_norm": 2.1034553050994873, "learning_rate": 1.4213574929451954e-06, "loss": 0.0594, "step": 57760 }, { "epoch": 8.5801277290955, "grad_norm": 0.22017979621887207, "learning_rate": 1.4198722709045005e-06, "loss": 0.0677, "step": 57770 }, { "epoch": 8.581612951136195, "grad_norm": 0.544924795627594, "learning_rate": 1.4183870488638051e-06, "loss": 0.0419, "step": 57780 }, { "epoch": 8.58309817317689, "grad_norm": 1.1174598932266235, "learning_rate": 1.4169018268231102e-06, "loss": 0.0461, "step": 57790 }, { "epoch": 8.584583395217585, "grad_norm": 0.851573646068573, "learning_rate": 1.415416604782415e-06, "loss": 0.0479, "step": 57800 }, { "epoch": 8.586068617258281, "grad_norm": 0.7249863147735596, "learning_rate": 1.4139313827417201e-06, "loss": 0.0498, "step": 57810 }, { "epoch": 8.587553839298975, "grad_norm": 0.815183699131012, "learning_rate": 1.4124461607010248e-06, "loss": 0.0476, "step": 57820 }, { "epoch": 8.58903906133967, "grad_norm": 0.8330841064453125, "learning_rate": 1.4109609386603298e-06, "loss": 0.0666, "step": 57830 }, { "epoch": 8.590524283380365, "grad_norm": 0.32213395833969116, "learning_rate": 1.4094757166196347e-06, "loss": 0.0483, "step": 57840 }, { "epoch": 8.59200950542106, "grad_norm": 0.7228590250015259, "learning_rate": 1.4079904945789398e-06, "loss": 0.062, "step": 57850 }, { "epoch": 8.593494727461756, "grad_norm": 1.877468228340149, "learning_rate": 1.4065052725382446e-06, "loss": 0.0522, "step": 57860 }, { "epoch": 8.59497994950245, "grad_norm": 1.2192243337631226, "learning_rate": 1.4050200504975495e-06, "loss": 0.083, "step": 57870 }, { "epoch": 8.596465171543146, "grad_norm": 0.4343273937702179, "learning_rate": 1.4035348284568545e-06, "loss": 0.0543, "step": 57880 }, { "epoch": 8.59795039358384, "grad_norm": 0.4854268729686737, "learning_rate": 1.4020496064161592e-06, "loss": 0.043, "step": 57890 }, { "epoch": 8.599435615624536, "grad_norm": 1.3126654624938965, "learning_rate": 1.4005643843754643e-06, "loss": 0.0581, "step": 57900 }, { "epoch": 8.600920837665232, "grad_norm": 0.8461434841156006, "learning_rate": 1.3990791623347691e-06, "loss": 0.0569, "step": 57910 }, { "epoch": 8.602406059705926, "grad_norm": 0.22064021229743958, "learning_rate": 1.3975939402940742e-06, "loss": 0.0608, "step": 57920 }, { "epoch": 8.603891281746622, "grad_norm": 0.5799633860588074, "learning_rate": 1.3961087182533788e-06, "loss": 0.0432, "step": 57930 }, { "epoch": 8.605376503787316, "grad_norm": 0.8562087416648865, "learning_rate": 1.3946234962126839e-06, "loss": 0.0513, "step": 57940 }, { "epoch": 8.606861725828011, "grad_norm": 1.1041144132614136, "learning_rate": 1.3931382741719887e-06, "loss": 0.0631, "step": 57950 }, { "epoch": 8.608346947868707, "grad_norm": 1.4788848161697388, "learning_rate": 1.3916530521312938e-06, "loss": 0.0405, "step": 57960 }, { "epoch": 8.609832169909401, "grad_norm": 1.005614995956421, "learning_rate": 1.3901678300905985e-06, "loss": 0.0838, "step": 57970 }, { "epoch": 8.611317391950097, "grad_norm": 0.7344144582748413, "learning_rate": 1.3886826080499035e-06, "loss": 0.0694, "step": 57980 }, { "epoch": 8.612802613990791, "grad_norm": 0.5920735597610474, "learning_rate": 1.3871973860092086e-06, "loss": 0.0457, "step": 57990 }, { "epoch": 8.614287836031487, "grad_norm": 0.4997226595878601, "learning_rate": 1.3857121639685135e-06, "loss": 0.051, "step": 58000 }, { "epoch": 8.615773058072183, "grad_norm": 1.0840632915496826, "learning_rate": 1.3842269419278185e-06, "loss": 0.0637, "step": 58010 }, { "epoch": 8.617258280112877, "grad_norm": 0.500771164894104, "learning_rate": 1.3827417198871232e-06, "loss": 0.0456, "step": 58020 }, { "epoch": 8.618743502153572, "grad_norm": 0.6269918084144592, "learning_rate": 1.3812564978464282e-06, "loss": 0.0566, "step": 58030 }, { "epoch": 8.620228724194266, "grad_norm": 0.8221311569213867, "learning_rate": 1.379771275805733e-06, "loss": 0.0785, "step": 58040 }, { "epoch": 8.621713946234962, "grad_norm": 0.5016255378723145, "learning_rate": 1.378286053765038e-06, "loss": 0.057, "step": 58050 }, { "epoch": 8.623199168275658, "grad_norm": 0.9415386915206909, "learning_rate": 1.3768008317243428e-06, "loss": 0.034, "step": 58060 }, { "epoch": 8.624684390316352, "grad_norm": 1.6327321529388428, "learning_rate": 1.3753156096836479e-06, "loss": 0.0507, "step": 58070 }, { "epoch": 8.626169612357048, "grad_norm": 0.9239318370819092, "learning_rate": 1.3738303876429525e-06, "loss": 0.0561, "step": 58080 }, { "epoch": 8.627654834397742, "grad_norm": 0.25347572565078735, "learning_rate": 1.3723451656022576e-06, "loss": 0.0582, "step": 58090 }, { "epoch": 8.629140056438438, "grad_norm": 0.5622754096984863, "learning_rate": 1.3708599435615624e-06, "loss": 0.0454, "step": 58100 }, { "epoch": 8.630625278479133, "grad_norm": 1.1426067352294922, "learning_rate": 1.3693747215208675e-06, "loss": 0.047, "step": 58110 }, { "epoch": 8.632110500519827, "grad_norm": 1.66712486743927, "learning_rate": 1.3678894994801726e-06, "loss": 0.0551, "step": 58120 }, { "epoch": 8.633595722560523, "grad_norm": 0.9281148314476013, "learning_rate": 1.3664042774394772e-06, "loss": 0.0487, "step": 58130 }, { "epoch": 8.635080944601217, "grad_norm": 0.7190086841583252, "learning_rate": 1.3649190553987823e-06, "loss": 0.0508, "step": 58140 }, { "epoch": 8.636566166641913, "grad_norm": 2.1104774475097656, "learning_rate": 1.3634338333580872e-06, "loss": 0.0617, "step": 58150 }, { "epoch": 8.638051388682609, "grad_norm": 0.7850050926208496, "learning_rate": 1.3619486113173922e-06, "loss": 0.0507, "step": 58160 }, { "epoch": 8.639536610723303, "grad_norm": 0.592963457107544, "learning_rate": 1.3604633892766969e-06, "loss": 0.0672, "step": 58170 }, { "epoch": 8.641021832763998, "grad_norm": 1.102217197418213, "learning_rate": 1.358978167236002e-06, "loss": 0.0741, "step": 58180 }, { "epoch": 8.642507054804693, "grad_norm": 0.5851567983627319, "learning_rate": 1.3574929451953068e-06, "loss": 0.0443, "step": 58190 }, { "epoch": 8.643992276845388, "grad_norm": 0.8554288148880005, "learning_rate": 1.3560077231546119e-06, "loss": 0.0524, "step": 58200 }, { "epoch": 8.645477498886084, "grad_norm": 0.8415836691856384, "learning_rate": 1.3545225011139165e-06, "loss": 0.0476, "step": 58210 }, { "epoch": 8.646962720926778, "grad_norm": 1.2989027500152588, "learning_rate": 1.3530372790732216e-06, "loss": 0.0437, "step": 58220 }, { "epoch": 8.648447942967474, "grad_norm": 0.886523425579071, "learning_rate": 1.3515520570325264e-06, "loss": 0.0356, "step": 58230 }, { "epoch": 8.649933165008168, "grad_norm": 0.4318709373474121, "learning_rate": 1.3500668349918313e-06, "loss": 0.0525, "step": 58240 }, { "epoch": 8.651418387048864, "grad_norm": 1.2308825254440308, "learning_rate": 1.3485816129511364e-06, "loss": 0.0554, "step": 58250 }, { "epoch": 8.65290360908956, "grad_norm": 0.6957741379737854, "learning_rate": 1.3470963909104412e-06, "loss": 0.0402, "step": 58260 }, { "epoch": 8.654388831130253, "grad_norm": 0.4085921347141266, "learning_rate": 1.3456111688697463e-06, "loss": 0.0562, "step": 58270 }, { "epoch": 8.65587405317095, "grad_norm": 0.7616286277770996, "learning_rate": 1.344125946829051e-06, "loss": 0.07, "step": 58280 }, { "epoch": 8.657359275211643, "grad_norm": 0.45014530420303345, "learning_rate": 1.342640724788356e-06, "loss": 0.059, "step": 58290 }, { "epoch": 8.658844497252339, "grad_norm": 1.3107436895370483, "learning_rate": 1.3411555027476609e-06, "loss": 0.0732, "step": 58300 }, { "epoch": 8.660329719293035, "grad_norm": 0.715117335319519, "learning_rate": 1.339670280706966e-06, "loss": 0.0447, "step": 58310 }, { "epoch": 8.661814941333729, "grad_norm": 0.9561058282852173, "learning_rate": 1.3381850586662706e-06, "loss": 0.0587, "step": 58320 }, { "epoch": 8.663300163374425, "grad_norm": 1.1679437160491943, "learning_rate": 1.3366998366255756e-06, "loss": 0.0401, "step": 58330 }, { "epoch": 8.664785385415119, "grad_norm": 0.3750496208667755, "learning_rate": 1.3352146145848805e-06, "loss": 0.065, "step": 58340 }, { "epoch": 8.666270607455814, "grad_norm": 0.5229451656341553, "learning_rate": 1.3337293925441856e-06, "loss": 0.0376, "step": 58350 }, { "epoch": 8.66775582949651, "grad_norm": 0.6506863236427307, "learning_rate": 1.3322441705034902e-06, "loss": 0.0508, "step": 58360 }, { "epoch": 8.669241051537204, "grad_norm": 0.9688324928283691, "learning_rate": 1.3307589484627953e-06, "loss": 0.0582, "step": 58370 }, { "epoch": 8.6707262735779, "grad_norm": 0.7780069708824158, "learning_rate": 1.3292737264221003e-06, "loss": 0.0513, "step": 58380 }, { "epoch": 8.672211495618594, "grad_norm": 0.8056851625442505, "learning_rate": 1.3277885043814052e-06, "loss": 0.0671, "step": 58390 }, { "epoch": 8.67369671765929, "grad_norm": 1.495537519454956, "learning_rate": 1.32630328234071e-06, "loss": 0.0451, "step": 58400 }, { "epoch": 8.675181939699986, "grad_norm": 1.1849182844161987, "learning_rate": 1.324818060300015e-06, "loss": 0.0479, "step": 58410 }, { "epoch": 8.67666716174068, "grad_norm": 1.2419134378433228, "learning_rate": 1.32333283825932e-06, "loss": 0.0802, "step": 58420 }, { "epoch": 8.678152383781375, "grad_norm": 1.0507631301879883, "learning_rate": 1.3218476162186246e-06, "loss": 0.0599, "step": 58430 }, { "epoch": 8.679637605822071, "grad_norm": 1.0075569152832031, "learning_rate": 1.3203623941779297e-06, "loss": 0.0525, "step": 58440 }, { "epoch": 8.681122827862765, "grad_norm": 0.7980392575263977, "learning_rate": 1.3188771721372346e-06, "loss": 0.0437, "step": 58450 }, { "epoch": 8.682608049903461, "grad_norm": 0.5224838256835938, "learning_rate": 1.3173919500965396e-06, "loss": 0.0547, "step": 58460 }, { "epoch": 8.684093271944155, "grad_norm": 0.7504544258117676, "learning_rate": 1.3159067280558443e-06, "loss": 0.0565, "step": 58470 }, { "epoch": 8.68557849398485, "grad_norm": 0.7422505617141724, "learning_rate": 1.3144215060151493e-06, "loss": 0.0669, "step": 58480 }, { "epoch": 8.687063716025547, "grad_norm": 0.5323972702026367, "learning_rate": 1.3129362839744542e-06, "loss": 0.053, "step": 58490 }, { "epoch": 8.68854893806624, "grad_norm": 0.5587813258171082, "learning_rate": 1.3114510619337593e-06, "loss": 0.0728, "step": 58500 }, { "epoch": 8.690034160106936, "grad_norm": 1.3359700441360474, "learning_rate": 1.3099658398930643e-06, "loss": 0.0561, "step": 58510 }, { "epoch": 8.69151938214763, "grad_norm": 1.0480090379714966, "learning_rate": 1.308480617852369e-06, "loss": 0.0468, "step": 58520 }, { "epoch": 8.693004604188326, "grad_norm": 0.5198378562927246, "learning_rate": 1.306995395811674e-06, "loss": 0.0718, "step": 58530 }, { "epoch": 8.694489826229022, "grad_norm": 0.986173152923584, "learning_rate": 1.305510173770979e-06, "loss": 0.0529, "step": 58540 }, { "epoch": 8.695975048269716, "grad_norm": 0.5360456109046936, "learning_rate": 1.304024951730284e-06, "loss": 0.0526, "step": 58550 }, { "epoch": 8.697460270310412, "grad_norm": 0.38922441005706787, "learning_rate": 1.3025397296895886e-06, "loss": 0.0518, "step": 58560 }, { "epoch": 8.698945492351106, "grad_norm": 1.2982813119888306, "learning_rate": 1.3010545076488937e-06, "loss": 0.0578, "step": 58570 }, { "epoch": 8.700430714391802, "grad_norm": 1.141973853111267, "learning_rate": 1.2995692856081985e-06, "loss": 0.0618, "step": 58580 }, { "epoch": 8.701915936432497, "grad_norm": 1.176173448562622, "learning_rate": 1.2980840635675034e-06, "loss": 0.0526, "step": 58590 }, { "epoch": 8.703401158473191, "grad_norm": 1.5215836763381958, "learning_rate": 1.2965988415268083e-06, "loss": 0.0488, "step": 58600 }, { "epoch": 8.704886380513887, "grad_norm": 0.5017318725585938, "learning_rate": 1.2951136194861133e-06, "loss": 0.0648, "step": 58610 }, { "epoch": 8.706371602554581, "grad_norm": 0.46008479595184326, "learning_rate": 1.293628397445418e-06, "loss": 0.0496, "step": 58620 }, { "epoch": 8.707856824595277, "grad_norm": 0.5816351771354675, "learning_rate": 1.292143175404723e-06, "loss": 0.0689, "step": 58630 }, { "epoch": 8.709342046635973, "grad_norm": 0.3985196352005005, "learning_rate": 1.2906579533640281e-06, "loss": 0.0621, "step": 58640 }, { "epoch": 8.710827268676667, "grad_norm": 0.8761214017868042, "learning_rate": 1.289172731323333e-06, "loss": 0.0538, "step": 58650 }, { "epoch": 8.712312490717363, "grad_norm": 0.8328244686126709, "learning_rate": 1.287687509282638e-06, "loss": 0.0497, "step": 58660 }, { "epoch": 8.713797712758057, "grad_norm": 1.0810935497283936, "learning_rate": 1.2862022872419427e-06, "loss": 0.085, "step": 58670 }, { "epoch": 8.715282934798752, "grad_norm": 1.138525128364563, "learning_rate": 1.2847170652012477e-06, "loss": 0.0604, "step": 58680 }, { "epoch": 8.716768156839448, "grad_norm": 1.0397404432296753, "learning_rate": 1.2832318431605526e-06, "loss": 0.0609, "step": 58690 }, { "epoch": 8.718253378880142, "grad_norm": 2.0040159225463867, "learning_rate": 1.2817466211198577e-06, "loss": 0.0432, "step": 58700 }, { "epoch": 8.719738600920838, "grad_norm": 0.5572036504745483, "learning_rate": 1.2802613990791623e-06, "loss": 0.0594, "step": 58710 }, { "epoch": 8.721223822961532, "grad_norm": 0.7825028300285339, "learning_rate": 1.2787761770384674e-06, "loss": 0.0514, "step": 58720 }, { "epoch": 8.722709045002228, "grad_norm": 0.37247392535209656, "learning_rate": 1.2772909549977722e-06, "loss": 0.0588, "step": 58730 }, { "epoch": 8.724194267042924, "grad_norm": 0.9438624382019043, "learning_rate": 1.2758057329570773e-06, "loss": 0.0493, "step": 58740 }, { "epoch": 8.725679489083618, "grad_norm": 0.9759641289710999, "learning_rate": 1.274320510916382e-06, "loss": 0.0502, "step": 58750 }, { "epoch": 8.727164711124313, "grad_norm": 0.6798367500305176, "learning_rate": 1.272835288875687e-06, "loss": 0.0572, "step": 58760 }, { "epoch": 8.728649933165007, "grad_norm": 0.7928009033203125, "learning_rate": 1.271350066834992e-06, "loss": 0.0603, "step": 58770 }, { "epoch": 8.730135155205703, "grad_norm": 0.44863516092300415, "learning_rate": 1.2698648447942967e-06, "loss": 0.0494, "step": 58780 }, { "epoch": 8.731620377246399, "grad_norm": 0.6240716576576233, "learning_rate": 1.2683796227536018e-06, "loss": 0.0535, "step": 58790 }, { "epoch": 8.733105599287093, "grad_norm": 1.6525503396987915, "learning_rate": 1.2668944007129067e-06, "loss": 0.0508, "step": 58800 }, { "epoch": 8.734590821327789, "grad_norm": 1.3705474138259888, "learning_rate": 1.2654091786722117e-06, "loss": 0.0605, "step": 58810 }, { "epoch": 8.736076043368483, "grad_norm": 0.4468485116958618, "learning_rate": 1.2639239566315164e-06, "loss": 0.0639, "step": 58820 }, { "epoch": 8.737561265409179, "grad_norm": 1.8079521656036377, "learning_rate": 1.2624387345908214e-06, "loss": 0.0505, "step": 58830 }, { "epoch": 8.739046487449874, "grad_norm": 1.1422653198242188, "learning_rate": 1.2609535125501263e-06, "loss": 0.0569, "step": 58840 }, { "epoch": 8.740531709490568, "grad_norm": 0.506270706653595, "learning_rate": 1.2594682905094314e-06, "loss": 0.0573, "step": 58850 }, { "epoch": 8.742016931531264, "grad_norm": 0.41067951917648315, "learning_rate": 1.257983068468736e-06, "loss": 0.0427, "step": 58860 }, { "epoch": 8.74350215357196, "grad_norm": 0.4286954998970032, "learning_rate": 1.256497846428041e-06, "loss": 0.0507, "step": 58870 }, { "epoch": 8.744987375612654, "grad_norm": 1.6591687202453613, "learning_rate": 1.255012624387346e-06, "loss": 0.0754, "step": 58880 }, { "epoch": 8.74647259765335, "grad_norm": 0.6580554842948914, "learning_rate": 1.253527402346651e-06, "loss": 0.0632, "step": 58890 }, { "epoch": 8.747957819694044, "grad_norm": 1.3528720140457153, "learning_rate": 1.252042180305956e-06, "loss": 0.0537, "step": 58900 }, { "epoch": 8.74944304173474, "grad_norm": 0.600568950176239, "learning_rate": 1.2505569582652607e-06, "loss": 0.0492, "step": 58910 }, { "epoch": 8.750928263775435, "grad_norm": 1.0042638778686523, "learning_rate": 1.2490717362245656e-06, "loss": 0.0499, "step": 58920 }, { "epoch": 8.75241348581613, "grad_norm": 0.5799403786659241, "learning_rate": 1.2475865141838707e-06, "loss": 0.0447, "step": 58930 }, { "epoch": 8.753898707856825, "grad_norm": 0.4826274514198303, "learning_rate": 1.2461012921431755e-06, "loss": 0.0497, "step": 58940 }, { "epoch": 8.75538392989752, "grad_norm": 0.8607178330421448, "learning_rate": 1.2446160701024804e-06, "loss": 0.0545, "step": 58950 }, { "epoch": 8.756869151938215, "grad_norm": 0.7830055356025696, "learning_rate": 1.2431308480617852e-06, "loss": 0.0422, "step": 58960 }, { "epoch": 8.75835437397891, "grad_norm": 0.3618166148662567, "learning_rate": 1.2416456260210903e-06, "loss": 0.0484, "step": 58970 }, { "epoch": 8.759839596019605, "grad_norm": 0.8886063694953918, "learning_rate": 1.2401604039803951e-06, "loss": 0.0507, "step": 58980 }, { "epoch": 8.7613248180603, "grad_norm": 1.0976297855377197, "learning_rate": 1.2386751819397002e-06, "loss": 0.0348, "step": 58990 }, { "epoch": 8.762810040100995, "grad_norm": 0.6038275361061096, "learning_rate": 1.237189959899005e-06, "loss": 0.0518, "step": 59000 }, { "epoch": 8.76429526214169, "grad_norm": 1.2169570922851562, "learning_rate": 1.23570473785831e-06, "loss": 0.0597, "step": 59010 }, { "epoch": 8.765780484182386, "grad_norm": 1.306175708770752, "learning_rate": 1.2342195158176148e-06, "loss": 0.0663, "step": 59020 }, { "epoch": 8.76726570622308, "grad_norm": 0.9833489656448364, "learning_rate": 1.2327342937769196e-06, "loss": 0.0475, "step": 59030 }, { "epoch": 8.768750928263776, "grad_norm": 0.8598132729530334, "learning_rate": 1.2312490717362247e-06, "loss": 0.0383, "step": 59040 }, { "epoch": 8.77023615030447, "grad_norm": 0.6318262219429016, "learning_rate": 1.2297638496955296e-06, "loss": 0.0461, "step": 59050 }, { "epoch": 8.771721372345166, "grad_norm": 1.8029948472976685, "learning_rate": 1.2282786276548344e-06, "loss": 0.0561, "step": 59060 }, { "epoch": 8.773206594385861, "grad_norm": 1.5056575536727905, "learning_rate": 1.2267934056141393e-06, "loss": 0.0542, "step": 59070 }, { "epoch": 8.774691816426555, "grad_norm": 0.7758419513702393, "learning_rate": 1.2253081835734444e-06, "loss": 0.0468, "step": 59080 }, { "epoch": 8.776177038467251, "grad_norm": 1.0615826845169067, "learning_rate": 1.2238229615327492e-06, "loss": 0.0397, "step": 59090 }, { "epoch": 8.777662260507945, "grad_norm": 0.5602389574050903, "learning_rate": 1.2223377394920543e-06, "loss": 0.0586, "step": 59100 }, { "epoch": 8.779147482548641, "grad_norm": 0.767863929271698, "learning_rate": 1.2208525174513591e-06, "loss": 0.0722, "step": 59110 }, { "epoch": 8.780632704589337, "grad_norm": 1.2912675142288208, "learning_rate": 1.219367295410664e-06, "loss": 0.0761, "step": 59120 }, { "epoch": 8.78211792663003, "grad_norm": 1.0784565210342407, "learning_rate": 1.2178820733699689e-06, "loss": 0.043, "step": 59130 }, { "epoch": 8.783603148670727, "grad_norm": 0.5213328003883362, "learning_rate": 1.216396851329274e-06, "loss": 0.05, "step": 59140 }, { "epoch": 8.78508837071142, "grad_norm": 0.12252107262611389, "learning_rate": 1.2149116292885788e-06, "loss": 0.0373, "step": 59150 }, { "epoch": 8.786573592752116, "grad_norm": 0.5756390690803528, "learning_rate": 1.2134264072478836e-06, "loss": 0.0423, "step": 59160 }, { "epoch": 8.788058814792812, "grad_norm": 0.5673865675926208, "learning_rate": 1.2119411852071885e-06, "loss": 0.0557, "step": 59170 }, { "epoch": 8.789544036833506, "grad_norm": 1.11675226688385, "learning_rate": 1.2104559631664936e-06, "loss": 0.0704, "step": 59180 }, { "epoch": 8.791029258874202, "grad_norm": 0.8019585609436035, "learning_rate": 1.2089707411257984e-06, "loss": 0.063, "step": 59190 }, { "epoch": 8.792514480914896, "grad_norm": 0.49639007449150085, "learning_rate": 1.2074855190851033e-06, "loss": 0.0612, "step": 59200 }, { "epoch": 8.793999702955592, "grad_norm": 1.0924346446990967, "learning_rate": 1.2060002970444081e-06, "loss": 0.0469, "step": 59210 }, { "epoch": 8.795484924996288, "grad_norm": 0.4211598038673401, "learning_rate": 1.204515075003713e-06, "loss": 0.0485, "step": 59220 }, { "epoch": 8.796970147036982, "grad_norm": 0.7586895823478699, "learning_rate": 1.203029852963018e-06, "loss": 0.0639, "step": 59230 }, { "epoch": 8.798455369077677, "grad_norm": 0.7787258625030518, "learning_rate": 1.2015446309223231e-06, "loss": 0.0568, "step": 59240 }, { "epoch": 8.799940591118371, "grad_norm": 0.5634714365005493, "learning_rate": 1.200059408881628e-06, "loss": 0.0442, "step": 59250 }, { "epoch": 8.801425813159067, "grad_norm": 1.3035249710083008, "learning_rate": 1.1985741868409328e-06, "loss": 0.0604, "step": 59260 }, { "epoch": 8.802911035199763, "grad_norm": 0.8066299557685852, "learning_rate": 1.1970889648002377e-06, "loss": 0.0489, "step": 59270 }, { "epoch": 8.804396257240457, "grad_norm": 1.6491585969924927, "learning_rate": 1.1956037427595428e-06, "loss": 0.0658, "step": 59280 }, { "epoch": 8.805881479281153, "grad_norm": 2.1438181400299072, "learning_rate": 1.1941185207188476e-06, "loss": 0.0563, "step": 59290 }, { "epoch": 8.807366701321847, "grad_norm": 1.1060525178909302, "learning_rate": 1.1926332986781525e-06, "loss": 0.0585, "step": 59300 }, { "epoch": 8.808851923362543, "grad_norm": 0.823993444442749, "learning_rate": 1.1911480766374573e-06, "loss": 0.0388, "step": 59310 }, { "epoch": 8.810337145403238, "grad_norm": 0.8131610155105591, "learning_rate": 1.1896628545967622e-06, "loss": 0.0565, "step": 59320 }, { "epoch": 8.811822367443932, "grad_norm": 0.6244588494300842, "learning_rate": 1.1881776325560673e-06, "loss": 0.0575, "step": 59330 }, { "epoch": 8.813307589484628, "grad_norm": 0.7041961550712585, "learning_rate": 1.1866924105153721e-06, "loss": 0.063, "step": 59340 }, { "epoch": 8.814792811525322, "grad_norm": 0.21381068229675293, "learning_rate": 1.185207188474677e-06, "loss": 0.0309, "step": 59350 }, { "epoch": 8.816278033566018, "grad_norm": 0.7408416271209717, "learning_rate": 1.183721966433982e-06, "loss": 0.0605, "step": 59360 }, { "epoch": 8.817763255606714, "grad_norm": 0.9459415674209595, "learning_rate": 1.182236744393287e-06, "loss": 0.0734, "step": 59370 }, { "epoch": 8.819248477647408, "grad_norm": 0.6017314195632935, "learning_rate": 1.1807515223525918e-06, "loss": 0.0533, "step": 59380 }, { "epoch": 8.820733699688104, "grad_norm": 0.6662879586219788, "learning_rate": 1.1792663003118968e-06, "loss": 0.051, "step": 59390 }, { "epoch": 8.822218921728798, "grad_norm": 1.1942579746246338, "learning_rate": 1.1777810782712017e-06, "loss": 0.0568, "step": 59400 }, { "epoch": 8.823704143769493, "grad_norm": 1.188172459602356, "learning_rate": 1.1762958562305065e-06, "loss": 0.0681, "step": 59410 }, { "epoch": 8.82518936581019, "grad_norm": 0.9328430891036987, "learning_rate": 1.1748106341898114e-06, "loss": 0.0544, "step": 59420 }, { "epoch": 8.826674587850883, "grad_norm": 0.6567057371139526, "learning_rate": 1.1733254121491165e-06, "loss": 0.0605, "step": 59430 }, { "epoch": 8.828159809891579, "grad_norm": 0.9897271394729614, "learning_rate": 1.1718401901084213e-06, "loss": 0.0556, "step": 59440 }, { "epoch": 8.829645031932273, "grad_norm": 0.8121185898780823, "learning_rate": 1.1703549680677262e-06, "loss": 0.0681, "step": 59450 }, { "epoch": 8.831130253972969, "grad_norm": 0.4883832335472107, "learning_rate": 1.168869746027031e-06, "loss": 0.048, "step": 59460 }, { "epoch": 8.832615476013665, "grad_norm": 1.0136661529541016, "learning_rate": 1.167384523986336e-06, "loss": 0.0607, "step": 59470 }, { "epoch": 8.834100698054359, "grad_norm": 1.6294145584106445, "learning_rate": 1.165899301945641e-06, "loss": 0.059, "step": 59480 }, { "epoch": 8.835585920095054, "grad_norm": 1.445611834526062, "learning_rate": 1.1644140799049458e-06, "loss": 0.0488, "step": 59490 }, { "epoch": 8.837071142135748, "grad_norm": 0.6219216585159302, "learning_rate": 1.1629288578642509e-06, "loss": 0.0465, "step": 59500 }, { "epoch": 8.838556364176444, "grad_norm": 0.4000525176525116, "learning_rate": 1.1614436358235557e-06, "loss": 0.0475, "step": 59510 }, { "epoch": 8.84004158621714, "grad_norm": 0.29526957869529724, "learning_rate": 1.1599584137828606e-06, "loss": 0.0398, "step": 59520 }, { "epoch": 8.841526808257834, "grad_norm": 1.7476985454559326, "learning_rate": 1.1584731917421657e-06, "loss": 0.0522, "step": 59530 }, { "epoch": 8.84301203029853, "grad_norm": 1.4126195907592773, "learning_rate": 1.1569879697014705e-06, "loss": 0.0543, "step": 59540 }, { "epoch": 8.844497252339224, "grad_norm": 0.6883609890937805, "learning_rate": 1.1555027476607754e-06, "loss": 0.0585, "step": 59550 }, { "epoch": 8.84598247437992, "grad_norm": 1.0310041904449463, "learning_rate": 1.1540175256200802e-06, "loss": 0.0457, "step": 59560 }, { "epoch": 8.847467696420615, "grad_norm": 0.666269063949585, "learning_rate": 1.152532303579385e-06, "loss": 0.0494, "step": 59570 }, { "epoch": 8.84895291846131, "grad_norm": 0.570885956287384, "learning_rate": 1.1510470815386902e-06, "loss": 0.0514, "step": 59580 }, { "epoch": 8.850438140502005, "grad_norm": 2.130302906036377, "learning_rate": 1.149561859497995e-06, "loss": 0.0514, "step": 59590 }, { "epoch": 8.851923362542701, "grad_norm": 1.2145628929138184, "learning_rate": 1.1480766374572999e-06, "loss": 0.029, "step": 59600 }, { "epoch": 8.853408584583395, "grad_norm": 0.6939643621444702, "learning_rate": 1.1465914154166047e-06, "loss": 0.0472, "step": 59610 }, { "epoch": 8.85489380662409, "grad_norm": 0.7668987512588501, "learning_rate": 1.1451061933759098e-06, "loss": 0.0605, "step": 59620 }, { "epoch": 8.856379028664785, "grad_norm": 1.1189602613449097, "learning_rate": 1.1436209713352149e-06, "loss": 0.0534, "step": 59630 }, { "epoch": 8.85786425070548, "grad_norm": 1.8580517768859863, "learning_rate": 1.1421357492945197e-06, "loss": 0.0663, "step": 59640 }, { "epoch": 8.859349472746176, "grad_norm": 1.231301188468933, "learning_rate": 1.1406505272538246e-06, "loss": 0.0602, "step": 59650 }, { "epoch": 8.86083469478687, "grad_norm": 0.23550699651241302, "learning_rate": 1.1391653052131294e-06, "loss": 0.0786, "step": 59660 }, { "epoch": 8.862319916827566, "grad_norm": 1.3787282705307007, "learning_rate": 1.1376800831724343e-06, "loss": 0.0523, "step": 59670 }, { "epoch": 8.86380513886826, "grad_norm": 0.7296797037124634, "learning_rate": 1.1361948611317394e-06, "loss": 0.0664, "step": 59680 }, { "epoch": 8.865290360908956, "grad_norm": 1.0177968740463257, "learning_rate": 1.1347096390910442e-06, "loss": 0.0701, "step": 59690 }, { "epoch": 8.866775582949652, "grad_norm": 1.0494672060012817, "learning_rate": 1.133224417050349e-06, "loss": 0.0563, "step": 59700 }, { "epoch": 8.868260804990346, "grad_norm": 1.0491480827331543, "learning_rate": 1.131739195009654e-06, "loss": 0.0568, "step": 59710 }, { "epoch": 8.869746027031042, "grad_norm": 0.9075988531112671, "learning_rate": 1.130253972968959e-06, "loss": 0.0732, "step": 59720 }, { "epoch": 8.871231249071736, "grad_norm": 0.7524979114532471, "learning_rate": 1.1287687509282639e-06, "loss": 0.0502, "step": 59730 }, { "epoch": 8.872716471112431, "grad_norm": 0.7980123162269592, "learning_rate": 1.1272835288875687e-06, "loss": 0.056, "step": 59740 }, { "epoch": 8.874201693153127, "grad_norm": 0.6302353143692017, "learning_rate": 1.1257983068468736e-06, "loss": 0.0492, "step": 59750 }, { "epoch": 8.875686915193821, "grad_norm": 1.1649489402770996, "learning_rate": 1.1243130848061786e-06, "loss": 0.0349, "step": 59760 }, { "epoch": 8.877172137234517, "grad_norm": 0.41437533497810364, "learning_rate": 1.1228278627654835e-06, "loss": 0.0543, "step": 59770 }, { "epoch": 8.878657359275211, "grad_norm": 0.6834044456481934, "learning_rate": 1.1213426407247886e-06, "loss": 0.0621, "step": 59780 }, { "epoch": 8.880142581315907, "grad_norm": 1.1875417232513428, "learning_rate": 1.1198574186840934e-06, "loss": 0.0571, "step": 59790 }, { "epoch": 8.881627803356603, "grad_norm": 0.9354287981987, "learning_rate": 1.1183721966433983e-06, "loss": 0.0673, "step": 59800 }, { "epoch": 8.883113025397297, "grad_norm": 1.0599032640457153, "learning_rate": 1.1168869746027031e-06, "loss": 0.063, "step": 59810 }, { "epoch": 8.884598247437992, "grad_norm": 0.8172001838684082, "learning_rate": 1.1154017525620082e-06, "loss": 0.057, "step": 59820 }, { "epoch": 8.886083469478686, "grad_norm": 1.5262736082077026, "learning_rate": 1.113916530521313e-06, "loss": 0.0643, "step": 59830 }, { "epoch": 8.887568691519382, "grad_norm": 0.44762691855430603, "learning_rate": 1.112431308480618e-06, "loss": 0.0364, "step": 59840 }, { "epoch": 8.889053913560078, "grad_norm": 1.2637884616851807, "learning_rate": 1.1109460864399228e-06, "loss": 0.049, "step": 59850 }, { "epoch": 8.890539135600772, "grad_norm": 0.6419134140014648, "learning_rate": 1.1094608643992276e-06, "loss": 0.0492, "step": 59860 }, { "epoch": 8.892024357641468, "grad_norm": 0.6041998267173767, "learning_rate": 1.1079756423585327e-06, "loss": 0.0609, "step": 59870 }, { "epoch": 8.893509579682162, "grad_norm": 1.4558790922164917, "learning_rate": 1.1064904203178376e-06, "loss": 0.0583, "step": 59880 }, { "epoch": 8.894994801722858, "grad_norm": 1.1899456977844238, "learning_rate": 1.1050051982771426e-06, "loss": 0.0493, "step": 59890 }, { "epoch": 8.896480023763553, "grad_norm": 0.3095281720161438, "learning_rate": 1.1035199762364475e-06, "loss": 0.0688, "step": 59900 }, { "epoch": 8.897965245804247, "grad_norm": 0.9570375680923462, "learning_rate": 1.1020347541957523e-06, "loss": 0.0533, "step": 59910 }, { "epoch": 8.899450467844943, "grad_norm": 0.522426187992096, "learning_rate": 1.1005495321550572e-06, "loss": 0.0539, "step": 59920 }, { "epoch": 8.900935689885637, "grad_norm": 0.6601673364639282, "learning_rate": 1.0990643101143623e-06, "loss": 0.0697, "step": 59930 }, { "epoch": 8.902420911926333, "grad_norm": 0.7265911102294922, "learning_rate": 1.0975790880736671e-06, "loss": 0.0476, "step": 59940 }, { "epoch": 8.903906133967029, "grad_norm": 1.3074336051940918, "learning_rate": 1.096093866032972e-06, "loss": 0.0614, "step": 59950 }, { "epoch": 8.905391356007723, "grad_norm": 1.233241081237793, "learning_rate": 1.0946086439922768e-06, "loss": 0.0648, "step": 59960 }, { "epoch": 8.906876578048418, "grad_norm": 2.0665476322174072, "learning_rate": 1.093123421951582e-06, "loss": 0.0435, "step": 59970 }, { "epoch": 8.908361800089112, "grad_norm": 0.3625172972679138, "learning_rate": 1.0916381999108868e-06, "loss": 0.0539, "step": 59980 }, { "epoch": 8.909847022129808, "grad_norm": 0.36465543508529663, "learning_rate": 1.0901529778701916e-06, "loss": 0.049, "step": 59990 }, { "epoch": 8.911332244170504, "grad_norm": 0.697887122631073, "learning_rate": 1.0886677558294965e-06, "loss": 0.0605, "step": 60000 }, { "epoch": 8.912817466211198, "grad_norm": 0.8298418521881104, "learning_rate": 1.0871825337888016e-06, "loss": 0.0523, "step": 60010 }, { "epoch": 8.914302688251894, "grad_norm": 0.6717225313186646, "learning_rate": 1.0856973117481064e-06, "loss": 0.0622, "step": 60020 }, { "epoch": 8.91578791029259, "grad_norm": 0.8412166833877563, "learning_rate": 1.0842120897074115e-06, "loss": 0.0471, "step": 60030 }, { "epoch": 8.917273132333284, "grad_norm": 0.5965431928634644, "learning_rate": 1.0827268676667163e-06, "loss": 0.0323, "step": 60040 }, { "epoch": 8.91875835437398, "grad_norm": 0.7671550512313843, "learning_rate": 1.0812416456260212e-06, "loss": 0.0637, "step": 60050 }, { "epoch": 8.920243576414673, "grad_norm": 0.5707191228866577, "learning_rate": 1.079756423585326e-06, "loss": 0.037, "step": 60060 }, { "epoch": 8.92172879845537, "grad_norm": 0.5659679174423218, "learning_rate": 1.0782712015446311e-06, "loss": 0.0406, "step": 60070 }, { "epoch": 8.923214020496065, "grad_norm": 1.0556646585464478, "learning_rate": 1.076785979503936e-06, "loss": 0.0723, "step": 60080 }, { "epoch": 8.924699242536759, "grad_norm": 0.26674020290374756, "learning_rate": 1.0753007574632408e-06, "loss": 0.0499, "step": 60090 }, { "epoch": 8.926184464577455, "grad_norm": 0.8880712985992432, "learning_rate": 1.0738155354225457e-06, "loss": 0.0489, "step": 60100 }, { "epoch": 8.927669686618149, "grad_norm": 0.4370151162147522, "learning_rate": 1.0723303133818505e-06, "loss": 0.0658, "step": 60110 }, { "epoch": 8.929154908658845, "grad_norm": 1.0058577060699463, "learning_rate": 1.0708450913411556e-06, "loss": 0.0529, "step": 60120 }, { "epoch": 8.93064013069954, "grad_norm": 0.8313179612159729, "learning_rate": 1.0693598693004605e-06, "loss": 0.034, "step": 60130 }, { "epoch": 8.932125352740234, "grad_norm": 1.0790013074874878, "learning_rate": 1.0678746472597653e-06, "loss": 0.0519, "step": 60140 }, { "epoch": 8.93361057478093, "grad_norm": 1.860836148262024, "learning_rate": 1.0663894252190704e-06, "loss": 0.0501, "step": 60150 }, { "epoch": 8.935095796821624, "grad_norm": 0.9269645810127258, "learning_rate": 1.0649042031783753e-06, "loss": 0.0588, "step": 60160 }, { "epoch": 8.93658101886232, "grad_norm": 1.4157025814056396, "learning_rate": 1.0634189811376803e-06, "loss": 0.0745, "step": 60170 }, { "epoch": 8.938066240903016, "grad_norm": 1.012364149093628, "learning_rate": 1.0619337590969852e-06, "loss": 0.0586, "step": 60180 }, { "epoch": 8.93955146294371, "grad_norm": 1.1754486560821533, "learning_rate": 1.06044853705629e-06, "loss": 0.0525, "step": 60190 }, { "epoch": 8.941036684984406, "grad_norm": 0.8056031465530396, "learning_rate": 1.0589633150155949e-06, "loss": 0.058, "step": 60200 }, { "epoch": 8.9425219070251, "grad_norm": 1.1152647733688354, "learning_rate": 1.0574780929748997e-06, "loss": 0.0432, "step": 60210 }, { "epoch": 8.944007129065795, "grad_norm": 2.0299875736236572, "learning_rate": 1.0559928709342048e-06, "loss": 0.0505, "step": 60220 }, { "epoch": 8.945492351106491, "grad_norm": 0.5199902653694153, "learning_rate": 1.0545076488935097e-06, "loss": 0.0602, "step": 60230 }, { "epoch": 8.946977573147185, "grad_norm": 1.0558547973632812, "learning_rate": 1.0530224268528145e-06, "loss": 0.0543, "step": 60240 }, { "epoch": 8.948462795187881, "grad_norm": 2.096571922302246, "learning_rate": 1.0515372048121194e-06, "loss": 0.0499, "step": 60250 }, { "epoch": 8.949948017228575, "grad_norm": 0.9488021731376648, "learning_rate": 1.0500519827714245e-06, "loss": 0.0703, "step": 60260 }, { "epoch": 8.95143323926927, "grad_norm": 0.6574435830116272, "learning_rate": 1.0485667607307293e-06, "loss": 0.0531, "step": 60270 }, { "epoch": 8.952918461309967, "grad_norm": 0.9170523881912231, "learning_rate": 1.0470815386900342e-06, "loss": 0.0556, "step": 60280 }, { "epoch": 8.95440368335066, "grad_norm": 1.5840109586715698, "learning_rate": 1.0455963166493392e-06, "loss": 0.0468, "step": 60290 }, { "epoch": 8.955888905391356, "grad_norm": 0.9436348676681519, "learning_rate": 1.044111094608644e-06, "loss": 0.061, "step": 60300 }, { "epoch": 8.95737412743205, "grad_norm": 1.0092281103134155, "learning_rate": 1.042625872567949e-06, "loss": 0.0596, "step": 60310 }, { "epoch": 8.958859349472746, "grad_norm": 0.7205719351768494, "learning_rate": 1.041140650527254e-06, "loss": 0.0481, "step": 60320 }, { "epoch": 8.960344571513442, "grad_norm": 0.3967041075229645, "learning_rate": 1.0396554284865589e-06, "loss": 0.0539, "step": 60330 }, { "epoch": 8.961829793554136, "grad_norm": 1.255529522895813, "learning_rate": 1.0381702064458637e-06, "loss": 0.0441, "step": 60340 }, { "epoch": 8.963315015594832, "grad_norm": 1.2569066286087036, "learning_rate": 1.0366849844051686e-06, "loss": 0.0587, "step": 60350 }, { "epoch": 8.964800237635526, "grad_norm": 0.9882292151451111, "learning_rate": 1.0351997623644737e-06, "loss": 0.0442, "step": 60360 }, { "epoch": 8.966285459676222, "grad_norm": 0.5611401796340942, "learning_rate": 1.0337145403237785e-06, "loss": 0.0463, "step": 60370 }, { "epoch": 8.967770681716917, "grad_norm": 1.5252411365509033, "learning_rate": 1.0322293182830834e-06, "loss": 0.0382, "step": 60380 }, { "epoch": 8.969255903757611, "grad_norm": 1.0554333925247192, "learning_rate": 1.0307440962423882e-06, "loss": 0.0518, "step": 60390 }, { "epoch": 8.970741125798307, "grad_norm": 0.7427452802658081, "learning_rate": 1.029258874201693e-06, "loss": 0.0524, "step": 60400 }, { "epoch": 8.972226347839001, "grad_norm": 1.093377709388733, "learning_rate": 1.0277736521609982e-06, "loss": 0.0497, "step": 60410 }, { "epoch": 8.973711569879697, "grad_norm": 0.6824570298194885, "learning_rate": 1.0262884301203032e-06, "loss": 0.0416, "step": 60420 }, { "epoch": 8.975196791920393, "grad_norm": 0.846742570400238, "learning_rate": 1.024803208079608e-06, "loss": 0.0551, "step": 60430 }, { "epoch": 8.976682013961087, "grad_norm": 1.6127779483795166, "learning_rate": 1.023317986038913e-06, "loss": 0.0429, "step": 60440 }, { "epoch": 8.978167236001783, "grad_norm": 1.100595235824585, "learning_rate": 1.0218327639982178e-06, "loss": 0.0481, "step": 60450 }, { "epoch": 8.979652458042477, "grad_norm": 1.0692342519760132, "learning_rate": 1.0203475419575227e-06, "loss": 0.0514, "step": 60460 }, { "epoch": 8.981137680083172, "grad_norm": 1.2287728786468506, "learning_rate": 1.0188623199168277e-06, "loss": 0.055, "step": 60470 }, { "epoch": 8.982622902123868, "grad_norm": 0.44107210636138916, "learning_rate": 1.0173770978761326e-06, "loss": 0.0465, "step": 60480 }, { "epoch": 8.984108124164562, "grad_norm": 0.5867712497711182, "learning_rate": 1.0158918758354374e-06, "loss": 0.0474, "step": 60490 }, { "epoch": 8.985593346205258, "grad_norm": 0.22786487638950348, "learning_rate": 1.0144066537947423e-06, "loss": 0.0479, "step": 60500 }, { "epoch": 8.987078568245952, "grad_norm": 0.6541853547096252, "learning_rate": 1.0129214317540474e-06, "loss": 0.049, "step": 60510 }, { "epoch": 8.988563790286648, "grad_norm": 0.5077256560325623, "learning_rate": 1.0114362097133522e-06, "loss": 0.0345, "step": 60520 }, { "epoch": 8.990049012327344, "grad_norm": 0.905841588973999, "learning_rate": 1.009950987672657e-06, "loss": 0.0754, "step": 60530 }, { "epoch": 8.991534234368038, "grad_norm": 1.5534299612045288, "learning_rate": 1.008465765631962e-06, "loss": 0.0417, "step": 60540 }, { "epoch": 8.993019456408733, "grad_norm": 0.6061202883720398, "learning_rate": 1.006980543591267e-06, "loss": 0.0545, "step": 60550 }, { "epoch": 8.994504678449427, "grad_norm": 0.6618340015411377, "learning_rate": 1.0054953215505719e-06, "loss": 0.0508, "step": 60560 }, { "epoch": 8.995989900490123, "grad_norm": 0.9549134373664856, "learning_rate": 1.004010099509877e-06, "loss": 0.0739, "step": 60570 }, { "epoch": 8.997475122530819, "grad_norm": 0.991654634475708, "learning_rate": 1.0025248774691818e-06, "loss": 0.0385, "step": 60580 }, { "epoch": 8.998960344571513, "grad_norm": 0.44757747650146484, "learning_rate": 1.0010396554284866e-06, "loss": 0.048, "step": 60590 }, { "epoch": 9.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.05483611300587654, "eval_runtime": 203.9869, "eval_samples_per_second": 186.38, "eval_steps_per_second": 5.829, "step": 60597 }, { "epoch": 9.000445566612209, "grad_norm": 0.9724392890930176, "learning_rate": 9.995544333877915e-07, "loss": 0.0761, "step": 60600 }, { "epoch": 9.001930788652903, "grad_norm": 0.6741613149642944, "learning_rate": 9.980692113470966e-07, "loss": 0.0625, "step": 60610 }, { "epoch": 9.003416010693599, "grad_norm": 1.055646538734436, "learning_rate": 9.965839893064014e-07, "loss": 0.0598, "step": 60620 }, { "epoch": 9.004901232734294, "grad_norm": 0.49605226516723633, "learning_rate": 9.950987672657063e-07, "loss": 0.0472, "step": 60630 }, { "epoch": 9.006386454774988, "grad_norm": 0.8366076350212097, "learning_rate": 9.936135452250111e-07, "loss": 0.0445, "step": 60640 }, { "epoch": 9.007871676815684, "grad_norm": 0.8322066068649292, "learning_rate": 9.92128323184316e-07, "loss": 0.0496, "step": 60650 }, { "epoch": 9.00935689885638, "grad_norm": 0.7049952149391174, "learning_rate": 9.90643101143621e-07, "loss": 0.0509, "step": 60660 }, { "epoch": 9.010842120897074, "grad_norm": 0.583831250667572, "learning_rate": 9.89157879102926e-07, "loss": 0.0515, "step": 60670 }, { "epoch": 9.01232734293777, "grad_norm": 1.2372843027114868, "learning_rate": 9.87672657062231e-07, "loss": 0.0589, "step": 60680 }, { "epoch": 9.013812564978464, "grad_norm": 0.8074018955230713, "learning_rate": 9.861874350215358e-07, "loss": 0.0573, "step": 60690 }, { "epoch": 9.01529778701916, "grad_norm": 0.665175199508667, "learning_rate": 9.847022129808407e-07, "loss": 0.0388, "step": 60700 }, { "epoch": 9.016783009059855, "grad_norm": 1.4056196212768555, "learning_rate": 9.832169909401458e-07, "loss": 0.0455, "step": 60710 }, { "epoch": 9.01826823110055, "grad_norm": 0.739521324634552, "learning_rate": 9.817317688994506e-07, "loss": 0.0792, "step": 60720 }, { "epoch": 9.019753453141245, "grad_norm": 1.6123601198196411, "learning_rate": 9.802465468587555e-07, "loss": 0.0504, "step": 60730 }, { "epoch": 9.02123867518194, "grad_norm": 0.39929506182670593, "learning_rate": 9.787613248180603e-07, "loss": 0.0435, "step": 60740 }, { "epoch": 9.022723897222635, "grad_norm": 0.8194249272346497, "learning_rate": 9.772761027773652e-07, "loss": 0.0464, "step": 60750 }, { "epoch": 9.02420911926333, "grad_norm": 0.744668185710907, "learning_rate": 9.757908807366703e-07, "loss": 0.0542, "step": 60760 }, { "epoch": 9.025694341304025, "grad_norm": 0.6779729723930359, "learning_rate": 9.743056586959751e-07, "loss": 0.0612, "step": 60770 }, { "epoch": 9.02717956334472, "grad_norm": 1.2432478666305542, "learning_rate": 9.7282043665528e-07, "loss": 0.0477, "step": 60780 }, { "epoch": 9.028664785385415, "grad_norm": 1.8497051000595093, "learning_rate": 9.713352146145848e-07, "loss": 0.0598, "step": 60790 }, { "epoch": 9.03015000742611, "grad_norm": 1.5731254816055298, "learning_rate": 9.6984999257389e-07, "loss": 0.046, "step": 60800 }, { "epoch": 9.031635229466806, "grad_norm": 0.7579479217529297, "learning_rate": 9.683647705331948e-07, "loss": 0.0416, "step": 60810 }, { "epoch": 9.0331204515075, "grad_norm": 0.39340782165527344, "learning_rate": 9.668795484924998e-07, "loss": 0.0442, "step": 60820 }, { "epoch": 9.034605673548196, "grad_norm": 0.7787233591079712, "learning_rate": 9.653943264518047e-07, "loss": 0.0546, "step": 60830 }, { "epoch": 9.03609089558889, "grad_norm": 1.0975919961929321, "learning_rate": 9.639091044111095e-07, "loss": 0.0407, "step": 60840 }, { "epoch": 9.037576117629586, "grad_norm": 0.8572696447372437, "learning_rate": 9.624238823704144e-07, "loss": 0.0609, "step": 60850 }, { "epoch": 9.039061339670281, "grad_norm": 0.8922178745269775, "learning_rate": 9.609386603297195e-07, "loss": 0.0568, "step": 60860 }, { "epoch": 9.040546561710975, "grad_norm": 1.2011473178863525, "learning_rate": 9.594534382890243e-07, "loss": 0.0614, "step": 60870 }, { "epoch": 9.042031783751671, "grad_norm": 0.6790103316307068, "learning_rate": 9.579682162483292e-07, "loss": 0.0514, "step": 60880 }, { "epoch": 9.043517005792365, "grad_norm": 0.8033337593078613, "learning_rate": 9.56482994207634e-07, "loss": 0.065, "step": 60890 }, { "epoch": 9.045002227833061, "grad_norm": 0.672807514667511, "learning_rate": 9.549977721669391e-07, "loss": 0.0435, "step": 60900 }, { "epoch": 9.046487449873757, "grad_norm": 1.7294105291366577, "learning_rate": 9.535125501262439e-07, "loss": 0.0599, "step": 60910 }, { "epoch": 9.04797267191445, "grad_norm": 0.6789121031761169, "learning_rate": 9.520273280855488e-07, "loss": 0.0472, "step": 60920 }, { "epoch": 9.049457893955147, "grad_norm": 1.1161093711853027, "learning_rate": 9.505421060448537e-07, "loss": 0.054, "step": 60930 }, { "epoch": 9.05094311599584, "grad_norm": 0.7191179394721985, "learning_rate": 9.490568840041587e-07, "loss": 0.0675, "step": 60940 }, { "epoch": 9.052428338036536, "grad_norm": 1.8499068021774292, "learning_rate": 9.475716619634637e-07, "loss": 0.0633, "step": 60950 }, { "epoch": 9.053913560077232, "grad_norm": 0.590984582901001, "learning_rate": 9.460864399227686e-07, "loss": 0.0538, "step": 60960 }, { "epoch": 9.055398782117926, "grad_norm": 0.873106837272644, "learning_rate": 9.446012178820735e-07, "loss": 0.0653, "step": 60970 }, { "epoch": 9.056884004158622, "grad_norm": 1.1326630115509033, "learning_rate": 9.431159958413784e-07, "loss": 0.0481, "step": 60980 }, { "epoch": 9.058369226199316, "grad_norm": 1.133381724357605, "learning_rate": 9.416307738006832e-07, "loss": 0.0438, "step": 60990 }, { "epoch": 9.059854448240012, "grad_norm": 0.30731022357940674, "learning_rate": 9.401455517599882e-07, "loss": 0.0387, "step": 61000 }, { "epoch": 9.061339670280708, "grad_norm": 0.6071634888648987, "learning_rate": 9.386603297192931e-07, "loss": 0.0587, "step": 61010 }, { "epoch": 9.062824892321402, "grad_norm": 0.34380868077278137, "learning_rate": 9.37175107678598e-07, "loss": 0.0471, "step": 61020 }, { "epoch": 9.064310114362097, "grad_norm": 1.20027494430542, "learning_rate": 9.356898856379029e-07, "loss": 0.0346, "step": 61030 }, { "epoch": 9.065795336402791, "grad_norm": 1.0148383378982544, "learning_rate": 9.342046635972078e-07, "loss": 0.056, "step": 61040 }, { "epoch": 9.067280558443487, "grad_norm": 1.2376158237457275, "learning_rate": 9.327194415565127e-07, "loss": 0.0447, "step": 61050 }, { "epoch": 9.068765780484183, "grad_norm": 0.8556867837905884, "learning_rate": 9.312342195158177e-07, "loss": 0.0609, "step": 61060 }, { "epoch": 9.070251002524877, "grad_norm": 1.4480454921722412, "learning_rate": 9.297489974751225e-07, "loss": 0.0663, "step": 61070 }, { "epoch": 9.071736224565573, "grad_norm": 0.8202183246612549, "learning_rate": 9.282637754344276e-07, "loss": 0.0569, "step": 61080 }, { "epoch": 9.073221446606267, "grad_norm": 1.4094607830047607, "learning_rate": 9.267785533937324e-07, "loss": 0.0552, "step": 61090 }, { "epoch": 9.074706668646963, "grad_norm": 0.8718863725662231, "learning_rate": 9.252933313530374e-07, "loss": 0.0544, "step": 61100 }, { "epoch": 9.076191890687658, "grad_norm": 0.912745475769043, "learning_rate": 9.238081093123423e-07, "loss": 0.056, "step": 61110 }, { "epoch": 9.077677112728352, "grad_norm": 0.773205578327179, "learning_rate": 9.223228872716472e-07, "loss": 0.0547, "step": 61120 }, { "epoch": 9.079162334769048, "grad_norm": 0.6849109530448914, "learning_rate": 9.208376652309521e-07, "loss": 0.0501, "step": 61130 }, { "epoch": 9.080647556809742, "grad_norm": 1.008122444152832, "learning_rate": 9.19352443190257e-07, "loss": 0.0651, "step": 61140 }, { "epoch": 9.082132778850438, "grad_norm": 0.9452615976333618, "learning_rate": 9.178672211495619e-07, "loss": 0.0542, "step": 61150 }, { "epoch": 9.083618000891134, "grad_norm": 0.38566067814826965, "learning_rate": 9.163819991088669e-07, "loss": 0.048, "step": 61160 }, { "epoch": 9.085103222931828, "grad_norm": 0.8904908895492554, "learning_rate": 9.148967770681717e-07, "loss": 0.049, "step": 61170 }, { "epoch": 9.086588444972524, "grad_norm": 0.8872094750404358, "learning_rate": 9.134115550274766e-07, "loss": 0.0484, "step": 61180 }, { "epoch": 9.088073667013218, "grad_norm": 1.5284932851791382, "learning_rate": 9.119263329867815e-07, "loss": 0.0275, "step": 61190 }, { "epoch": 9.089558889053913, "grad_norm": 1.5623444318771362, "learning_rate": 9.104411109460864e-07, "loss": 0.0565, "step": 61200 }, { "epoch": 9.09104411109461, "grad_norm": 0.670907199382782, "learning_rate": 9.089558889053915e-07, "loss": 0.0548, "step": 61210 }, { "epoch": 9.092529333135303, "grad_norm": 1.4543319940567017, "learning_rate": 9.074706668646964e-07, "loss": 0.0524, "step": 61220 }, { "epoch": 9.094014555175999, "grad_norm": 0.6262223720550537, "learning_rate": 9.059854448240013e-07, "loss": 0.0519, "step": 61230 }, { "epoch": 9.095499777216695, "grad_norm": 0.23713155090808868, "learning_rate": 9.045002227833063e-07, "loss": 0.0397, "step": 61240 }, { "epoch": 9.096984999257389, "grad_norm": 0.33859917521476746, "learning_rate": 9.030150007426111e-07, "loss": 0.0588, "step": 61250 }, { "epoch": 9.098470221298085, "grad_norm": 0.9685537815093994, "learning_rate": 9.01529778701916e-07, "loss": 0.0479, "step": 61260 }, { "epoch": 9.099955443338779, "grad_norm": 0.892279326915741, "learning_rate": 9.000445566612209e-07, "loss": 0.0421, "step": 61270 }, { "epoch": 9.101440665379474, "grad_norm": 0.8080083727836609, "learning_rate": 8.985593346205258e-07, "loss": 0.0632, "step": 61280 }, { "epoch": 9.10292588742017, "grad_norm": 0.9495354294776917, "learning_rate": 8.970741125798308e-07, "loss": 0.0561, "step": 61290 }, { "epoch": 9.104411109460864, "grad_norm": 0.7430086135864258, "learning_rate": 8.955888905391356e-07, "loss": 0.0803, "step": 61300 }, { "epoch": 9.10589633150156, "grad_norm": 1.0035367012023926, "learning_rate": 8.941036684984406e-07, "loss": 0.0485, "step": 61310 }, { "epoch": 9.107381553542254, "grad_norm": 1.3069180250167847, "learning_rate": 8.926184464577454e-07, "loss": 0.0487, "step": 61320 }, { "epoch": 9.10886677558295, "grad_norm": 0.8913138508796692, "learning_rate": 8.911332244170504e-07, "loss": 0.0553, "step": 61330 }, { "epoch": 9.110351997623646, "grad_norm": 0.29481425881385803, "learning_rate": 8.896480023763554e-07, "loss": 0.0586, "step": 61340 }, { "epoch": 9.11183721966434, "grad_norm": 1.036330223083496, "learning_rate": 8.881627803356603e-07, "loss": 0.0396, "step": 61350 }, { "epoch": 9.113322441705035, "grad_norm": 0.602928638458252, "learning_rate": 8.866775582949652e-07, "loss": 0.0696, "step": 61360 }, { "epoch": 9.11480766374573, "grad_norm": 1.1184871196746826, "learning_rate": 8.851923362542701e-07, "loss": 0.0451, "step": 61370 }, { "epoch": 9.116292885786425, "grad_norm": 1.7323182821273804, "learning_rate": 8.83707114213575e-07, "loss": 0.0739, "step": 61380 }, { "epoch": 9.117778107827121, "grad_norm": 1.0334404706954956, "learning_rate": 8.8222189217288e-07, "loss": 0.0581, "step": 61390 }, { "epoch": 9.119263329867815, "grad_norm": 0.5090399384498596, "learning_rate": 8.807366701321848e-07, "loss": 0.055, "step": 61400 }, { "epoch": 9.12074855190851, "grad_norm": 1.2942936420440674, "learning_rate": 8.792514480914898e-07, "loss": 0.0644, "step": 61410 }, { "epoch": 9.122233773949205, "grad_norm": 1.3819553852081299, "learning_rate": 8.777662260507946e-07, "loss": 0.0666, "step": 61420 }, { "epoch": 9.1237189959899, "grad_norm": 0.6027370691299438, "learning_rate": 8.762810040100996e-07, "loss": 0.0439, "step": 61430 }, { "epoch": 9.125204218030596, "grad_norm": 1.0936373472213745, "learning_rate": 8.747957819694045e-07, "loss": 0.0503, "step": 61440 }, { "epoch": 9.12668944007129, "grad_norm": 0.3803895115852356, "learning_rate": 8.733105599287093e-07, "loss": 0.0654, "step": 61450 }, { "epoch": 9.128174662111986, "grad_norm": 0.4349137544631958, "learning_rate": 8.718253378880143e-07, "loss": 0.0542, "step": 61460 }, { "epoch": 9.12965988415268, "grad_norm": 0.6400302648544312, "learning_rate": 8.703401158473193e-07, "loss": 0.0622, "step": 61470 }, { "epoch": 9.131145106193376, "grad_norm": 1.309779405593872, "learning_rate": 8.688548938066242e-07, "loss": 0.0314, "step": 61480 }, { "epoch": 9.132630328234072, "grad_norm": 1.1691462993621826, "learning_rate": 8.673696717659292e-07, "loss": 0.048, "step": 61490 }, { "epoch": 9.134115550274766, "grad_norm": 0.7045255899429321, "learning_rate": 8.65884449725234e-07, "loss": 0.0476, "step": 61500 }, { "epoch": 9.135600772315462, "grad_norm": 1.3898378610610962, "learning_rate": 8.64399227684539e-07, "loss": 0.0486, "step": 61510 }, { "epoch": 9.137085994356156, "grad_norm": 0.2608380913734436, "learning_rate": 8.629140056438438e-07, "loss": 0.0476, "step": 61520 }, { "epoch": 9.138571216396851, "grad_norm": 0.6374778747558594, "learning_rate": 8.614287836031487e-07, "loss": 0.0691, "step": 61530 }, { "epoch": 9.140056438437547, "grad_norm": 1.2475641965866089, "learning_rate": 8.599435615624537e-07, "loss": 0.0589, "step": 61540 }, { "epoch": 9.141541660478241, "grad_norm": 0.8836926221847534, "learning_rate": 8.584583395217585e-07, "loss": 0.0601, "step": 61550 }, { "epoch": 9.143026882518937, "grad_norm": 0.34879186749458313, "learning_rate": 8.569731174810635e-07, "loss": 0.0639, "step": 61560 }, { "epoch": 9.144512104559631, "grad_norm": 0.4875172972679138, "learning_rate": 8.554878954403683e-07, "loss": 0.072, "step": 61570 }, { "epoch": 9.145997326600327, "grad_norm": 0.8644539713859558, "learning_rate": 8.540026733996733e-07, "loss": 0.0426, "step": 61580 }, { "epoch": 9.147482548641023, "grad_norm": 1.0071200132369995, "learning_rate": 8.525174513589782e-07, "loss": 0.0314, "step": 61590 }, { "epoch": 9.148967770681717, "grad_norm": 0.5411166548728943, "learning_rate": 8.510322293182832e-07, "loss": 0.0786, "step": 61600 }, { "epoch": 9.150452992722412, "grad_norm": 0.6687445044517517, "learning_rate": 8.495470072775881e-07, "loss": 0.0649, "step": 61610 }, { "epoch": 9.151938214763106, "grad_norm": 0.7308300733566284, "learning_rate": 8.48061785236893e-07, "loss": 0.0378, "step": 61620 }, { "epoch": 9.153423436803802, "grad_norm": 1.4409397840499878, "learning_rate": 8.465765631961979e-07, "loss": 0.0515, "step": 61630 }, { "epoch": 9.154908658844498, "grad_norm": 0.8028169274330139, "learning_rate": 8.450913411555029e-07, "loss": 0.0584, "step": 61640 }, { "epoch": 9.156393880885192, "grad_norm": 0.8186599612236023, "learning_rate": 8.436061191148077e-07, "loss": 0.0764, "step": 61650 }, { "epoch": 9.157879102925888, "grad_norm": 0.739186704158783, "learning_rate": 8.421208970741127e-07, "loss": 0.0477, "step": 61660 }, { "epoch": 9.159364324966582, "grad_norm": 0.792286217212677, "learning_rate": 8.406356750334175e-07, "loss": 0.0697, "step": 61670 }, { "epoch": 9.160849547007277, "grad_norm": 1.066676378250122, "learning_rate": 8.391504529927225e-07, "loss": 0.063, "step": 61680 }, { "epoch": 9.162334769047973, "grad_norm": 1.1812419891357422, "learning_rate": 8.376652309520274e-07, "loss": 0.0553, "step": 61690 }, { "epoch": 9.163819991088667, "grad_norm": 1.9885129928588867, "learning_rate": 8.361800089113323e-07, "loss": 0.0578, "step": 61700 }, { "epoch": 9.165305213129363, "grad_norm": 1.2007282972335815, "learning_rate": 8.346947868706372e-07, "loss": 0.0638, "step": 61710 }, { "epoch": 9.166790435170057, "grad_norm": 1.5052951574325562, "learning_rate": 8.33209564829942e-07, "loss": 0.0566, "step": 61720 }, { "epoch": 9.168275657210753, "grad_norm": 0.37552034854888916, "learning_rate": 8.31724342789247e-07, "loss": 0.0549, "step": 61730 }, { "epoch": 9.169760879251449, "grad_norm": 0.8721835017204285, "learning_rate": 8.302391207485521e-07, "loss": 0.05, "step": 61740 }, { "epoch": 9.171246101292143, "grad_norm": 1.0215429067611694, "learning_rate": 8.287538987078569e-07, "loss": 0.0519, "step": 61750 }, { "epoch": 9.172731323332838, "grad_norm": 1.0494624376296997, "learning_rate": 8.272686766671619e-07, "loss": 0.0458, "step": 61760 }, { "epoch": 9.174216545373532, "grad_norm": 1.1894280910491943, "learning_rate": 8.257834546264667e-07, "loss": 0.0545, "step": 61770 }, { "epoch": 9.175701767414228, "grad_norm": 1.1743789911270142, "learning_rate": 8.242982325857717e-07, "loss": 0.0541, "step": 61780 }, { "epoch": 9.177186989454924, "grad_norm": 1.3520103693008423, "learning_rate": 8.228130105450766e-07, "loss": 0.0678, "step": 61790 }, { "epoch": 9.178672211495618, "grad_norm": 1.1151225566864014, "learning_rate": 8.213277885043814e-07, "loss": 0.0586, "step": 61800 }, { "epoch": 9.180157433536314, "grad_norm": 0.4770192503929138, "learning_rate": 8.198425664636864e-07, "loss": 0.0608, "step": 61810 }, { "epoch": 9.181642655577008, "grad_norm": 0.485674649477005, "learning_rate": 8.183573444229912e-07, "loss": 0.0582, "step": 61820 }, { "epoch": 9.183127877617704, "grad_norm": 0.518302321434021, "learning_rate": 8.168721223822962e-07, "loss": 0.0581, "step": 61830 }, { "epoch": 9.1846130996584, "grad_norm": 1.0120786428451538, "learning_rate": 8.153869003416011e-07, "loss": 0.0708, "step": 61840 }, { "epoch": 9.186098321699093, "grad_norm": 1.759045124053955, "learning_rate": 8.13901678300906e-07, "loss": 0.0487, "step": 61850 }, { "epoch": 9.18758354373979, "grad_norm": 0.931167721748352, "learning_rate": 8.124164562602109e-07, "loss": 0.0492, "step": 61860 }, { "epoch": 9.189068765780485, "grad_norm": 0.9127204418182373, "learning_rate": 8.109312342195159e-07, "loss": 0.074, "step": 61870 }, { "epoch": 9.190553987821179, "grad_norm": 0.6282828450202942, "learning_rate": 8.094460121788208e-07, "loss": 0.0523, "step": 61880 }, { "epoch": 9.192039209861875, "grad_norm": 1.20229172706604, "learning_rate": 8.079607901381258e-07, "loss": 0.0407, "step": 61890 }, { "epoch": 9.193524431902569, "grad_norm": 0.8881356120109558, "learning_rate": 8.064755680974306e-07, "loss": 0.0566, "step": 61900 }, { "epoch": 9.195009653943265, "grad_norm": 0.7234607338905334, "learning_rate": 8.049903460567356e-07, "loss": 0.0581, "step": 61910 }, { "epoch": 9.19649487598396, "grad_norm": 0.9576908349990845, "learning_rate": 8.035051240160404e-07, "loss": 0.0472, "step": 61920 }, { "epoch": 9.197980098024654, "grad_norm": 1.0542101860046387, "learning_rate": 8.020199019753454e-07, "loss": 0.0567, "step": 61930 }, { "epoch": 9.19946532006535, "grad_norm": 0.6845017075538635, "learning_rate": 8.005346799346503e-07, "loss": 0.0388, "step": 61940 }, { "epoch": 9.200950542106044, "grad_norm": 0.4817188084125519, "learning_rate": 7.990494578939552e-07, "loss": 0.0592, "step": 61950 }, { "epoch": 9.20243576414674, "grad_norm": 0.7306998372077942, "learning_rate": 7.975642358532601e-07, "loss": 0.0583, "step": 61960 }, { "epoch": 9.203920986187436, "grad_norm": 1.0176167488098145, "learning_rate": 7.96079013812565e-07, "loss": 0.0576, "step": 61970 }, { "epoch": 9.20540620822813, "grad_norm": 0.6478152275085449, "learning_rate": 7.945937917718699e-07, "loss": 0.06, "step": 61980 }, { "epoch": 9.206891430268826, "grad_norm": 0.7212924957275391, "learning_rate": 7.931085697311748e-07, "loss": 0.0445, "step": 61990 }, { "epoch": 9.20837665230952, "grad_norm": 0.6695299744606018, "learning_rate": 7.916233476904798e-07, "loss": 0.0498, "step": 62000 }, { "epoch": 9.209861874350215, "grad_norm": 0.9033427834510803, "learning_rate": 7.901381256497848e-07, "loss": 0.06, "step": 62010 }, { "epoch": 9.211347096390911, "grad_norm": 1.4528840780258179, "learning_rate": 7.886529036090896e-07, "loss": 0.0541, "step": 62020 }, { "epoch": 9.212832318431605, "grad_norm": 1.2177852392196655, "learning_rate": 7.871676815683946e-07, "loss": 0.054, "step": 62030 }, { "epoch": 9.214317540472301, "grad_norm": 0.617085337638855, "learning_rate": 7.856824595276995e-07, "loss": 0.0569, "step": 62040 }, { "epoch": 9.215802762512995, "grad_norm": 1.0635652542114258, "learning_rate": 7.841972374870044e-07, "loss": 0.0553, "step": 62050 }, { "epoch": 9.21728798455369, "grad_norm": 0.704115629196167, "learning_rate": 7.827120154463093e-07, "loss": 0.0596, "step": 62060 }, { "epoch": 9.218773206594387, "grad_norm": 0.673342764377594, "learning_rate": 7.812267934056141e-07, "loss": 0.0514, "step": 62070 }, { "epoch": 9.22025842863508, "grad_norm": 0.7802639007568359, "learning_rate": 7.797415713649191e-07, "loss": 0.0634, "step": 62080 }, { "epoch": 9.221743650675776, "grad_norm": 1.1733187437057495, "learning_rate": 7.78256349324224e-07, "loss": 0.0624, "step": 62090 }, { "epoch": 9.22322887271647, "grad_norm": 0.6247046589851379, "learning_rate": 7.767711272835289e-07, "loss": 0.0596, "step": 62100 }, { "epoch": 9.224714094757166, "grad_norm": 0.615653395652771, "learning_rate": 7.752859052428338e-07, "loss": 0.0402, "step": 62110 }, { "epoch": 9.226199316797862, "grad_norm": 0.35232970118522644, "learning_rate": 7.738006832021387e-07, "loss": 0.0593, "step": 62120 }, { "epoch": 9.227684538838556, "grad_norm": 2.261904716491699, "learning_rate": 7.723154611614438e-07, "loss": 0.0732, "step": 62130 }, { "epoch": 9.229169760879252, "grad_norm": 1.1600795984268188, "learning_rate": 7.708302391207487e-07, "loss": 0.0482, "step": 62140 }, { "epoch": 9.230654982919946, "grad_norm": 0.5957081317901611, "learning_rate": 7.693450170800535e-07, "loss": 0.0717, "step": 62150 }, { "epoch": 9.232140204960642, "grad_norm": 0.5073825120925903, "learning_rate": 7.678597950393585e-07, "loss": 0.0406, "step": 62160 }, { "epoch": 9.233625427001337, "grad_norm": 0.498580664396286, "learning_rate": 7.663745729986633e-07, "loss": 0.0525, "step": 62170 }, { "epoch": 9.235110649042031, "grad_norm": 0.4409525990486145, "learning_rate": 7.648893509579683e-07, "loss": 0.0569, "step": 62180 }, { "epoch": 9.236595871082727, "grad_norm": 0.8985182642936707, "learning_rate": 7.634041289172732e-07, "loss": 0.0497, "step": 62190 }, { "epoch": 9.238081093123421, "grad_norm": 0.9006769061088562, "learning_rate": 7.619189068765781e-07, "loss": 0.0554, "step": 62200 }, { "epoch": 9.239566315164117, "grad_norm": 0.323944091796875, "learning_rate": 7.60433684835883e-07, "loss": 0.068, "step": 62210 }, { "epoch": 9.241051537204813, "grad_norm": 0.8015594482421875, "learning_rate": 7.58948462795188e-07, "loss": 0.0391, "step": 62220 }, { "epoch": 9.242536759245507, "grad_norm": 1.1999105215072632, "learning_rate": 7.574632407544928e-07, "loss": 0.0589, "step": 62230 }, { "epoch": 9.244021981286203, "grad_norm": 1.5768723487854004, "learning_rate": 7.559780187137978e-07, "loss": 0.0721, "step": 62240 }, { "epoch": 9.245507203326897, "grad_norm": 1.066964030265808, "learning_rate": 7.544927966731026e-07, "loss": 0.0537, "step": 62250 }, { "epoch": 9.246992425367592, "grad_norm": 1.3641186952590942, "learning_rate": 7.530075746324077e-07, "loss": 0.0764, "step": 62260 }, { "epoch": 9.248477647408288, "grad_norm": 0.9384719133377075, "learning_rate": 7.515223525917126e-07, "loss": 0.0472, "step": 62270 }, { "epoch": 9.249962869448982, "grad_norm": 1.3951371908187866, "learning_rate": 7.500371305510175e-07, "loss": 0.0632, "step": 62280 }, { "epoch": 9.251448091489678, "grad_norm": 0.7329308390617371, "learning_rate": 7.485519085103224e-07, "loss": 0.0535, "step": 62290 }, { "epoch": 9.252933313530372, "grad_norm": 1.5356649160385132, "learning_rate": 7.470666864696273e-07, "loss": 0.0516, "step": 62300 }, { "epoch": 9.254418535571068, "grad_norm": 0.6318261623382568, "learning_rate": 7.455814644289322e-07, "loss": 0.0467, "step": 62310 }, { "epoch": 9.255903757611764, "grad_norm": 0.6265151500701904, "learning_rate": 7.440962423882372e-07, "loss": 0.0382, "step": 62320 }, { "epoch": 9.257388979652458, "grad_norm": 0.2857096195220947, "learning_rate": 7.42611020347542e-07, "loss": 0.055, "step": 62330 }, { "epoch": 9.258874201693153, "grad_norm": 0.9067076444625854, "learning_rate": 7.411257983068469e-07, "loss": 0.067, "step": 62340 }, { "epoch": 9.260359423733847, "grad_norm": 1.2431113719940186, "learning_rate": 7.396405762661518e-07, "loss": 0.0626, "step": 62350 }, { "epoch": 9.261844645774543, "grad_norm": 0.924633800983429, "learning_rate": 7.381553542254567e-07, "loss": 0.0455, "step": 62360 }, { "epoch": 9.263329867815239, "grad_norm": 1.4786045551300049, "learning_rate": 7.366701321847616e-07, "loss": 0.0511, "step": 62370 }, { "epoch": 9.264815089855933, "grad_norm": 0.7520620822906494, "learning_rate": 7.351849101440665e-07, "loss": 0.0499, "step": 62380 }, { "epoch": 9.266300311896629, "grad_norm": 0.48528382182121277, "learning_rate": 7.336996881033716e-07, "loss": 0.0618, "step": 62390 }, { "epoch": 9.267785533937325, "grad_norm": 0.9447212219238281, "learning_rate": 7.322144660626765e-07, "loss": 0.0607, "step": 62400 }, { "epoch": 9.269270755978019, "grad_norm": 1.028714895248413, "learning_rate": 7.307292440219814e-07, "loss": 0.0438, "step": 62410 }, { "epoch": 9.270755978018714, "grad_norm": 1.3869749307632446, "learning_rate": 7.292440219812863e-07, "loss": 0.054, "step": 62420 }, { "epoch": 9.272241200059408, "grad_norm": 1.0759178400039673, "learning_rate": 7.277587999405912e-07, "loss": 0.0597, "step": 62430 }, { "epoch": 9.273726422100104, "grad_norm": 1.0529601573944092, "learning_rate": 7.262735778998961e-07, "loss": 0.0544, "step": 62440 }, { "epoch": 9.2752116441408, "grad_norm": 0.7281443476676941, "learning_rate": 7.24788355859201e-07, "loss": 0.0638, "step": 62450 }, { "epoch": 9.276696866181494, "grad_norm": 0.25542089343070984, "learning_rate": 7.233031338185059e-07, "loss": 0.0514, "step": 62460 }, { "epoch": 9.27818208822219, "grad_norm": 0.6407657265663147, "learning_rate": 7.218179117778109e-07, "loss": 0.0459, "step": 62470 }, { "epoch": 9.279667310262884, "grad_norm": 0.9292405843734741, "learning_rate": 7.203326897371157e-07, "loss": 0.0568, "step": 62480 }, { "epoch": 9.28115253230358, "grad_norm": 0.6538282036781311, "learning_rate": 7.188474676964207e-07, "loss": 0.0626, "step": 62490 }, { "epoch": 9.282637754344275, "grad_norm": 0.7332746386528015, "learning_rate": 7.173622456557255e-07, "loss": 0.048, "step": 62500 }, { "epoch": 9.28412297638497, "grad_norm": 0.6291193962097168, "learning_rate": 7.158770236150305e-07, "loss": 0.0618, "step": 62510 }, { "epoch": 9.285608198425665, "grad_norm": 0.783424437046051, "learning_rate": 7.143918015743354e-07, "loss": 0.0675, "step": 62520 }, { "epoch": 9.287093420466359, "grad_norm": 0.8970275521278381, "learning_rate": 7.129065795336404e-07, "loss": 0.0421, "step": 62530 }, { "epoch": 9.288578642507055, "grad_norm": 1.1907296180725098, "learning_rate": 7.114213574929453e-07, "loss": 0.0624, "step": 62540 }, { "epoch": 9.29006386454775, "grad_norm": 1.3740684986114502, "learning_rate": 7.099361354522502e-07, "loss": 0.0378, "step": 62550 }, { "epoch": 9.291549086588445, "grad_norm": 0.9677320122718811, "learning_rate": 7.084509134115551e-07, "loss": 0.0375, "step": 62560 }, { "epoch": 9.29303430862914, "grad_norm": 1.0171085596084595, "learning_rate": 7.069656913708601e-07, "loss": 0.045, "step": 62570 }, { "epoch": 9.294519530669834, "grad_norm": 0.4692612588405609, "learning_rate": 7.054804693301649e-07, "loss": 0.0768, "step": 62580 }, { "epoch": 9.29600475271053, "grad_norm": 0.7565601468086243, "learning_rate": 7.039952472894699e-07, "loss": 0.0549, "step": 62590 }, { "epoch": 9.297489974751226, "grad_norm": 0.8417346477508545, "learning_rate": 7.025100252487747e-07, "loss": 0.0611, "step": 62600 }, { "epoch": 9.29897519679192, "grad_norm": 1.0898284912109375, "learning_rate": 7.010248032080796e-07, "loss": 0.0659, "step": 62610 }, { "epoch": 9.300460418832616, "grad_norm": 0.7627786993980408, "learning_rate": 6.995395811673846e-07, "loss": 0.0432, "step": 62620 }, { "epoch": 9.30194564087331, "grad_norm": 1.2172629833221436, "learning_rate": 6.980543591266894e-07, "loss": 0.0724, "step": 62630 }, { "epoch": 9.303430862914006, "grad_norm": 0.7371120452880859, "learning_rate": 6.965691370859944e-07, "loss": 0.0722, "step": 62640 }, { "epoch": 9.304916084954701, "grad_norm": 1.0806337594985962, "learning_rate": 6.950839150452992e-07, "loss": 0.0577, "step": 62650 }, { "epoch": 9.306401306995395, "grad_norm": 1.2978869676589966, "learning_rate": 6.935986930046043e-07, "loss": 0.0691, "step": 62660 }, { "epoch": 9.307886529036091, "grad_norm": 1.1727228164672852, "learning_rate": 6.921134709639093e-07, "loss": 0.0521, "step": 62670 }, { "epoch": 9.309371751076785, "grad_norm": 0.4627057909965515, "learning_rate": 6.906282489232141e-07, "loss": 0.0561, "step": 62680 }, { "epoch": 9.310856973117481, "grad_norm": 1.514148473739624, "learning_rate": 6.89143026882519e-07, "loss": 0.0635, "step": 62690 }, { "epoch": 9.312342195158177, "grad_norm": 0.7269427180290222, "learning_rate": 6.876578048418239e-07, "loss": 0.0604, "step": 62700 }, { "epoch": 9.31382741719887, "grad_norm": 0.46542301774024963, "learning_rate": 6.861725828011288e-07, "loss": 0.0647, "step": 62710 }, { "epoch": 9.315312639239567, "grad_norm": 0.5337091684341431, "learning_rate": 6.846873607604338e-07, "loss": 0.0572, "step": 62720 }, { "epoch": 9.31679786128026, "grad_norm": 0.7221681475639343, "learning_rate": 6.832021387197386e-07, "loss": 0.0467, "step": 62730 }, { "epoch": 9.318283083320956, "grad_norm": 1.7959321737289429, "learning_rate": 6.817169166790436e-07, "loss": 0.0699, "step": 62740 }, { "epoch": 9.319768305361652, "grad_norm": 0.5353294014930725, "learning_rate": 6.802316946383484e-07, "loss": 0.0429, "step": 62750 }, { "epoch": 9.321253527402346, "grad_norm": 1.852454662322998, "learning_rate": 6.787464725976534e-07, "loss": 0.0428, "step": 62760 }, { "epoch": 9.322738749443042, "grad_norm": 1.3642737865447998, "learning_rate": 6.772612505569583e-07, "loss": 0.0585, "step": 62770 }, { "epoch": 9.324223971483736, "grad_norm": 0.9288305044174194, "learning_rate": 6.757760285162632e-07, "loss": 0.0468, "step": 62780 }, { "epoch": 9.325709193524432, "grad_norm": 1.5143994092941284, "learning_rate": 6.742908064755682e-07, "loss": 0.0631, "step": 62790 }, { "epoch": 9.327194415565128, "grad_norm": 0.7979211807250977, "learning_rate": 6.728055844348731e-07, "loss": 0.0545, "step": 62800 }, { "epoch": 9.328679637605822, "grad_norm": 1.252830982208252, "learning_rate": 6.71320362394178e-07, "loss": 0.0557, "step": 62810 }, { "epoch": 9.330164859646517, "grad_norm": 1.5728176832199097, "learning_rate": 6.69835140353483e-07, "loss": 0.0506, "step": 62820 }, { "epoch": 9.331650081687211, "grad_norm": 1.2865973711013794, "learning_rate": 6.683499183127878e-07, "loss": 0.0716, "step": 62830 }, { "epoch": 9.333135303727907, "grad_norm": 0.5197668075561523, "learning_rate": 6.668646962720928e-07, "loss": 0.052, "step": 62840 }, { "epoch": 9.334620525768603, "grad_norm": 0.7080972194671631, "learning_rate": 6.653794742313976e-07, "loss": 0.0499, "step": 62850 }, { "epoch": 9.336105747809297, "grad_norm": 1.5883245468139648, "learning_rate": 6.638942521907026e-07, "loss": 0.0527, "step": 62860 }, { "epoch": 9.337590969849993, "grad_norm": 1.628940224647522, "learning_rate": 6.624090301500075e-07, "loss": 0.0491, "step": 62870 }, { "epoch": 9.339076191890687, "grad_norm": 0.41147395968437195, "learning_rate": 6.609238081093123e-07, "loss": 0.0546, "step": 62880 }, { "epoch": 9.340561413931383, "grad_norm": 0.7498815655708313, "learning_rate": 6.594385860686173e-07, "loss": 0.0733, "step": 62890 }, { "epoch": 9.342046635972078, "grad_norm": 1.1569870710372925, "learning_rate": 6.579533640279221e-07, "loss": 0.053, "step": 62900 }, { "epoch": 9.343531858012772, "grad_norm": 1.0395070314407349, "learning_rate": 6.564681419872271e-07, "loss": 0.0523, "step": 62910 }, { "epoch": 9.345017080053468, "grad_norm": 0.9898577928543091, "learning_rate": 6.549829199465322e-07, "loss": 0.0438, "step": 62920 }, { "epoch": 9.346502302094162, "grad_norm": 1.5343103408813477, "learning_rate": 6.53497697905837e-07, "loss": 0.0516, "step": 62930 }, { "epoch": 9.347987524134858, "grad_norm": 0.42942705750465393, "learning_rate": 6.52012475865142e-07, "loss": 0.0493, "step": 62940 }, { "epoch": 9.349472746175554, "grad_norm": 1.3621197938919067, "learning_rate": 6.505272538244468e-07, "loss": 0.061, "step": 62950 }, { "epoch": 9.350957968216248, "grad_norm": 0.8141943216323853, "learning_rate": 6.490420317837517e-07, "loss": 0.041, "step": 62960 }, { "epoch": 9.352443190256944, "grad_norm": 1.1711074113845825, "learning_rate": 6.475568097430567e-07, "loss": 0.0582, "step": 62970 }, { "epoch": 9.353928412297638, "grad_norm": 1.1483805179595947, "learning_rate": 6.460715877023615e-07, "loss": 0.0732, "step": 62980 }, { "epoch": 9.355413634338333, "grad_norm": 1.2621763944625854, "learning_rate": 6.445863656616665e-07, "loss": 0.0526, "step": 62990 }, { "epoch": 9.35689885637903, "grad_norm": 0.48527824878692627, "learning_rate": 6.431011436209713e-07, "loss": 0.0574, "step": 63000 }, { "epoch": 9.358384078419723, "grad_norm": 0.6406056880950928, "learning_rate": 6.416159215802763e-07, "loss": 0.0599, "step": 63010 }, { "epoch": 9.359869300460419, "grad_norm": 1.0590063333511353, "learning_rate": 6.401306995395812e-07, "loss": 0.0686, "step": 63020 }, { "epoch": 9.361354522501115, "grad_norm": 1.2997311353683472, "learning_rate": 6.386454774988861e-07, "loss": 0.0505, "step": 63030 }, { "epoch": 9.362839744541809, "grad_norm": 0.8595489263534546, "learning_rate": 6.37160255458191e-07, "loss": 0.0302, "step": 63040 }, { "epoch": 9.364324966582505, "grad_norm": 0.8814943432807922, "learning_rate": 6.35675033417496e-07, "loss": 0.0401, "step": 63050 }, { "epoch": 9.365810188623199, "grad_norm": 1.350519061088562, "learning_rate": 6.341898113768009e-07, "loss": 0.0576, "step": 63060 }, { "epoch": 9.367295410663894, "grad_norm": 1.250366449356079, "learning_rate": 6.327045893361059e-07, "loss": 0.059, "step": 63070 }, { "epoch": 9.36878063270459, "grad_norm": 0.40602949261665344, "learning_rate": 6.312193672954107e-07, "loss": 0.0631, "step": 63080 }, { "epoch": 9.370265854745284, "grad_norm": 1.4695994853973389, "learning_rate": 6.297341452547157e-07, "loss": 0.0604, "step": 63090 }, { "epoch": 9.37175107678598, "grad_norm": 0.6759101748466492, "learning_rate": 6.282489232140205e-07, "loss": 0.0524, "step": 63100 }, { "epoch": 9.373236298826674, "grad_norm": 0.9469413757324219, "learning_rate": 6.267637011733255e-07, "loss": 0.0454, "step": 63110 }, { "epoch": 9.37472152086737, "grad_norm": 1.0243042707443237, "learning_rate": 6.252784791326304e-07, "loss": 0.0682, "step": 63120 }, { "epoch": 9.376206742908066, "grad_norm": 1.182602047920227, "learning_rate": 6.237932570919353e-07, "loss": 0.0489, "step": 63130 }, { "epoch": 9.37769196494876, "grad_norm": 0.5317026972770691, "learning_rate": 6.223080350512402e-07, "loss": 0.0523, "step": 63140 }, { "epoch": 9.379177186989455, "grad_norm": 0.8718259930610657, "learning_rate": 6.208228130105451e-07, "loss": 0.0437, "step": 63150 }, { "epoch": 9.38066240903015, "grad_norm": 0.6692548394203186, "learning_rate": 6.193375909698501e-07, "loss": 0.0706, "step": 63160 }, { "epoch": 9.382147631070845, "grad_norm": 1.0512770414352417, "learning_rate": 6.17852368929155e-07, "loss": 0.0631, "step": 63170 }, { "epoch": 9.383632853111541, "grad_norm": 0.8435956835746765, "learning_rate": 6.163671468884598e-07, "loss": 0.0456, "step": 63180 }, { "epoch": 9.385118075152235, "grad_norm": 0.6097307801246643, "learning_rate": 6.148819248477648e-07, "loss": 0.052, "step": 63190 }, { "epoch": 9.38660329719293, "grad_norm": 0.8554158210754395, "learning_rate": 6.133967028070696e-07, "loss": 0.0448, "step": 63200 }, { "epoch": 9.388088519233625, "grad_norm": 1.3168599605560303, "learning_rate": 6.119114807663746e-07, "loss": 0.0614, "step": 63210 }, { "epoch": 9.38957374127432, "grad_norm": 0.7756260633468628, "learning_rate": 6.104262587256796e-07, "loss": 0.0391, "step": 63220 }, { "epoch": 9.391058963315016, "grad_norm": 0.5791680216789246, "learning_rate": 6.089410366849844e-07, "loss": 0.052, "step": 63230 }, { "epoch": 9.39254418535571, "grad_norm": 1.4955168962478638, "learning_rate": 6.074558146442894e-07, "loss": 0.0472, "step": 63240 }, { "epoch": 9.394029407396406, "grad_norm": 0.7317465543746948, "learning_rate": 6.059705926035942e-07, "loss": 0.0575, "step": 63250 }, { "epoch": 9.3955146294371, "grad_norm": 0.9136133790016174, "learning_rate": 6.044853705628992e-07, "loss": 0.0588, "step": 63260 }, { "epoch": 9.396999851477796, "grad_norm": 0.40172865986824036, "learning_rate": 6.030001485222041e-07, "loss": 0.0531, "step": 63270 }, { "epoch": 9.398485073518492, "grad_norm": 1.070167899131775, "learning_rate": 6.01514926481509e-07, "loss": 0.0367, "step": 63280 }, { "epoch": 9.399970295559186, "grad_norm": 0.7728586792945862, "learning_rate": 6.00029704440814e-07, "loss": 0.0466, "step": 63290 }, { "epoch": 9.401455517599882, "grad_norm": 0.7134668231010437, "learning_rate": 5.985444824001188e-07, "loss": 0.0722, "step": 63300 }, { "epoch": 9.402940739640576, "grad_norm": 0.7128061056137085, "learning_rate": 5.970592603594238e-07, "loss": 0.0476, "step": 63310 }, { "epoch": 9.404425961681271, "grad_norm": 0.4389706552028656, "learning_rate": 5.955740383187287e-07, "loss": 0.0683, "step": 63320 }, { "epoch": 9.405911183721967, "grad_norm": 0.2874282896518707, "learning_rate": 5.940888162780336e-07, "loss": 0.0556, "step": 63330 }, { "epoch": 9.407396405762661, "grad_norm": 1.0555808544158936, "learning_rate": 5.926035942373385e-07, "loss": 0.0674, "step": 63340 }, { "epoch": 9.408881627803357, "grad_norm": 0.664207398891449, "learning_rate": 5.911183721966434e-07, "loss": 0.065, "step": 63350 }, { "epoch": 9.410366849844051, "grad_norm": 1.2207247018814087, "learning_rate": 5.896331501559484e-07, "loss": 0.0591, "step": 63360 }, { "epoch": 9.411852071884747, "grad_norm": 1.1976929903030396, "learning_rate": 5.881479281152533e-07, "loss": 0.054, "step": 63370 }, { "epoch": 9.413337293925442, "grad_norm": 1.1441959142684937, "learning_rate": 5.866627060745582e-07, "loss": 0.0623, "step": 63380 }, { "epoch": 9.414822515966137, "grad_norm": 1.0371273756027222, "learning_rate": 5.851774840338631e-07, "loss": 0.0463, "step": 63390 }, { "epoch": 9.416307738006832, "grad_norm": 0.6782839298248291, "learning_rate": 5.83692261993168e-07, "loss": 0.0476, "step": 63400 }, { "epoch": 9.417792960047526, "grad_norm": 0.9542140960693359, "learning_rate": 5.822070399524729e-07, "loss": 0.0718, "step": 63410 }, { "epoch": 9.419278182088222, "grad_norm": 0.2834306061267853, "learning_rate": 5.807218179117779e-07, "loss": 0.0473, "step": 63420 }, { "epoch": 9.420763404128918, "grad_norm": 1.0381821393966675, "learning_rate": 5.792365958710828e-07, "loss": 0.0549, "step": 63430 }, { "epoch": 9.422248626169612, "grad_norm": 1.2295236587524414, "learning_rate": 5.777513738303877e-07, "loss": 0.035, "step": 63440 }, { "epoch": 9.423733848210308, "grad_norm": 0.9140399098396301, "learning_rate": 5.762661517896925e-07, "loss": 0.0577, "step": 63450 }, { "epoch": 9.425219070251002, "grad_norm": 1.0174657106399536, "learning_rate": 5.747809297489975e-07, "loss": 0.073, "step": 63460 }, { "epoch": 9.426704292291697, "grad_norm": 1.6030808687210083, "learning_rate": 5.732957077083024e-07, "loss": 0.0436, "step": 63470 }, { "epoch": 9.428189514332393, "grad_norm": 0.6991294622421265, "learning_rate": 5.718104856676074e-07, "loss": 0.0492, "step": 63480 }, { "epoch": 9.429674736373087, "grad_norm": 0.5950497388839722, "learning_rate": 5.703252636269123e-07, "loss": 0.0499, "step": 63490 }, { "epoch": 9.431159958413783, "grad_norm": 0.7248765826225281, "learning_rate": 5.688400415862171e-07, "loss": 0.0499, "step": 63500 }, { "epoch": 9.432645180454477, "grad_norm": 1.8232178688049316, "learning_rate": 5.673548195455221e-07, "loss": 0.0502, "step": 63510 }, { "epoch": 9.434130402495173, "grad_norm": 1.7857370376586914, "learning_rate": 5.65869597504827e-07, "loss": 0.0504, "step": 63520 }, { "epoch": 9.435615624535869, "grad_norm": 0.7211408615112305, "learning_rate": 5.643843754641319e-07, "loss": 0.0528, "step": 63530 }, { "epoch": 9.437100846576563, "grad_norm": 0.6421229243278503, "learning_rate": 5.628991534234368e-07, "loss": 0.0655, "step": 63540 }, { "epoch": 9.438586068617258, "grad_norm": 1.601430892944336, "learning_rate": 5.614139313827418e-07, "loss": 0.0567, "step": 63550 }, { "epoch": 9.440071290657954, "grad_norm": 2.0657436847686768, "learning_rate": 5.599287093420467e-07, "loss": 0.0594, "step": 63560 }, { "epoch": 9.441556512698648, "grad_norm": 0.7642673254013062, "learning_rate": 5.584434873013516e-07, "loss": 0.0572, "step": 63570 }, { "epoch": 9.443041734739344, "grad_norm": 1.3112720251083374, "learning_rate": 5.569582652606565e-07, "loss": 0.0479, "step": 63580 }, { "epoch": 9.444526956780038, "grad_norm": 0.7265917062759399, "learning_rate": 5.554730432199614e-07, "loss": 0.0459, "step": 63590 }, { "epoch": 9.446012178820734, "grad_norm": 0.47517985105514526, "learning_rate": 5.539878211792664e-07, "loss": 0.0498, "step": 63600 }, { "epoch": 9.44749740086143, "grad_norm": 1.4434775114059448, "learning_rate": 5.525025991385713e-07, "loss": 0.0661, "step": 63610 }, { "epoch": 9.448982622902124, "grad_norm": 0.9332127571105957, "learning_rate": 5.510173770978762e-07, "loss": 0.0528, "step": 63620 }, { "epoch": 9.45046784494282, "grad_norm": 0.766245424747467, "learning_rate": 5.495321550571811e-07, "loss": 0.058, "step": 63630 }, { "epoch": 9.451953066983513, "grad_norm": 1.0345425605773926, "learning_rate": 5.48046933016486e-07, "loss": 0.0583, "step": 63640 }, { "epoch": 9.45343828902421, "grad_norm": 1.2403125762939453, "learning_rate": 5.46561710975791e-07, "loss": 0.0651, "step": 63650 }, { "epoch": 9.454923511064905, "grad_norm": 0.4796169102191925, "learning_rate": 5.450764889350958e-07, "loss": 0.0456, "step": 63660 }, { "epoch": 9.456408733105599, "grad_norm": 1.10849928855896, "learning_rate": 5.435912668944008e-07, "loss": 0.042, "step": 63670 }, { "epoch": 9.457893955146295, "grad_norm": 0.5190598368644714, "learning_rate": 5.421060448537057e-07, "loss": 0.0615, "step": 63680 }, { "epoch": 9.459379177186989, "grad_norm": 0.47407037019729614, "learning_rate": 5.406208228130106e-07, "loss": 0.0595, "step": 63690 }, { "epoch": 9.460864399227685, "grad_norm": 0.6373763084411621, "learning_rate": 5.391356007723156e-07, "loss": 0.0505, "step": 63700 }, { "epoch": 9.46234962126838, "grad_norm": 0.38864976167678833, "learning_rate": 5.376503787316204e-07, "loss": 0.0415, "step": 63710 }, { "epoch": 9.463834843309074, "grad_norm": 0.8846045136451721, "learning_rate": 5.361651566909253e-07, "loss": 0.0429, "step": 63720 }, { "epoch": 9.46532006534977, "grad_norm": 0.519731342792511, "learning_rate": 5.346799346502302e-07, "loss": 0.0482, "step": 63730 }, { "epoch": 9.466805287390464, "grad_norm": 0.6602308750152588, "learning_rate": 5.331947126095352e-07, "loss": 0.0555, "step": 63740 }, { "epoch": 9.46829050943116, "grad_norm": 1.1158581972122192, "learning_rate": 5.317094905688402e-07, "loss": 0.0616, "step": 63750 }, { "epoch": 9.469775731471856, "grad_norm": 0.9060409069061279, "learning_rate": 5.30224268528145e-07, "loss": 0.0524, "step": 63760 }, { "epoch": 9.47126095351255, "grad_norm": 0.7163268327713013, "learning_rate": 5.287390464874499e-07, "loss": 0.0528, "step": 63770 }, { "epoch": 9.472746175553246, "grad_norm": 1.3203110694885254, "learning_rate": 5.272538244467548e-07, "loss": 0.0545, "step": 63780 }, { "epoch": 9.47423139759394, "grad_norm": 0.7185261249542236, "learning_rate": 5.257686024060597e-07, "loss": 0.0551, "step": 63790 }, { "epoch": 9.475716619634635, "grad_norm": 0.6173855662345886, "learning_rate": 5.242833803653647e-07, "loss": 0.0676, "step": 63800 }, { "epoch": 9.477201841675331, "grad_norm": 0.670606255531311, "learning_rate": 5.227981583246696e-07, "loss": 0.0675, "step": 63810 }, { "epoch": 9.478687063716025, "grad_norm": 0.4557764530181885, "learning_rate": 5.213129362839745e-07, "loss": 0.0418, "step": 63820 }, { "epoch": 9.480172285756721, "grad_norm": 0.7182461023330688, "learning_rate": 5.198277142432794e-07, "loss": 0.0516, "step": 63830 }, { "epoch": 9.481657507797415, "grad_norm": 0.5253933668136597, "learning_rate": 5.183424922025843e-07, "loss": 0.0571, "step": 63840 }, { "epoch": 9.48314272983811, "grad_norm": 1.8607404232025146, "learning_rate": 5.168572701618893e-07, "loss": 0.0472, "step": 63850 }, { "epoch": 9.484627951878807, "grad_norm": 0.8381548523902893, "learning_rate": 5.153720481211941e-07, "loss": 0.0459, "step": 63860 }, { "epoch": 9.4861131739195, "grad_norm": 0.6967155933380127, "learning_rate": 5.138868260804991e-07, "loss": 0.0492, "step": 63870 }, { "epoch": 9.487598395960196, "grad_norm": 0.9393335580825806, "learning_rate": 5.12401604039804e-07, "loss": 0.0537, "step": 63880 }, { "epoch": 9.48908361800089, "grad_norm": 0.7347944974899292, "learning_rate": 5.109163819991089e-07, "loss": 0.0366, "step": 63890 }, { "epoch": 9.490568840041586, "grad_norm": 1.2542600631713867, "learning_rate": 5.094311599584139e-07, "loss": 0.0487, "step": 63900 }, { "epoch": 9.492054062082282, "grad_norm": 0.42937129735946655, "learning_rate": 5.079459379177187e-07, "loss": 0.0671, "step": 63910 }, { "epoch": 9.493539284122976, "grad_norm": 0.6828808784484863, "learning_rate": 5.064607158770237e-07, "loss": 0.0384, "step": 63920 }, { "epoch": 9.495024506163672, "grad_norm": 0.7077220678329468, "learning_rate": 5.049754938363285e-07, "loss": 0.0607, "step": 63930 }, { "epoch": 9.496509728204366, "grad_norm": 0.7866410613059998, "learning_rate": 5.034902717956335e-07, "loss": 0.0626, "step": 63940 }, { "epoch": 9.497994950245062, "grad_norm": 0.7980449795722961, "learning_rate": 5.020050497549385e-07, "loss": 0.0471, "step": 63950 }, { "epoch": 9.499480172285757, "grad_norm": 1.281341552734375, "learning_rate": 5.005198277142433e-07, "loss": 0.0486, "step": 63960 }, { "epoch": 9.500965394326451, "grad_norm": 0.6784372329711914, "learning_rate": 4.990346056735483e-07, "loss": 0.0428, "step": 63970 }, { "epoch": 9.502450616367147, "grad_norm": 1.3833361864089966, "learning_rate": 4.975493836328531e-07, "loss": 0.0495, "step": 63980 }, { "epoch": 9.503935838407841, "grad_norm": 1.5026179552078247, "learning_rate": 4.96064161592158e-07, "loss": 0.0517, "step": 63990 }, { "epoch": 9.505421060448537, "grad_norm": 0.9454336762428284, "learning_rate": 4.94578939551463e-07, "loss": 0.0392, "step": 64000 }, { "epoch": 9.506906282489233, "grad_norm": 0.583394467830658, "learning_rate": 4.930937175107679e-07, "loss": 0.0385, "step": 64010 }, { "epoch": 9.508391504529927, "grad_norm": 1.3831298351287842, "learning_rate": 4.916084954700729e-07, "loss": 0.0557, "step": 64020 }, { "epoch": 9.509876726570623, "grad_norm": 1.121113896369934, "learning_rate": 4.901232734293777e-07, "loss": 0.0477, "step": 64030 }, { "epoch": 9.511361948611317, "grad_norm": 0.5695465207099915, "learning_rate": 4.886380513886826e-07, "loss": 0.0412, "step": 64040 }, { "epoch": 9.512847170652012, "grad_norm": 0.8473941683769226, "learning_rate": 4.871528293479876e-07, "loss": 0.0427, "step": 64050 }, { "epoch": 9.514332392692708, "grad_norm": 0.5661032795906067, "learning_rate": 4.856676073072924e-07, "loss": 0.0598, "step": 64060 }, { "epoch": 9.515817614733402, "grad_norm": 0.9760502576828003, "learning_rate": 4.841823852665974e-07, "loss": 0.0531, "step": 64070 }, { "epoch": 9.517302836774098, "grad_norm": 0.8279736042022705, "learning_rate": 4.826971632259023e-07, "loss": 0.0628, "step": 64080 }, { "epoch": 9.518788058814792, "grad_norm": 1.024143934249878, "learning_rate": 4.812119411852072e-07, "loss": 0.0577, "step": 64090 }, { "epoch": 9.520273280855488, "grad_norm": 1.033553123474121, "learning_rate": 4.797267191445122e-07, "loss": 0.0648, "step": 64100 }, { "epoch": 9.521758502896184, "grad_norm": 0.7202749848365784, "learning_rate": 4.78241497103817e-07, "loss": 0.0499, "step": 64110 }, { "epoch": 9.523243724936878, "grad_norm": 0.8777990937232971, "learning_rate": 4.7675627506312193e-07, "loss": 0.0526, "step": 64120 }, { "epoch": 9.524728946977573, "grad_norm": 0.8504054546356201, "learning_rate": 4.7527105302242684e-07, "loss": 0.0525, "step": 64130 }, { "epoch": 9.526214169018267, "grad_norm": 0.585706889629364, "learning_rate": 4.7378583098173186e-07, "loss": 0.0651, "step": 64140 }, { "epoch": 9.527699391058963, "grad_norm": 0.19927702844142914, "learning_rate": 4.7230060894103677e-07, "loss": 0.0597, "step": 64150 }, { "epoch": 9.529184613099659, "grad_norm": 0.7638365626335144, "learning_rate": 4.708153869003416e-07, "loss": 0.0416, "step": 64160 }, { "epoch": 9.530669835140353, "grad_norm": 0.7551804780960083, "learning_rate": 4.6933016485964653e-07, "loss": 0.0654, "step": 64170 }, { "epoch": 9.532155057181049, "grad_norm": 1.1386343240737915, "learning_rate": 4.6784494281895144e-07, "loss": 0.0447, "step": 64180 }, { "epoch": 9.533640279221743, "grad_norm": 1.1652734279632568, "learning_rate": 4.6635972077825635e-07, "loss": 0.0648, "step": 64190 }, { "epoch": 9.535125501262439, "grad_norm": 0.6755314469337463, "learning_rate": 4.6487449873756126e-07, "loss": 0.0594, "step": 64200 }, { "epoch": 9.536610723303134, "grad_norm": 0.9879747033119202, "learning_rate": 4.633892766968662e-07, "loss": 0.042, "step": 64210 }, { "epoch": 9.538095945343828, "grad_norm": 0.3094334900379181, "learning_rate": 4.6190405465617113e-07, "loss": 0.0528, "step": 64220 }, { "epoch": 9.539581167384524, "grad_norm": 1.2739931344985962, "learning_rate": 4.6041883261547604e-07, "loss": 0.0582, "step": 64230 }, { "epoch": 9.54106638942522, "grad_norm": 1.297001838684082, "learning_rate": 4.5893361057478095e-07, "loss": 0.0574, "step": 64240 }, { "epoch": 9.542551611465914, "grad_norm": 1.0937061309814453, "learning_rate": 4.5744838853408586e-07, "loss": 0.068, "step": 64250 }, { "epoch": 9.54403683350661, "grad_norm": 1.086991548538208, "learning_rate": 4.559631664933908e-07, "loss": 0.048, "step": 64260 }, { "epoch": 9.545522055547304, "grad_norm": 0.9579164385795593, "learning_rate": 4.5447794445269574e-07, "loss": 0.0561, "step": 64270 }, { "epoch": 9.547007277588, "grad_norm": 1.18247389793396, "learning_rate": 4.5299272241200065e-07, "loss": 0.0608, "step": 64280 }, { "epoch": 9.548492499628695, "grad_norm": 1.123586654663086, "learning_rate": 4.5150750037130556e-07, "loss": 0.0636, "step": 64290 }, { "epoch": 9.54997772166939, "grad_norm": 0.5111974477767944, "learning_rate": 4.5002227833061047e-07, "loss": 0.0587, "step": 64300 }, { "epoch": 9.551462943710085, "grad_norm": 0.7695721387863159, "learning_rate": 4.485370562899154e-07, "loss": 0.0662, "step": 64310 }, { "epoch": 9.552948165750779, "grad_norm": 0.7634204626083374, "learning_rate": 4.470518342492203e-07, "loss": 0.0388, "step": 64320 }, { "epoch": 9.554433387791475, "grad_norm": 1.0317970514297485, "learning_rate": 4.455666122085252e-07, "loss": 0.0553, "step": 64330 }, { "epoch": 9.55591860983217, "grad_norm": 0.6158688068389893, "learning_rate": 4.4408139016783016e-07, "loss": 0.0465, "step": 64340 }, { "epoch": 9.557403831872865, "grad_norm": 1.1406865119934082, "learning_rate": 4.4259616812713507e-07, "loss": 0.0548, "step": 64350 }, { "epoch": 9.55888905391356, "grad_norm": 0.5441305041313171, "learning_rate": 4.4111094608644e-07, "loss": 0.0503, "step": 64360 }, { "epoch": 9.560374275954254, "grad_norm": 1.8708332777023315, "learning_rate": 4.396257240457449e-07, "loss": 0.0773, "step": 64370 }, { "epoch": 9.56185949799495, "grad_norm": 0.9381576776504517, "learning_rate": 4.381405020050498e-07, "loss": 0.0469, "step": 64380 }, { "epoch": 9.563344720035646, "grad_norm": 1.1940032243728638, "learning_rate": 4.3665527996435465e-07, "loss": 0.0534, "step": 64390 }, { "epoch": 9.56482994207634, "grad_norm": 0.8630006313323975, "learning_rate": 4.3517005792365967e-07, "loss": 0.0506, "step": 64400 }, { "epoch": 9.566315164117036, "grad_norm": 0.6426660418510437, "learning_rate": 4.336848358829646e-07, "loss": 0.0576, "step": 64410 }, { "epoch": 9.56780038615773, "grad_norm": 0.9737311005592346, "learning_rate": 4.321996138422695e-07, "loss": 0.0561, "step": 64420 }, { "epoch": 9.569285608198426, "grad_norm": 1.9430110454559326, "learning_rate": 4.3071439180157435e-07, "loss": 0.0543, "step": 64430 }, { "epoch": 9.570770830239121, "grad_norm": 0.870077908039093, "learning_rate": 4.2922916976087926e-07, "loss": 0.0476, "step": 64440 }, { "epoch": 9.572256052279815, "grad_norm": 0.4826265871524811, "learning_rate": 4.2774394772018417e-07, "loss": 0.057, "step": 64450 }, { "epoch": 9.573741274320511, "grad_norm": 0.300932377576828, "learning_rate": 4.262587256794891e-07, "loss": 0.0556, "step": 64460 }, { "epoch": 9.575226496361205, "grad_norm": 1.122602105140686, "learning_rate": 4.2477350363879404e-07, "loss": 0.0619, "step": 64470 }, { "epoch": 9.576711718401901, "grad_norm": 0.3875451982021332, "learning_rate": 4.2328828159809895e-07, "loss": 0.0709, "step": 64480 }, { "epoch": 9.578196940442597, "grad_norm": 1.1015067100524902, "learning_rate": 4.2180305955740386e-07, "loss": 0.0514, "step": 64490 }, { "epoch": 9.57968216248329, "grad_norm": 0.6941787600517273, "learning_rate": 4.2031783751670877e-07, "loss": 0.0719, "step": 64500 }, { "epoch": 9.581167384523987, "grad_norm": 0.47489145398139954, "learning_rate": 4.188326154760137e-07, "loss": 0.0503, "step": 64510 }, { "epoch": 9.58265260656468, "grad_norm": 1.3649030923843384, "learning_rate": 4.173473934353186e-07, "loss": 0.0766, "step": 64520 }, { "epoch": 9.584137828605376, "grad_norm": 0.6902500987052917, "learning_rate": 4.158621713946235e-07, "loss": 0.0496, "step": 64530 }, { "epoch": 9.585623050646072, "grad_norm": 0.4888577163219452, "learning_rate": 4.1437694935392846e-07, "loss": 0.0328, "step": 64540 }, { "epoch": 9.587108272686766, "grad_norm": 0.9694914221763611, "learning_rate": 4.1289172731323337e-07, "loss": 0.0476, "step": 64550 }, { "epoch": 9.588593494727462, "grad_norm": 0.3271615207195282, "learning_rate": 4.114065052725383e-07, "loss": 0.0367, "step": 64560 }, { "epoch": 9.590078716768156, "grad_norm": 1.1771557331085205, "learning_rate": 4.099212832318432e-07, "loss": 0.0502, "step": 64570 }, { "epoch": 9.591563938808852, "grad_norm": 0.8279902935028076, "learning_rate": 4.084360611911481e-07, "loss": 0.0617, "step": 64580 }, { "epoch": 9.593049160849548, "grad_norm": 1.2852143049240112, "learning_rate": 4.06950839150453e-07, "loss": 0.0519, "step": 64590 }, { "epoch": 9.594534382890242, "grad_norm": 0.7560584545135498, "learning_rate": 4.0546561710975797e-07, "loss": 0.0485, "step": 64600 }, { "epoch": 9.596019604930937, "grad_norm": 0.2690260112285614, "learning_rate": 4.039803950690629e-07, "loss": 0.0352, "step": 64610 }, { "epoch": 9.597504826971631, "grad_norm": 1.46587073802948, "learning_rate": 4.024951730283678e-07, "loss": 0.0555, "step": 64620 }, { "epoch": 9.598990049012327, "grad_norm": 0.5200058817863464, "learning_rate": 4.010099509876727e-07, "loss": 0.0481, "step": 64630 }, { "epoch": 9.600475271053023, "grad_norm": 1.343431830406189, "learning_rate": 3.995247289469776e-07, "loss": 0.0496, "step": 64640 }, { "epoch": 9.601960493093717, "grad_norm": 0.7409623861312866, "learning_rate": 3.980395069062825e-07, "loss": 0.0438, "step": 64650 }, { "epoch": 9.603445715134413, "grad_norm": 1.2803033590316772, "learning_rate": 3.965542848655874e-07, "loss": 0.0542, "step": 64660 }, { "epoch": 9.604930937175109, "grad_norm": 0.5689689517021179, "learning_rate": 3.950690628248924e-07, "loss": 0.0488, "step": 64670 }, { "epoch": 9.606416159215803, "grad_norm": 0.8184802532196045, "learning_rate": 3.935838407841973e-07, "loss": 0.0418, "step": 64680 }, { "epoch": 9.607901381256498, "grad_norm": 0.913433849811554, "learning_rate": 3.920986187435022e-07, "loss": 0.041, "step": 64690 }, { "epoch": 9.609386603297192, "grad_norm": 0.21632570028305054, "learning_rate": 3.9061339670280707e-07, "loss": 0.0534, "step": 64700 }, { "epoch": 9.610871825337888, "grad_norm": 1.04236900806427, "learning_rate": 3.89128174662112e-07, "loss": 0.0563, "step": 64710 }, { "epoch": 9.612357047378584, "grad_norm": 1.2642252445220947, "learning_rate": 3.876429526214169e-07, "loss": 0.0603, "step": 64720 }, { "epoch": 9.613842269419278, "grad_norm": 1.1171890497207642, "learning_rate": 3.861577305807219e-07, "loss": 0.0575, "step": 64730 }, { "epoch": 9.615327491459974, "grad_norm": 0.5049655437469482, "learning_rate": 3.8467250854002676e-07, "loss": 0.0386, "step": 64740 }, { "epoch": 9.616812713500668, "grad_norm": 0.8653619885444641, "learning_rate": 3.8318728649933167e-07, "loss": 0.0452, "step": 64750 }, { "epoch": 9.618297935541364, "grad_norm": 1.2991448640823364, "learning_rate": 3.817020644586366e-07, "loss": 0.039, "step": 64760 }, { "epoch": 9.61978315758206, "grad_norm": 0.8051328063011169, "learning_rate": 3.802168424179415e-07, "loss": 0.0538, "step": 64770 }, { "epoch": 9.621268379622753, "grad_norm": 0.7863057255744934, "learning_rate": 3.787316203772464e-07, "loss": 0.0519, "step": 64780 }, { "epoch": 9.62275360166345, "grad_norm": 0.9942373633384705, "learning_rate": 3.772463983365513e-07, "loss": 0.0544, "step": 64790 }, { "epoch": 9.624238823704143, "grad_norm": 0.5974944829940796, "learning_rate": 3.757611762958563e-07, "loss": 0.0437, "step": 64800 }, { "epoch": 9.625724045744839, "grad_norm": 1.216723084449768, "learning_rate": 3.742759542551612e-07, "loss": 0.0553, "step": 64810 }, { "epoch": 9.627209267785535, "grad_norm": 0.6732010841369629, "learning_rate": 3.727907322144661e-07, "loss": 0.0553, "step": 64820 }, { "epoch": 9.628694489826229, "grad_norm": 0.4387213885784149, "learning_rate": 3.71305510173771e-07, "loss": 0.0401, "step": 64830 }, { "epoch": 9.630179711866925, "grad_norm": 1.1491668224334717, "learning_rate": 3.698202881330759e-07, "loss": 0.0447, "step": 64840 }, { "epoch": 9.631664933907619, "grad_norm": 0.705153226852417, "learning_rate": 3.683350660923808e-07, "loss": 0.0477, "step": 64850 }, { "epoch": 9.633150155948314, "grad_norm": 2.005906581878662, "learning_rate": 3.668498440516858e-07, "loss": 0.0698, "step": 64860 }, { "epoch": 9.63463537798901, "grad_norm": 0.6410548686981201, "learning_rate": 3.653646220109907e-07, "loss": 0.0496, "step": 64870 }, { "epoch": 9.636120600029704, "grad_norm": 0.34218358993530273, "learning_rate": 3.638793999702956e-07, "loss": 0.0576, "step": 64880 }, { "epoch": 9.6376058220704, "grad_norm": 0.3363538682460785, "learning_rate": 3.623941779296005e-07, "loss": 0.0474, "step": 64890 }, { "epoch": 9.639091044111094, "grad_norm": 1.0393438339233398, "learning_rate": 3.6090895588890543e-07, "loss": 0.0597, "step": 64900 }, { "epoch": 9.64057626615179, "grad_norm": 0.566709578037262, "learning_rate": 3.5942373384821034e-07, "loss": 0.0523, "step": 64910 }, { "epoch": 9.642061488192486, "grad_norm": 1.6289558410644531, "learning_rate": 3.5793851180751525e-07, "loss": 0.055, "step": 64920 }, { "epoch": 9.64354671023318, "grad_norm": 0.6053251624107361, "learning_rate": 3.564532897668202e-07, "loss": 0.0396, "step": 64930 }, { "epoch": 9.645031932273875, "grad_norm": 3.9865386486053467, "learning_rate": 3.549680677261251e-07, "loss": 0.0496, "step": 64940 }, { "epoch": 9.64651715431457, "grad_norm": 1.3346593379974365, "learning_rate": 3.5348284568543003e-07, "loss": 0.0708, "step": 64950 }, { "epoch": 9.648002376355265, "grad_norm": 0.7871192097663879, "learning_rate": 3.5199762364473494e-07, "loss": 0.0437, "step": 64960 }, { "epoch": 9.649487598395961, "grad_norm": 0.5941735506057739, "learning_rate": 3.505124016040398e-07, "loss": 0.0626, "step": 64970 }, { "epoch": 9.650972820436655, "grad_norm": 1.1441556215286255, "learning_rate": 3.490271795633447e-07, "loss": 0.0843, "step": 64980 }, { "epoch": 9.65245804247735, "grad_norm": 0.8148675560951233, "learning_rate": 3.475419575226496e-07, "loss": 0.0501, "step": 64990 }, { "epoch": 9.653943264518045, "grad_norm": 1.1982941627502441, "learning_rate": 3.4605673548195463e-07, "loss": 0.0516, "step": 65000 }, { "epoch": 9.65542848655874, "grad_norm": 0.9795641303062439, "learning_rate": 3.445715134412595e-07, "loss": 0.0561, "step": 65010 }, { "epoch": 9.656913708599436, "grad_norm": 2.1101455688476562, "learning_rate": 3.430862914005644e-07, "loss": 0.0602, "step": 65020 }, { "epoch": 9.65839893064013, "grad_norm": 0.5057744979858398, "learning_rate": 3.416010693598693e-07, "loss": 0.0575, "step": 65030 }, { "epoch": 9.659884152680826, "grad_norm": 0.9282684326171875, "learning_rate": 3.401158473191742e-07, "loss": 0.05, "step": 65040 }, { "epoch": 9.66136937472152, "grad_norm": 1.1378172636032104, "learning_rate": 3.3863062527847913e-07, "loss": 0.06, "step": 65050 }, { "epoch": 9.662854596762216, "grad_norm": 0.2000456601381302, "learning_rate": 3.371454032377841e-07, "loss": 0.0675, "step": 65060 }, { "epoch": 9.664339818802912, "grad_norm": 0.8669431805610657, "learning_rate": 3.35660181197089e-07, "loss": 0.0415, "step": 65070 }, { "epoch": 9.665825040843606, "grad_norm": 0.8875764012336731, "learning_rate": 3.341749591563939e-07, "loss": 0.0588, "step": 65080 }, { "epoch": 9.667310262884302, "grad_norm": 1.0086852312088013, "learning_rate": 3.326897371156988e-07, "loss": 0.046, "step": 65090 }, { "epoch": 9.668795484924996, "grad_norm": 0.4355565309524536, "learning_rate": 3.3120451507500373e-07, "loss": 0.0575, "step": 65100 }, { "epoch": 9.670280706965691, "grad_norm": 0.9262057542800903, "learning_rate": 3.2971929303430864e-07, "loss": 0.0591, "step": 65110 }, { "epoch": 9.671765929006387, "grad_norm": 1.1087294816970825, "learning_rate": 3.2823407099361355e-07, "loss": 0.056, "step": 65120 }, { "epoch": 9.673251151047081, "grad_norm": 0.8267140984535217, "learning_rate": 3.267488489529185e-07, "loss": 0.0586, "step": 65130 }, { "epoch": 9.674736373087777, "grad_norm": 0.963013768196106, "learning_rate": 3.252636269122234e-07, "loss": 0.0535, "step": 65140 }, { "epoch": 9.676221595128471, "grad_norm": 0.8553949594497681, "learning_rate": 3.2377840487152833e-07, "loss": 0.0629, "step": 65150 }, { "epoch": 9.677706817169167, "grad_norm": 0.4256061613559723, "learning_rate": 3.2229318283083324e-07, "loss": 0.0439, "step": 65160 }, { "epoch": 9.679192039209862, "grad_norm": 0.9870197176933289, "learning_rate": 3.2080796079013815e-07, "loss": 0.0854, "step": 65170 }, { "epoch": 9.680677261250556, "grad_norm": 1.0996114015579224, "learning_rate": 3.1932273874944306e-07, "loss": 0.058, "step": 65180 }, { "epoch": 9.682162483291252, "grad_norm": 0.41394004225730896, "learning_rate": 3.17837516708748e-07, "loss": 0.0468, "step": 65190 }, { "epoch": 9.683647705331946, "grad_norm": 0.959151029586792, "learning_rate": 3.1635229466805293e-07, "loss": 0.0532, "step": 65200 }, { "epoch": 9.685132927372642, "grad_norm": 0.5184232592582703, "learning_rate": 3.1486707262735784e-07, "loss": 0.0641, "step": 65210 }, { "epoch": 9.686618149413338, "grad_norm": 2.521256923675537, "learning_rate": 3.1338185058666275e-07, "loss": 0.0403, "step": 65220 }, { "epoch": 9.688103371454032, "grad_norm": 0.7861289978027344, "learning_rate": 3.1189662854596766e-07, "loss": 0.0553, "step": 65230 }, { "epoch": 9.689588593494728, "grad_norm": 1.0370820760726929, "learning_rate": 3.1041140650527257e-07, "loss": 0.0509, "step": 65240 }, { "epoch": 9.691073815535422, "grad_norm": 0.29477956891059875, "learning_rate": 3.089261844645775e-07, "loss": 0.0426, "step": 65250 }, { "epoch": 9.692559037576117, "grad_norm": 1.420774221420288, "learning_rate": 3.074409624238824e-07, "loss": 0.0638, "step": 65260 }, { "epoch": 9.694044259616813, "grad_norm": 1.197407603263855, "learning_rate": 3.059557403831873e-07, "loss": 0.0515, "step": 65270 }, { "epoch": 9.695529481657507, "grad_norm": 0.4081036150455475, "learning_rate": 3.044705183424922e-07, "loss": 0.0447, "step": 65280 }, { "epoch": 9.697014703698203, "grad_norm": 0.3299735188484192, "learning_rate": 3.029852963017971e-07, "loss": 0.0448, "step": 65290 }, { "epoch": 9.698499925738897, "grad_norm": 0.8112266063690186, "learning_rate": 3.0150007426110203e-07, "loss": 0.0547, "step": 65300 }, { "epoch": 9.699985147779593, "grad_norm": 0.9002323150634766, "learning_rate": 3.00014852220407e-07, "loss": 0.0586, "step": 65310 }, { "epoch": 9.701470369820289, "grad_norm": 0.8222044110298157, "learning_rate": 2.985296301797119e-07, "loss": 0.0458, "step": 65320 }, { "epoch": 9.702955591860983, "grad_norm": 0.6172053813934326, "learning_rate": 2.970444081390168e-07, "loss": 0.0619, "step": 65330 }, { "epoch": 9.704440813901678, "grad_norm": 1.5769612789154053, "learning_rate": 2.955591860983217e-07, "loss": 0.0713, "step": 65340 }, { "epoch": 9.705926035942372, "grad_norm": 0.9784587621688843, "learning_rate": 2.9407396405762663e-07, "loss": 0.0641, "step": 65350 }, { "epoch": 9.707411257983068, "grad_norm": 1.5220768451690674, "learning_rate": 2.9258874201693154e-07, "loss": 0.0516, "step": 65360 }, { "epoch": 9.708896480023764, "grad_norm": 1.2147810459136963, "learning_rate": 2.9110351997623645e-07, "loss": 0.0451, "step": 65370 }, { "epoch": 9.710381702064458, "grad_norm": 1.1519248485565186, "learning_rate": 2.896182979355414e-07, "loss": 0.0766, "step": 65380 }, { "epoch": 9.711866924105154, "grad_norm": 0.8308933973312378, "learning_rate": 2.881330758948463e-07, "loss": 0.0446, "step": 65390 }, { "epoch": 9.71335214614585, "grad_norm": 0.7178075909614563, "learning_rate": 2.866478538541512e-07, "loss": 0.0647, "step": 65400 }, { "epoch": 9.714837368186544, "grad_norm": 0.5174522399902344, "learning_rate": 2.8516263181345615e-07, "loss": 0.0402, "step": 65410 }, { "epoch": 9.71632259022724, "grad_norm": 0.8876258730888367, "learning_rate": 2.8367740977276106e-07, "loss": 0.0617, "step": 65420 }, { "epoch": 9.717807812267933, "grad_norm": 0.8110934495925903, "learning_rate": 2.8219218773206597e-07, "loss": 0.0741, "step": 65430 }, { "epoch": 9.71929303430863, "grad_norm": 0.5553524494171143, "learning_rate": 2.807069656913709e-07, "loss": 0.0438, "step": 65440 }, { "epoch": 9.720778256349325, "grad_norm": 0.6501131057739258, "learning_rate": 2.792217436506758e-07, "loss": 0.067, "step": 65450 }, { "epoch": 9.722263478390019, "grad_norm": 1.176830530166626, "learning_rate": 2.777365216099807e-07, "loss": 0.0548, "step": 65460 }, { "epoch": 9.723748700430715, "grad_norm": 0.9840707182884216, "learning_rate": 2.7625129956928566e-07, "loss": 0.0546, "step": 65470 }, { "epoch": 9.725233922471409, "grad_norm": 0.5249770283699036, "learning_rate": 2.7476607752859057e-07, "loss": 0.0547, "step": 65480 }, { "epoch": 9.726719144512105, "grad_norm": 1.2236920595169067, "learning_rate": 2.732808554878955e-07, "loss": 0.0397, "step": 65490 }, { "epoch": 9.7282043665528, "grad_norm": 0.892269492149353, "learning_rate": 2.717956334472004e-07, "loss": 0.0631, "step": 65500 }, { "epoch": 9.729689588593494, "grad_norm": 1.4101309776306152, "learning_rate": 2.703104114065053e-07, "loss": 0.0468, "step": 65510 }, { "epoch": 9.73117481063419, "grad_norm": 0.9848302006721497, "learning_rate": 2.688251893658102e-07, "loss": 0.0629, "step": 65520 }, { "epoch": 9.732660032674884, "grad_norm": 0.7715422511100769, "learning_rate": 2.673399673251151e-07, "loss": 0.0412, "step": 65530 }, { "epoch": 9.73414525471558, "grad_norm": 0.9916948080062866, "learning_rate": 2.658547452844201e-07, "loss": 0.0535, "step": 65540 }, { "epoch": 9.735630476756276, "grad_norm": 1.1795718669891357, "learning_rate": 2.6436952324372494e-07, "loss": 0.0432, "step": 65550 }, { "epoch": 9.73711569879697, "grad_norm": 1.2389646768569946, "learning_rate": 2.6288430120302985e-07, "loss": 0.0651, "step": 65560 }, { "epoch": 9.738600920837666, "grad_norm": 0.48417583107948303, "learning_rate": 2.613990791623348e-07, "loss": 0.0469, "step": 65570 }, { "epoch": 9.74008614287836, "grad_norm": 1.2999908924102783, "learning_rate": 2.599138571216397e-07, "loss": 0.059, "step": 65580 }, { "epoch": 9.741571364919055, "grad_norm": 0.9606696367263794, "learning_rate": 2.5842863508094463e-07, "loss": 0.032, "step": 65590 }, { "epoch": 9.743056586959751, "grad_norm": 0.2228945791721344, "learning_rate": 2.5694341304024954e-07, "loss": 0.0369, "step": 65600 }, { "epoch": 9.744541809000445, "grad_norm": 0.3582620918750763, "learning_rate": 2.5545819099955445e-07, "loss": 0.05, "step": 65610 }, { "epoch": 9.746027031041141, "grad_norm": 0.6840639710426331, "learning_rate": 2.5397296895885936e-07, "loss": 0.0369, "step": 65620 }, { "epoch": 9.747512253081835, "grad_norm": 0.572365403175354, "learning_rate": 2.5248774691816427e-07, "loss": 0.0536, "step": 65630 }, { "epoch": 9.74899747512253, "grad_norm": 0.2343834489583969, "learning_rate": 2.5100252487746923e-07, "loss": 0.0403, "step": 65640 }, { "epoch": 9.750482697163227, "grad_norm": 1.2876397371292114, "learning_rate": 2.4951730283677414e-07, "loss": 0.0535, "step": 65650 }, { "epoch": 9.75196791920392, "grad_norm": 1.232550859451294, "learning_rate": 2.48032080796079e-07, "loss": 0.0626, "step": 65660 }, { "epoch": 9.753453141244616, "grad_norm": 0.9929244518280029, "learning_rate": 2.4654685875538396e-07, "loss": 0.0824, "step": 65670 }, { "epoch": 9.75493836328531, "grad_norm": 1.197312593460083, "learning_rate": 2.4506163671468887e-07, "loss": 0.0761, "step": 65680 }, { "epoch": 9.756423585326006, "grad_norm": 0.8419313430786133, "learning_rate": 2.435764146739938e-07, "loss": 0.0489, "step": 65690 }, { "epoch": 9.757908807366702, "grad_norm": 2.1786866188049316, "learning_rate": 2.420911926332987e-07, "loss": 0.0524, "step": 65700 }, { "epoch": 9.759394029407396, "grad_norm": 1.1716639995574951, "learning_rate": 2.406059705926036e-07, "loss": 0.0562, "step": 65710 }, { "epoch": 9.760879251448092, "grad_norm": 0.7020483613014221, "learning_rate": 2.391207485519085e-07, "loss": 0.0517, "step": 65720 }, { "epoch": 9.762364473488786, "grad_norm": 0.8520447015762329, "learning_rate": 2.3763552651121342e-07, "loss": 0.0715, "step": 65730 }, { "epoch": 9.763849695529482, "grad_norm": 1.2511392831802368, "learning_rate": 2.3615030447051838e-07, "loss": 0.0545, "step": 65740 }, { "epoch": 9.765334917570177, "grad_norm": 0.5448188185691833, "learning_rate": 2.3466508242982327e-07, "loss": 0.0412, "step": 65750 }, { "epoch": 9.766820139610871, "grad_norm": 0.8606187105178833, "learning_rate": 2.3317986038912818e-07, "loss": 0.0619, "step": 65760 }, { "epoch": 9.768305361651567, "grad_norm": 0.7856817245483398, "learning_rate": 2.316946383484331e-07, "loss": 0.0375, "step": 65770 }, { "epoch": 9.769790583692261, "grad_norm": 0.7301252484321594, "learning_rate": 2.3020941630773802e-07, "loss": 0.0466, "step": 65780 }, { "epoch": 9.771275805732957, "grad_norm": 0.7444620132446289, "learning_rate": 2.2872419426704293e-07, "loss": 0.0461, "step": 65790 }, { "epoch": 9.772761027773653, "grad_norm": 0.6654163599014282, "learning_rate": 2.2723897222634787e-07, "loss": 0.0624, "step": 65800 }, { "epoch": 9.774246249814347, "grad_norm": 1.2745815515518188, "learning_rate": 2.2575375018565278e-07, "loss": 0.0666, "step": 65810 }, { "epoch": 9.775731471855043, "grad_norm": 0.8714974522590637, "learning_rate": 2.242685281449577e-07, "loss": 0.0559, "step": 65820 }, { "epoch": 9.777216693895738, "grad_norm": 1.2431142330169678, "learning_rate": 2.227833061042626e-07, "loss": 0.0488, "step": 65830 }, { "epoch": 9.778701915936432, "grad_norm": 0.9246008396148682, "learning_rate": 2.2129808406356753e-07, "loss": 0.0514, "step": 65840 }, { "epoch": 9.780187137977128, "grad_norm": 1.1826361417770386, "learning_rate": 2.1981286202287244e-07, "loss": 0.0516, "step": 65850 }, { "epoch": 9.781672360017822, "grad_norm": 1.0376538038253784, "learning_rate": 2.1832763998217733e-07, "loss": 0.0471, "step": 65860 }, { "epoch": 9.783157582058518, "grad_norm": 0.5390947461128235, "learning_rate": 2.168424179414823e-07, "loss": 0.0471, "step": 65870 }, { "epoch": 9.784642804099214, "grad_norm": 0.7411609292030334, "learning_rate": 2.1535719590078717e-07, "loss": 0.0486, "step": 65880 }, { "epoch": 9.786128026139908, "grad_norm": 0.964026153087616, "learning_rate": 2.1387197386009208e-07, "loss": 0.045, "step": 65890 }, { "epoch": 9.787613248180604, "grad_norm": 0.2791236340999603, "learning_rate": 2.1238675181939702e-07, "loss": 0.0456, "step": 65900 }, { "epoch": 9.789098470221298, "grad_norm": 0.9963831305503845, "learning_rate": 2.1090152977870193e-07, "loss": 0.0638, "step": 65910 }, { "epoch": 9.790583692261993, "grad_norm": 0.5477802157402039, "learning_rate": 2.0941630773800684e-07, "loss": 0.0476, "step": 65920 }, { "epoch": 9.792068914302689, "grad_norm": 0.26147234439849854, "learning_rate": 2.0793108569731175e-07, "loss": 0.0572, "step": 65930 }, { "epoch": 9.793554136343383, "grad_norm": 1.2986868619918823, "learning_rate": 2.0644586365661669e-07, "loss": 0.0911, "step": 65940 }, { "epoch": 9.795039358384079, "grad_norm": 0.6861764788627625, "learning_rate": 2.049606416159216e-07, "loss": 0.0651, "step": 65950 }, { "epoch": 9.796524580424773, "grad_norm": 1.7995854616165161, "learning_rate": 2.034754195752265e-07, "loss": 0.0426, "step": 65960 }, { "epoch": 9.798009802465469, "grad_norm": 1.2532118558883667, "learning_rate": 2.0199019753453144e-07, "loss": 0.0476, "step": 65970 }, { "epoch": 9.799495024506164, "grad_norm": 0.5201395153999329, "learning_rate": 2.0050497549383635e-07, "loss": 0.0392, "step": 65980 }, { "epoch": 9.800980246546859, "grad_norm": 0.6813002228736877, "learning_rate": 1.9901975345314126e-07, "loss": 0.0565, "step": 65990 }, { "epoch": 9.802465468587554, "grad_norm": 1.2058467864990234, "learning_rate": 1.975345314124462e-07, "loss": 0.0479, "step": 66000 }, { "epoch": 9.803950690628248, "grad_norm": 1.339169979095459, "learning_rate": 1.960493093717511e-07, "loss": 0.0657, "step": 66010 }, { "epoch": 9.805435912668944, "grad_norm": 1.0724490880966187, "learning_rate": 1.94564087331056e-07, "loss": 0.0589, "step": 66020 }, { "epoch": 9.80692113470964, "grad_norm": 1.7160189151763916, "learning_rate": 1.9307886529036095e-07, "loss": 0.0549, "step": 66030 }, { "epoch": 9.808406356750334, "grad_norm": 0.39401769638061523, "learning_rate": 1.9159364324966584e-07, "loss": 0.0379, "step": 66040 }, { "epoch": 9.80989157879103, "grad_norm": 1.2391009330749512, "learning_rate": 1.9010842120897075e-07, "loss": 0.043, "step": 66050 }, { "epoch": 9.811376800831724, "grad_norm": 1.3258800506591797, "learning_rate": 1.8862319916827566e-07, "loss": 0.0543, "step": 66060 }, { "epoch": 9.81286202287242, "grad_norm": 1.50371515750885, "learning_rate": 1.871379771275806e-07, "loss": 0.0575, "step": 66070 }, { "epoch": 9.814347244913115, "grad_norm": 1.2874075174331665, "learning_rate": 1.856527550868855e-07, "loss": 0.0517, "step": 66080 }, { "epoch": 9.81583246695381, "grad_norm": 0.9409539699554443, "learning_rate": 1.841675330461904e-07, "loss": 0.0681, "step": 66090 }, { "epoch": 9.817317688994505, "grad_norm": 1.2464892864227295, "learning_rate": 1.8268231100549535e-07, "loss": 0.0658, "step": 66100 }, { "epoch": 9.818802911035199, "grad_norm": 0.7039570212364197, "learning_rate": 1.8119708896480026e-07, "loss": 0.0434, "step": 66110 }, { "epoch": 9.820288133075895, "grad_norm": 0.4767332375049591, "learning_rate": 1.7971186692410517e-07, "loss": 0.0494, "step": 66120 }, { "epoch": 9.82177335511659, "grad_norm": 1.12473464012146, "learning_rate": 1.782266448834101e-07, "loss": 0.0671, "step": 66130 }, { "epoch": 9.823258577157285, "grad_norm": 1.3426387310028076, "learning_rate": 1.7674142284271501e-07, "loss": 0.0428, "step": 66140 }, { "epoch": 9.82474379919798, "grad_norm": 1.6674333810806274, "learning_rate": 1.752562008020199e-07, "loss": 0.0468, "step": 66150 }, { "epoch": 9.826229021238674, "grad_norm": 0.9544386863708496, "learning_rate": 1.737709787613248e-07, "loss": 0.0414, "step": 66160 }, { "epoch": 9.82771424327937, "grad_norm": 0.5816496014595032, "learning_rate": 1.7228575672062974e-07, "loss": 0.0463, "step": 66170 }, { "epoch": 9.829199465320066, "grad_norm": 0.7869675755500793, "learning_rate": 1.7080053467993465e-07, "loss": 0.0566, "step": 66180 }, { "epoch": 9.83068468736076, "grad_norm": 1.1577743291854858, "learning_rate": 1.6931531263923956e-07, "loss": 0.0625, "step": 66190 }, { "epoch": 9.832169909401456, "grad_norm": 0.6714699268341064, "learning_rate": 1.678300905985445e-07, "loss": 0.0618, "step": 66200 }, { "epoch": 9.83365513144215, "grad_norm": 0.582687258720398, "learning_rate": 1.663448685578494e-07, "loss": 0.0516, "step": 66210 }, { "epoch": 9.835140353482846, "grad_norm": 1.4580570459365845, "learning_rate": 1.6485964651715432e-07, "loss": 0.0445, "step": 66220 }, { "epoch": 9.836625575523541, "grad_norm": 1.1883230209350586, "learning_rate": 1.6337442447645926e-07, "loss": 0.0511, "step": 66230 }, { "epoch": 9.838110797564235, "grad_norm": 0.6525833606719971, "learning_rate": 1.6188920243576417e-07, "loss": 0.0499, "step": 66240 }, { "epoch": 9.839596019604931, "grad_norm": 1.5722192525863647, "learning_rate": 1.6040398039506908e-07, "loss": 0.0646, "step": 66250 }, { "epoch": 9.841081241645625, "grad_norm": 0.9194024205207825, "learning_rate": 1.58918758354374e-07, "loss": 0.0496, "step": 66260 }, { "epoch": 9.842566463686321, "grad_norm": 0.7903993129730225, "learning_rate": 1.5743353631367892e-07, "loss": 0.0674, "step": 66270 }, { "epoch": 9.844051685727017, "grad_norm": 1.3717644214630127, "learning_rate": 1.5594831427298383e-07, "loss": 0.0655, "step": 66280 }, { "epoch": 9.84553690776771, "grad_norm": 0.5210042595863342, "learning_rate": 1.5446309223228874e-07, "loss": 0.0614, "step": 66290 }, { "epoch": 9.847022129808407, "grad_norm": 1.4641269445419312, "learning_rate": 1.5297787019159365e-07, "loss": 0.0477, "step": 66300 }, { "epoch": 9.8485073518491, "grad_norm": 1.273045539855957, "learning_rate": 1.5149264815089856e-07, "loss": 0.0508, "step": 66310 }, { "epoch": 9.849992573889796, "grad_norm": 0.6448570489883423, "learning_rate": 1.500074261102035e-07, "loss": 0.0559, "step": 66320 }, { "epoch": 9.851477795930492, "grad_norm": 0.7389506697654724, "learning_rate": 1.485222040695084e-07, "loss": 0.0489, "step": 66330 }, { "epoch": 9.852963017971186, "grad_norm": 0.46983444690704346, "learning_rate": 1.4703698202881332e-07, "loss": 0.0475, "step": 66340 }, { "epoch": 9.854448240011882, "grad_norm": 0.7253286242485046, "learning_rate": 1.4555175998811823e-07, "loss": 0.055, "step": 66350 }, { "epoch": 9.855933462052576, "grad_norm": 0.9931745529174805, "learning_rate": 1.4406653794742314e-07, "loss": 0.0323, "step": 66360 }, { "epoch": 9.857418684093272, "grad_norm": 0.8444429636001587, "learning_rate": 1.4258131590672807e-07, "loss": 0.0501, "step": 66370 }, { "epoch": 9.858903906133968, "grad_norm": 1.6108314990997314, "learning_rate": 1.4109609386603298e-07, "loss": 0.0533, "step": 66380 }, { "epoch": 9.860389128174662, "grad_norm": 1.9255098104476929, "learning_rate": 1.396108718253379e-07, "loss": 0.04, "step": 66390 }, { "epoch": 9.861874350215357, "grad_norm": 0.5105013847351074, "learning_rate": 1.3812564978464283e-07, "loss": 0.0402, "step": 66400 }, { "epoch": 9.863359572256051, "grad_norm": 0.756027340888977, "learning_rate": 1.3664042774394774e-07, "loss": 0.0375, "step": 66410 }, { "epoch": 9.864844794296747, "grad_norm": 0.6383519172668457, "learning_rate": 1.3515520570325265e-07, "loss": 0.0508, "step": 66420 }, { "epoch": 9.866330016337443, "grad_norm": 1.1964900493621826, "learning_rate": 1.3366998366255756e-07, "loss": 0.0611, "step": 66430 }, { "epoch": 9.867815238378137, "grad_norm": 1.084910273551941, "learning_rate": 1.3218476162186247e-07, "loss": 0.0555, "step": 66440 }, { "epoch": 9.869300460418833, "grad_norm": 1.2565182447433472, "learning_rate": 1.306995395811674e-07, "loss": 0.0492, "step": 66450 }, { "epoch": 9.870785682459527, "grad_norm": 0.6368108987808228, "learning_rate": 1.2921431754047231e-07, "loss": 0.047, "step": 66460 }, { "epoch": 9.872270904500223, "grad_norm": 0.6076331734657288, "learning_rate": 1.2772909549977722e-07, "loss": 0.0576, "step": 66470 }, { "epoch": 9.873756126540918, "grad_norm": 0.9494715929031372, "learning_rate": 1.2624387345908213e-07, "loss": 0.0573, "step": 66480 }, { "epoch": 9.875241348581612, "grad_norm": 1.1798850297927856, "learning_rate": 1.2475865141838707e-07, "loss": 0.0504, "step": 66490 }, { "epoch": 9.876726570622308, "grad_norm": 1.0333198308944702, "learning_rate": 1.2327342937769198e-07, "loss": 0.0344, "step": 66500 }, { "epoch": 9.878211792663002, "grad_norm": 0.6598351001739502, "learning_rate": 1.217882073369969e-07, "loss": 0.068, "step": 66510 }, { "epoch": 9.879697014703698, "grad_norm": 1.9670634269714355, "learning_rate": 1.203029852963018e-07, "loss": 0.0524, "step": 66520 }, { "epoch": 9.881182236744394, "grad_norm": 0.5597050786018372, "learning_rate": 1.1881776325560671e-07, "loss": 0.0365, "step": 66530 }, { "epoch": 9.882667458785088, "grad_norm": 0.2909705638885498, "learning_rate": 1.1733254121491163e-07, "loss": 0.054, "step": 66540 }, { "epoch": 9.884152680825784, "grad_norm": 1.0502877235412598, "learning_rate": 1.1584731917421656e-07, "loss": 0.0515, "step": 66550 }, { "epoch": 9.88563790286648, "grad_norm": 1.1342244148254395, "learning_rate": 1.1436209713352147e-07, "loss": 0.0417, "step": 66560 }, { "epoch": 9.887123124907173, "grad_norm": 0.6780197024345398, "learning_rate": 1.1287687509282639e-07, "loss": 0.0581, "step": 66570 }, { "epoch": 9.88860834694787, "grad_norm": 0.6926343441009521, "learning_rate": 1.113916530521313e-07, "loss": 0.0429, "step": 66580 }, { "epoch": 9.890093568988563, "grad_norm": 1.3560590744018555, "learning_rate": 1.0990643101143622e-07, "loss": 0.0805, "step": 66590 }, { "epoch": 9.891578791029259, "grad_norm": 0.8795716166496277, "learning_rate": 1.0842120897074115e-07, "loss": 0.0564, "step": 66600 }, { "epoch": 9.893064013069955, "grad_norm": 0.9842681288719177, "learning_rate": 1.0693598693004604e-07, "loss": 0.0617, "step": 66610 }, { "epoch": 9.894549235110649, "grad_norm": 0.8339792490005493, "learning_rate": 1.0545076488935096e-07, "loss": 0.064, "step": 66620 }, { "epoch": 9.896034457151345, "grad_norm": 0.9595751762390137, "learning_rate": 1.0396554284865587e-07, "loss": 0.056, "step": 66630 }, { "epoch": 9.897519679192039, "grad_norm": 1.2626688480377197, "learning_rate": 1.024803208079608e-07, "loss": 0.0718, "step": 66640 }, { "epoch": 9.899004901232734, "grad_norm": 1.0672602653503418, "learning_rate": 1.0099509876726572e-07, "loss": 0.0753, "step": 66650 }, { "epoch": 9.90049012327343, "grad_norm": 0.8560225963592529, "learning_rate": 9.950987672657063e-08, "loss": 0.0573, "step": 66660 }, { "epoch": 9.901975345314124, "grad_norm": 1.9473977088928223, "learning_rate": 9.802465468587555e-08, "loss": 0.0463, "step": 66670 }, { "epoch": 9.90346056735482, "grad_norm": 0.4703368544578552, "learning_rate": 9.653943264518048e-08, "loss": 0.0461, "step": 66680 }, { "epoch": 9.904945789395514, "grad_norm": 1.5709819793701172, "learning_rate": 9.505421060448537e-08, "loss": 0.0431, "step": 66690 }, { "epoch": 9.90643101143621, "grad_norm": 1.1744906902313232, "learning_rate": 9.35689885637903e-08, "loss": 0.0594, "step": 66700 }, { "epoch": 9.907916233476906, "grad_norm": 1.490570068359375, "learning_rate": 9.20837665230952e-08, "loss": 0.0701, "step": 66710 }, { "epoch": 9.9094014555176, "grad_norm": 0.3112143278121948, "learning_rate": 9.059854448240013e-08, "loss": 0.0532, "step": 66720 }, { "epoch": 9.910886677558295, "grad_norm": 0.5256125330924988, "learning_rate": 8.911332244170505e-08, "loss": 0.0612, "step": 66730 }, { "epoch": 9.91237189959899, "grad_norm": 0.4340265989303589, "learning_rate": 8.762810040100995e-08, "loss": 0.0495, "step": 66740 }, { "epoch": 9.913857121639685, "grad_norm": 1.5605214834213257, "learning_rate": 8.614287836031487e-08, "loss": 0.0502, "step": 66750 }, { "epoch": 9.915342343680381, "grad_norm": 0.9525371193885803, "learning_rate": 8.465765631961978e-08, "loss": 0.0493, "step": 66760 }, { "epoch": 9.916827565721075, "grad_norm": 1.05837881565094, "learning_rate": 8.31724342789247e-08, "loss": 0.061, "step": 66770 }, { "epoch": 9.91831278776177, "grad_norm": 1.485148549079895, "learning_rate": 8.168721223822963e-08, "loss": 0.058, "step": 66780 }, { "epoch": 9.919798009802465, "grad_norm": 0.5239309072494507, "learning_rate": 8.020199019753454e-08, "loss": 0.0506, "step": 66790 }, { "epoch": 9.92128323184316, "grad_norm": 0.8405706286430359, "learning_rate": 7.871676815683946e-08, "loss": 0.0613, "step": 66800 }, { "epoch": 9.922768453883856, "grad_norm": 0.7404711246490479, "learning_rate": 7.723154611614437e-08, "loss": 0.0368, "step": 66810 }, { "epoch": 9.92425367592455, "grad_norm": 0.8041830658912659, "learning_rate": 7.574632407544928e-08, "loss": 0.0613, "step": 66820 }, { "epoch": 9.925738897965246, "grad_norm": 0.6040468215942383, "learning_rate": 7.42611020347542e-08, "loss": 0.0516, "step": 66830 }, { "epoch": 9.92722412000594, "grad_norm": 0.5927002429962158, "learning_rate": 7.277587999405911e-08, "loss": 0.0505, "step": 66840 }, { "epoch": 9.928709342046636, "grad_norm": 0.4314495921134949, "learning_rate": 7.129065795336404e-08, "loss": 0.0552, "step": 66850 }, { "epoch": 9.930194564087332, "grad_norm": 0.20560452342033386, "learning_rate": 6.980543591266895e-08, "loss": 0.0408, "step": 66860 }, { "epoch": 9.931679786128026, "grad_norm": 0.6640859246253967, "learning_rate": 6.832021387197387e-08, "loss": 0.0317, "step": 66870 }, { "epoch": 9.933165008168721, "grad_norm": 1.0623282194137573, "learning_rate": 6.683499183127878e-08, "loss": 0.0501, "step": 66880 }, { "epoch": 9.934650230209416, "grad_norm": 0.9650203585624695, "learning_rate": 6.53497697905837e-08, "loss": 0.06, "step": 66890 }, { "epoch": 9.936135452250111, "grad_norm": 0.383526474237442, "learning_rate": 6.386454774988861e-08, "loss": 0.0483, "step": 66900 }, { "epoch": 9.937620674290807, "grad_norm": 0.9319167137145996, "learning_rate": 6.237932570919354e-08, "loss": 0.0456, "step": 66910 }, { "epoch": 9.939105896331501, "grad_norm": 1.202764630317688, "learning_rate": 6.089410366849845e-08, "loss": 0.0466, "step": 66920 }, { "epoch": 9.940591118372197, "grad_norm": 1.169226050376892, "learning_rate": 5.9408881627803355e-08, "loss": 0.0744, "step": 66930 }, { "epoch": 9.942076340412891, "grad_norm": 1.2179229259490967, "learning_rate": 5.792365958710828e-08, "loss": 0.0421, "step": 66940 }, { "epoch": 9.943561562453587, "grad_norm": 0.5202845335006714, "learning_rate": 5.6438437546413195e-08, "loss": 0.0482, "step": 66950 }, { "epoch": 9.945046784494282, "grad_norm": 1.294660210609436, "learning_rate": 5.495321550571811e-08, "loss": 0.0549, "step": 66960 }, { "epoch": 9.946532006534976, "grad_norm": 0.7863031029701233, "learning_rate": 5.346799346502302e-08, "loss": 0.0339, "step": 66970 }, { "epoch": 9.948017228575672, "grad_norm": 0.7727733254432678, "learning_rate": 5.198277142432794e-08, "loss": 0.0492, "step": 66980 }, { "epoch": 9.949502450616368, "grad_norm": 0.6926112771034241, "learning_rate": 5.049754938363286e-08, "loss": 0.0485, "step": 66990 }, { "epoch": 9.950987672657062, "grad_norm": 0.7342759370803833, "learning_rate": 4.901232734293778e-08, "loss": 0.053, "step": 67000 }, { "epoch": 9.952472894697758, "grad_norm": 0.6734324097633362, "learning_rate": 4.7527105302242687e-08, "loss": 0.0588, "step": 67010 }, { "epoch": 9.953958116738452, "grad_norm": 0.7823164463043213, "learning_rate": 4.60418832615476e-08, "loss": 0.0371, "step": 67020 }, { "epoch": 9.955443338779148, "grad_norm": 0.833852231502533, "learning_rate": 4.4556661220852526e-08, "loss": 0.0368, "step": 67030 }, { "epoch": 9.956928560819843, "grad_norm": 0.5555514097213745, "learning_rate": 4.3071439180157436e-08, "loss": 0.0685, "step": 67040 }, { "epoch": 9.958413782860537, "grad_norm": 0.9500750303268433, "learning_rate": 4.158621713946235e-08, "loss": 0.0422, "step": 67050 }, { "epoch": 9.959899004901233, "grad_norm": 0.4888717830181122, "learning_rate": 4.010099509876727e-08, "loss": 0.0566, "step": 67060 }, { "epoch": 9.961384226941927, "grad_norm": 0.9437296986579895, "learning_rate": 3.8615773058072185e-08, "loss": 0.0503, "step": 67070 }, { "epoch": 9.962869448982623, "grad_norm": 0.8006777763366699, "learning_rate": 3.71305510173771e-08, "loss": 0.0775, "step": 67080 }, { "epoch": 9.964354671023319, "grad_norm": 0.37248167395591736, "learning_rate": 3.564532897668202e-08, "loss": 0.0616, "step": 67090 }, { "epoch": 9.965839893064013, "grad_norm": 1.2399548292160034, "learning_rate": 3.4160106935986935e-08, "loss": 0.0531, "step": 67100 }, { "epoch": 9.967325115104709, "grad_norm": 1.3255739212036133, "learning_rate": 3.267488489529185e-08, "loss": 0.0571, "step": 67110 }, { "epoch": 9.968810337145403, "grad_norm": 0.7814356684684753, "learning_rate": 3.118966285459677e-08, "loss": 0.0501, "step": 67120 }, { "epoch": 9.970295559186098, "grad_norm": 0.7996913194656372, "learning_rate": 2.9704440813901678e-08, "loss": 0.0431, "step": 67130 }, { "epoch": 9.971780781226794, "grad_norm": 0.25158068537712097, "learning_rate": 2.8219218773206597e-08, "loss": 0.042, "step": 67140 }, { "epoch": 9.973266003267488, "grad_norm": 0.4499381482601166, "learning_rate": 2.673399673251151e-08, "loss": 0.0465, "step": 67150 }, { "epoch": 9.974751225308184, "grad_norm": 1.553957462310791, "learning_rate": 2.524877469181643e-08, "loss": 0.0501, "step": 67160 }, { "epoch": 9.976236447348878, "grad_norm": 0.17447112500667572, "learning_rate": 2.3763552651121343e-08, "loss": 0.0504, "step": 67170 }, { "epoch": 9.977721669389574, "grad_norm": 0.9022453427314758, "learning_rate": 2.2278330610426263e-08, "loss": 0.0535, "step": 67180 }, { "epoch": 9.97920689143027, "grad_norm": 1.0982049703598022, "learning_rate": 2.0793108569731176e-08, "loss": 0.0615, "step": 67190 }, { "epoch": 9.980692113470964, "grad_norm": 0.504771888256073, "learning_rate": 1.9307886529036093e-08, "loss": 0.0462, "step": 67200 }, { "epoch": 9.98217733551166, "grad_norm": 0.8339830636978149, "learning_rate": 1.782266448834101e-08, "loss": 0.06, "step": 67210 }, { "epoch": 9.983662557552353, "grad_norm": 0.9843574166297913, "learning_rate": 1.6337442447645926e-08, "loss": 0.0567, "step": 67220 }, { "epoch": 9.98514777959305, "grad_norm": 1.6802273988723755, "learning_rate": 1.4852220406950839e-08, "loss": 0.0608, "step": 67230 }, { "epoch": 9.986633001633745, "grad_norm": 0.8817097544670105, "learning_rate": 1.3366998366255755e-08, "loss": 0.0537, "step": 67240 }, { "epoch": 9.988118223674439, "grad_norm": 0.6457740664482117, "learning_rate": 1.1881776325560672e-08, "loss": 0.0749, "step": 67250 }, { "epoch": 9.989603445715135, "grad_norm": 1.4911023378372192, "learning_rate": 1.0396554284865588e-08, "loss": 0.0494, "step": 67260 }, { "epoch": 9.991088667755829, "grad_norm": 0.6926244497299194, "learning_rate": 8.911332244170505e-09, "loss": 0.047, "step": 67270 }, { "epoch": 9.992573889796525, "grad_norm": 1.2885057926177979, "learning_rate": 7.426110203475419e-09, "loss": 0.0553, "step": 67280 }, { "epoch": 9.99405911183722, "grad_norm": 0.38584211468696594, "learning_rate": 5.940888162780336e-09, "loss": 0.052, "step": 67290 }, { "epoch": 9.995544333877914, "grad_norm": 0.6076240539550781, "learning_rate": 4.455666122085252e-09, "loss": 0.0729, "step": 67300 }, { "epoch": 9.99702955591861, "grad_norm": 0.7656409740447998, "learning_rate": 2.970444081390168e-09, "loss": 0.067, "step": 67310 }, { "epoch": 9.998514777959304, "grad_norm": 0.22936700284481049, "learning_rate": 1.485222040695084e-09, "loss": 0.0667, "step": 67320 }, { "epoch": 10.0, "grad_norm": 0.6690473556518555, "learning_rate": 0.0, "loss": 0.0389, "step": 67330 }, { "epoch": 10.0, "eval_accuracy": 0.49727767695099817, "eval_loss": 0.05480470508337021, "eval_runtime": 204.2655, "eval_samples_per_second": 186.125, "eval_steps_per_second": 5.821, "step": 67330 }, { "epoch": 10.0, "step": 67330, "total_flos": 1.6694802102985576e+20, "train_loss": 0.0, "train_runtime": 0.0767, "train_samples_per_second": 28073613.463, "train_steps_per_second": 877361.502 } ], "logging_steps": 10, "max_steps": 67330, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.6694802102985576e+20, "train_batch_size": 32, "trial_name": null, "trial_params": null }