{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.999683663569395, "eval_steps": 500, "global_step": 2580, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0003874742882051919, "grad_norm": 0.6923572421073914, "learning_rate": 1.9230769230769234e-07, "loss": 2.0671, "step": 1 }, { "epoch": 0.0007749485764103837, "grad_norm": 0.6222718954086304, "learning_rate": 3.846153846153847e-07, "loss": 2.0267, "step": 2 }, { "epoch": 0.0011624228646155755, "grad_norm": 0.6504022479057312, "learning_rate": 5.76923076923077e-07, "loss": 2.0381, "step": 3 }, { "epoch": 0.0015498971528207675, "grad_norm": 0.7175610065460205, "learning_rate": 7.692307692307694e-07, "loss": 2.0284, "step": 4 }, { "epoch": 0.0019373714410259593, "grad_norm": 0.6393950581550598, "learning_rate": 9.615384615384617e-07, "loss": 2.0609, "step": 5 }, { "epoch": 0.002324845729231151, "grad_norm": 0.5405275821685791, "learning_rate": 1.153846153846154e-06, "loss": 1.9393, "step": 6 }, { "epoch": 0.002712320017436343, "grad_norm": 0.6147302985191345, "learning_rate": 1.3461538461538462e-06, "loss": 2.0118, "step": 7 }, { "epoch": 0.003099794305641535, "grad_norm": 0.7042288780212402, "learning_rate": 1.5384615384615387e-06, "loss": 2.0821, "step": 8 }, { "epoch": 0.0034872685938467265, "grad_norm": 0.6672390699386597, "learning_rate": 1.7307692307692308e-06, "loss": 2.0309, "step": 9 }, { "epoch": 0.0038747428820519185, "grad_norm": 0.6298902034759521, "learning_rate": 1.9230769230769234e-06, "loss": 2.0245, "step": 10 }, { "epoch": 0.00426221717025711, "grad_norm": 0.5852465033531189, "learning_rate": 2.1153846153846155e-06, "loss": 2.0391, "step": 11 }, { "epoch": 0.004649691458462302, "grad_norm": 0.6079792380332947, "learning_rate": 2.307692307692308e-06, "loss": 2.0193, "step": 12 }, { "epoch": 0.005037165746667494, "grad_norm": 0.5806028842926025, "learning_rate": 2.5e-06, "loss": 1.9828, "step": 13 }, { "epoch": 0.005424640034872686, "grad_norm": 0.5376424193382263, "learning_rate": 2.6923076923076923e-06, "loss": 1.974, "step": 14 }, { "epoch": 0.005812114323077878, "grad_norm": 0.5606303215026855, "learning_rate": 2.8846153846153845e-06, "loss": 2.0136, "step": 15 }, { "epoch": 0.00619958861128307, "grad_norm": 0.6936549544334412, "learning_rate": 3.0769230769230774e-06, "loss": 2.0205, "step": 16 }, { "epoch": 0.006587062899488261, "grad_norm": 0.6454318761825562, "learning_rate": 3.2692307692307696e-06, "loss": 2.0307, "step": 17 }, { "epoch": 0.006974537187693453, "grad_norm": 0.6546300053596497, "learning_rate": 3.4615384615384617e-06, "loss": 2.0448, "step": 18 }, { "epoch": 0.007362011475898645, "grad_norm": 0.6601698398590088, "learning_rate": 3.653846153846154e-06, "loss": 2.0685, "step": 19 }, { "epoch": 0.007749485764103837, "grad_norm": 0.6249769330024719, "learning_rate": 3.846153846153847e-06, "loss": 2.0011, "step": 20 }, { "epoch": 0.008136960052309029, "grad_norm": 0.5810555219650269, "learning_rate": 4.0384615384615385e-06, "loss": 1.9182, "step": 21 }, { "epoch": 0.00852443434051422, "grad_norm": 0.5300651788711548, "learning_rate": 4.230769230769231e-06, "loss": 1.9143, "step": 22 }, { "epoch": 0.008911908628719413, "grad_norm": 0.5637993216514587, "learning_rate": 4.423076923076924e-06, "loss": 1.9273, "step": 23 }, { "epoch": 0.009299382916924604, "grad_norm": 0.5467361211776733, "learning_rate": 4.615384615384616e-06, "loss": 1.9453, "step": 24 }, { "epoch": 0.009686857205129797, "grad_norm": 0.40556156635284424, "learning_rate": 4.807692307692308e-06, "loss": 1.9061, "step": 25 }, { "epoch": 0.010074331493334988, "grad_norm": 0.5371876358985901, "learning_rate": 5e-06, "loss": 1.9252, "step": 26 }, { "epoch": 0.01046180578154018, "grad_norm": 0.45392119884490967, "learning_rate": 5.192307692307693e-06, "loss": 1.9611, "step": 27 }, { "epoch": 0.010849280069745372, "grad_norm": 0.49052709341049194, "learning_rate": 5.384615384615385e-06, "loss": 1.9404, "step": 28 }, { "epoch": 0.011236754357950563, "grad_norm": 0.4428410828113556, "learning_rate": 5.576923076923077e-06, "loss": 2.011, "step": 29 }, { "epoch": 0.011624228646155756, "grad_norm": 0.40356844663619995, "learning_rate": 5.769230769230769e-06, "loss": 1.9543, "step": 30 }, { "epoch": 0.012011702934360947, "grad_norm": 0.3948465585708618, "learning_rate": 5.961538461538462e-06, "loss": 1.9192, "step": 31 }, { "epoch": 0.01239917722256614, "grad_norm": 0.37439030408859253, "learning_rate": 6.153846153846155e-06, "loss": 1.9019, "step": 32 }, { "epoch": 0.012786651510771331, "grad_norm": 0.37043169140815735, "learning_rate": 6.3461538461538466e-06, "loss": 1.9439, "step": 33 }, { "epoch": 0.013174125798976522, "grad_norm": 0.31158241629600525, "learning_rate": 6.538461538461539e-06, "loss": 1.9211, "step": 34 }, { "epoch": 0.013561600087181715, "grad_norm": 0.28980252146720886, "learning_rate": 6.730769230769232e-06, "loss": 1.9366, "step": 35 }, { "epoch": 0.013949074375386906, "grad_norm": 0.329671174287796, "learning_rate": 6.923076923076923e-06, "loss": 1.9818, "step": 36 }, { "epoch": 0.014336548663592099, "grad_norm": 0.2794431746006012, "learning_rate": 7.115384615384616e-06, "loss": 1.9458, "step": 37 }, { "epoch": 0.01472402295179729, "grad_norm": 0.26127520203590393, "learning_rate": 7.307692307692308e-06, "loss": 1.8725, "step": 38 }, { "epoch": 0.015111497240002483, "grad_norm": 0.2772791385650635, "learning_rate": 7.500000000000001e-06, "loss": 1.8983, "step": 39 }, { "epoch": 0.015498971528207674, "grad_norm": 0.2964678406715393, "learning_rate": 7.692307692307694e-06, "loss": 1.8995, "step": 40 }, { "epoch": 0.015886445816412867, "grad_norm": 0.27993109822273254, "learning_rate": 7.884615384615384e-06, "loss": 1.8981, "step": 41 }, { "epoch": 0.016273920104618058, "grad_norm": 0.2533140778541565, "learning_rate": 8.076923076923077e-06, "loss": 1.8636, "step": 42 }, { "epoch": 0.01666139439282325, "grad_norm": 0.28240644931793213, "learning_rate": 8.26923076923077e-06, "loss": 1.8981, "step": 43 }, { "epoch": 0.01704886868102844, "grad_norm": 0.2842380702495575, "learning_rate": 8.461538461538462e-06, "loss": 1.8879, "step": 44 }, { "epoch": 0.017436342969233635, "grad_norm": 0.27180027961730957, "learning_rate": 8.653846153846155e-06, "loss": 1.8844, "step": 45 }, { "epoch": 0.017823817257438826, "grad_norm": 0.26375389099121094, "learning_rate": 8.846153846153847e-06, "loss": 1.8445, "step": 46 }, { "epoch": 0.018211291545644017, "grad_norm": 0.3037245571613312, "learning_rate": 9.03846153846154e-06, "loss": 1.8536, "step": 47 }, { "epoch": 0.018598765833849208, "grad_norm": 0.27242764830589294, "learning_rate": 9.230769230769232e-06, "loss": 1.8143, "step": 48 }, { "epoch": 0.0189862401220544, "grad_norm": 0.29574307799339294, "learning_rate": 9.423076923076923e-06, "loss": 1.8664, "step": 49 }, { "epoch": 0.019373714410259594, "grad_norm": 0.2600277066230774, "learning_rate": 9.615384615384616e-06, "loss": 1.8776, "step": 50 }, { "epoch": 0.019761188698464785, "grad_norm": 0.2444121092557907, "learning_rate": 9.807692307692308e-06, "loss": 1.864, "step": 51 }, { "epoch": 0.020148662986669976, "grad_norm": 0.22739152610301971, "learning_rate": 1e-05, "loss": 1.8806, "step": 52 }, { "epoch": 0.020536137274875167, "grad_norm": 0.23612315952777863, "learning_rate": 9.99999613912662e-06, "loss": 1.8185, "step": 53 }, { "epoch": 0.02092361156308036, "grad_norm": 0.22748690843582153, "learning_rate": 9.99998455651244e-06, "loss": 1.8411, "step": 54 }, { "epoch": 0.021311085851285553, "grad_norm": 0.20360301434993744, "learning_rate": 9.999965252175349e-06, "loss": 1.7778, "step": 55 }, { "epoch": 0.021698560139490744, "grad_norm": 0.20186133682727814, "learning_rate": 9.99993822614516e-06, "loss": 1.8403, "step": 56 }, { "epoch": 0.022086034427695935, "grad_norm": 0.19679364562034607, "learning_rate": 9.99990347846361e-06, "loss": 1.8415, "step": 57 }, { "epoch": 0.022473508715901126, "grad_norm": 0.179094597697258, "learning_rate": 9.999861009184362e-06, "loss": 1.8101, "step": 58 }, { "epoch": 0.02286098300410632, "grad_norm": 0.1937316656112671, "learning_rate": 9.999810818373e-06, "loss": 1.8686, "step": 59 }, { "epoch": 0.023248457292311512, "grad_norm": 0.20137861371040344, "learning_rate": 9.999752906107043e-06, "loss": 1.8474, "step": 60 }, { "epoch": 0.023635931580516703, "grad_norm": 0.17836111783981323, "learning_rate": 9.999687272475923e-06, "loss": 1.8179, "step": 61 }, { "epoch": 0.024023405868721894, "grad_norm": 0.19433969259262085, "learning_rate": 9.999613917581e-06, "loss": 1.8086, "step": 62 }, { "epoch": 0.024410880156927085, "grad_norm": 0.1826869249343872, "learning_rate": 9.999532841535565e-06, "loss": 1.782, "step": 63 }, { "epoch": 0.02479835444513228, "grad_norm": 0.2032211571931839, "learning_rate": 9.999444044464823e-06, "loss": 1.8572, "step": 64 }, { "epoch": 0.02518582873333747, "grad_norm": 0.16769559681415558, "learning_rate": 9.99934752650591e-06, "loss": 1.8124, "step": 65 }, { "epoch": 0.025573303021542662, "grad_norm": 0.17377783358097076, "learning_rate": 9.99924328780788e-06, "loss": 1.7751, "step": 66 }, { "epoch": 0.025960777309747853, "grad_norm": 0.18869639933109283, "learning_rate": 9.999131328531719e-06, "loss": 1.8351, "step": 67 }, { "epoch": 0.026348251597953044, "grad_norm": 0.15961916744709015, "learning_rate": 9.999011648850328e-06, "loss": 1.7477, "step": 68 }, { "epoch": 0.02673572588615824, "grad_norm": 0.17454583942890167, "learning_rate": 9.998884248948536e-06, "loss": 1.7576, "step": 69 }, { "epoch": 0.02712320017436343, "grad_norm": 0.1512240618467331, "learning_rate": 9.998749129023091e-06, "loss": 1.7495, "step": 70 }, { "epoch": 0.02751067446256862, "grad_norm": 0.16514761745929718, "learning_rate": 9.998606289282667e-06, "loss": 1.824, "step": 71 }, { "epoch": 0.027898148750773812, "grad_norm": 0.1524139642715454, "learning_rate": 9.998455729947859e-06, "loss": 1.7874, "step": 72 }, { "epoch": 0.028285623038979007, "grad_norm": 0.17041970789432526, "learning_rate": 9.99829745125118e-06, "loss": 1.7904, "step": 73 }, { "epoch": 0.028673097327184198, "grad_norm": 0.14344944059848785, "learning_rate": 9.99813145343707e-06, "loss": 1.7793, "step": 74 }, { "epoch": 0.02906057161538939, "grad_norm": 0.14882387220859528, "learning_rate": 9.997957736761887e-06, "loss": 1.7789, "step": 75 }, { "epoch": 0.02944804590359458, "grad_norm": 0.14182977378368378, "learning_rate": 9.997776301493914e-06, "loss": 1.7593, "step": 76 }, { "epoch": 0.02983552019179977, "grad_norm": 0.14842820167541504, "learning_rate": 9.997587147913344e-06, "loss": 1.8091, "step": 77 }, { "epoch": 0.030222994480004966, "grad_norm": 0.15900954604148865, "learning_rate": 9.997390276312298e-06, "loss": 1.8146, "step": 78 }, { "epoch": 0.030610468768210157, "grad_norm": 0.15745869278907776, "learning_rate": 9.997185686994817e-06, "loss": 1.7652, "step": 79 }, { "epoch": 0.030997943056415348, "grad_norm": 0.15619753301143646, "learning_rate": 9.996973380276858e-06, "loss": 1.7807, "step": 80 }, { "epoch": 0.03138541734462054, "grad_norm": 0.16785477101802826, "learning_rate": 9.996753356486295e-06, "loss": 1.8283, "step": 81 }, { "epoch": 0.031772891632825734, "grad_norm": 0.15593001246452332, "learning_rate": 9.996525615962923e-06, "loss": 1.7539, "step": 82 }, { "epoch": 0.03216036592103092, "grad_norm": 0.1389356106519699, "learning_rate": 9.996290159058453e-06, "loss": 1.8041, "step": 83 }, { "epoch": 0.032547840209236116, "grad_norm": 0.14878007769584656, "learning_rate": 9.99604698613651e-06, "loss": 1.7176, "step": 84 }, { "epoch": 0.03293531449744131, "grad_norm": 0.14064928889274597, "learning_rate": 9.995796097572641e-06, "loss": 1.7569, "step": 85 }, { "epoch": 0.0333227887856465, "grad_norm": 0.1375439316034317, "learning_rate": 9.995537493754306e-06, "loss": 1.7599, "step": 86 }, { "epoch": 0.03371026307385169, "grad_norm": 0.1539166271686554, "learning_rate": 9.995271175080876e-06, "loss": 1.7901, "step": 87 }, { "epoch": 0.03409773736205688, "grad_norm": 0.13793475925922394, "learning_rate": 9.994997141963644e-06, "loss": 1.7192, "step": 88 }, { "epoch": 0.034485211650262075, "grad_norm": 0.1397310048341751, "learning_rate": 9.99471539482581e-06, "loss": 1.7667, "step": 89 }, { "epoch": 0.03487268593846727, "grad_norm": 0.14041666686534882, "learning_rate": 9.994425934102492e-06, "loss": 1.7902, "step": 90 }, { "epoch": 0.03526016022667246, "grad_norm": 0.14461427927017212, "learning_rate": 9.994128760240718e-06, "loss": 1.7843, "step": 91 }, { "epoch": 0.03564763451487765, "grad_norm": 0.13596254587173462, "learning_rate": 9.993823873699427e-06, "loss": 1.7983, "step": 92 }, { "epoch": 0.03603510880308284, "grad_norm": 0.13682232797145844, "learning_rate": 9.993511274949472e-06, "loss": 1.7749, "step": 93 }, { "epoch": 0.036422583091288034, "grad_norm": 0.12191647291183472, "learning_rate": 9.993190964473614e-06, "loss": 1.7999, "step": 94 }, { "epoch": 0.03681005737949323, "grad_norm": 0.15112750232219696, "learning_rate": 9.992862942766523e-06, "loss": 1.6959, "step": 95 }, { "epoch": 0.037197531667698416, "grad_norm": 0.1330232471227646, "learning_rate": 9.992527210334781e-06, "loss": 1.7081, "step": 96 }, { "epoch": 0.03758500595590361, "grad_norm": 0.12039201706647873, "learning_rate": 9.992183767696878e-06, "loss": 1.7759, "step": 97 }, { "epoch": 0.0379724802441088, "grad_norm": 0.11787346750497818, "learning_rate": 9.991832615383203e-06, "loss": 1.7614, "step": 98 }, { "epoch": 0.03835995453231399, "grad_norm": 0.1312962770462036, "learning_rate": 9.991473753936065e-06, "loss": 1.7712, "step": 99 }, { "epoch": 0.03874742882051919, "grad_norm": 0.11712636798620224, "learning_rate": 9.991107183909665e-06, "loss": 1.7016, "step": 100 }, { "epoch": 0.039134903108724375, "grad_norm": 0.14191047847270966, "learning_rate": 9.990732905870122e-06, "loss": 1.7336, "step": 101 }, { "epoch": 0.03952237739692957, "grad_norm": 0.11791345477104187, "learning_rate": 9.990350920395447e-06, "loss": 1.7481, "step": 102 }, { "epoch": 0.03990985168513476, "grad_norm": 0.139199897646904, "learning_rate": 9.989961228075558e-06, "loss": 1.6889, "step": 103 }, { "epoch": 0.04029732597333995, "grad_norm": 0.12002615630626678, "learning_rate": 9.989563829512281e-06, "loss": 1.709, "step": 104 }, { "epoch": 0.04068480026154515, "grad_norm": 0.11881057918071747, "learning_rate": 9.989158725319337e-06, "loss": 1.7689, "step": 105 }, { "epoch": 0.041072274549750334, "grad_norm": 0.13230781257152557, "learning_rate": 9.988745916122344e-06, "loss": 1.7612, "step": 106 }, { "epoch": 0.04145974883795553, "grad_norm": 0.12594729661941528, "learning_rate": 9.98832540255883e-06, "loss": 1.7077, "step": 107 }, { "epoch": 0.04184722312616072, "grad_norm": 0.1312893182039261, "learning_rate": 9.98789718527821e-06, "loss": 1.786, "step": 108 }, { "epoch": 0.04223469741436591, "grad_norm": 0.13741853833198547, "learning_rate": 9.987461264941803e-06, "loss": 1.7147, "step": 109 }, { "epoch": 0.042622171702571106, "grad_norm": 0.11314312368631363, "learning_rate": 9.987017642222821e-06, "loss": 1.7257, "step": 110 }, { "epoch": 0.04300964599077629, "grad_norm": 0.12022025883197784, "learning_rate": 9.986566317806374e-06, "loss": 1.7829, "step": 111 }, { "epoch": 0.04339712027898149, "grad_norm": 0.11993694305419922, "learning_rate": 9.986107292389465e-06, "loss": 1.7713, "step": 112 }, { "epoch": 0.04378459456718668, "grad_norm": 0.10954278707504272, "learning_rate": 9.985640566680987e-06, "loss": 1.7273, "step": 113 }, { "epoch": 0.04417206885539187, "grad_norm": 0.13270458579063416, "learning_rate": 9.98516614140173e-06, "loss": 1.7355, "step": 114 }, { "epoch": 0.044559543143597065, "grad_norm": 0.12262709438800812, "learning_rate": 9.984684017284372e-06, "loss": 1.6893, "step": 115 }, { "epoch": 0.04494701743180225, "grad_norm": 0.11373162269592285, "learning_rate": 9.98419419507348e-06, "loss": 1.7803, "step": 116 }, { "epoch": 0.04533449172000745, "grad_norm": 0.1212875097990036, "learning_rate": 9.98369667552551e-06, "loss": 1.6942, "step": 117 }, { "epoch": 0.04572196600821264, "grad_norm": 0.12154986709356308, "learning_rate": 9.98319145940881e-06, "loss": 1.6839, "step": 118 }, { "epoch": 0.04610944029641783, "grad_norm": 0.10589006543159485, "learning_rate": 9.982678547503606e-06, "loss": 1.6583, "step": 119 }, { "epoch": 0.046496914584623024, "grad_norm": 0.10453012585639954, "learning_rate": 9.982157940602015e-06, "loss": 1.7135, "step": 120 }, { "epoch": 0.04688438887282821, "grad_norm": 0.10810253769159317, "learning_rate": 9.981629639508034e-06, "loss": 1.7213, "step": 121 }, { "epoch": 0.047271863161033406, "grad_norm": 0.11032278090715408, "learning_rate": 9.981093645037549e-06, "loss": 1.6353, "step": 122 }, { "epoch": 0.0476593374492386, "grad_norm": 0.10876409709453583, "learning_rate": 9.980549958018317e-06, "loss": 1.6957, "step": 123 }, { "epoch": 0.04804681173744379, "grad_norm": 0.139864981174469, "learning_rate": 9.979998579289985e-06, "loss": 1.7237, "step": 124 }, { "epoch": 0.04843428602564898, "grad_norm": 0.11574551463127136, "learning_rate": 9.979439509704071e-06, "loss": 1.7049, "step": 125 }, { "epoch": 0.04882176031385417, "grad_norm": 0.10008589178323746, "learning_rate": 9.978872750123978e-06, "loss": 1.6993, "step": 126 }, { "epoch": 0.049209234602059365, "grad_norm": 0.11013998836278915, "learning_rate": 9.978298301424975e-06, "loss": 1.7021, "step": 127 }, { "epoch": 0.04959670889026456, "grad_norm": 0.10174492746591568, "learning_rate": 9.977716164494218e-06, "loss": 1.7008, "step": 128 }, { "epoch": 0.04998418317846975, "grad_norm": 0.13088682293891907, "learning_rate": 9.977126340230725e-06, "loss": 1.634, "step": 129 }, { "epoch": 0.05037165746667494, "grad_norm": 0.10286317020654678, "learning_rate": 9.976528829545391e-06, "loss": 1.6616, "step": 130 }, { "epoch": 0.05075913175488013, "grad_norm": 0.10316441208124161, "learning_rate": 9.975923633360985e-06, "loss": 1.6461, "step": 131 }, { "epoch": 0.051146606043085324, "grad_norm": 0.0927695631980896, "learning_rate": 9.975310752612138e-06, "loss": 1.6568, "step": 132 }, { "epoch": 0.05153408033129052, "grad_norm": 0.11984983086585999, "learning_rate": 9.974690188245353e-06, "loss": 1.6537, "step": 133 }, { "epoch": 0.051921554619495706, "grad_norm": 0.134580597281456, "learning_rate": 9.974061941218998e-06, "loss": 1.6979, "step": 134 }, { "epoch": 0.0523090289077009, "grad_norm": 0.09676618874073029, "learning_rate": 9.973426012503305e-06, "loss": 1.6605, "step": 135 }, { "epoch": 0.05269650319590609, "grad_norm": 0.09218312799930573, "learning_rate": 9.972782403080372e-06, "loss": 1.6831, "step": 136 }, { "epoch": 0.05308397748411128, "grad_norm": 0.09796468913555145, "learning_rate": 9.972131113944155e-06, "loss": 1.6798, "step": 137 }, { "epoch": 0.05347145177231648, "grad_norm": 0.09376774728298187, "learning_rate": 9.971472146100474e-06, "loss": 1.705, "step": 138 }, { "epoch": 0.053858926060521665, "grad_norm": 0.09378914535045624, "learning_rate": 9.970805500567002e-06, "loss": 1.726, "step": 139 }, { "epoch": 0.05424640034872686, "grad_norm": 0.0990544855594635, "learning_rate": 9.970131178373276e-06, "loss": 1.6781, "step": 140 }, { "epoch": 0.054633874636932055, "grad_norm": 0.1037389412522316, "learning_rate": 9.969449180560686e-06, "loss": 1.6783, "step": 141 }, { "epoch": 0.05502134892513724, "grad_norm": 0.1265256255865097, "learning_rate": 9.968759508182471e-06, "loss": 1.6759, "step": 142 }, { "epoch": 0.05540882321334244, "grad_norm": 0.11342137306928635, "learning_rate": 9.968062162303728e-06, "loss": 1.7061, "step": 143 }, { "epoch": 0.055796297501547625, "grad_norm": 0.10088245570659637, "learning_rate": 9.967357144001404e-06, "loss": 1.6467, "step": 144 }, { "epoch": 0.05618377178975282, "grad_norm": 0.11893604695796967, "learning_rate": 9.966644454364291e-06, "loss": 1.6654, "step": 145 }, { "epoch": 0.056571246077958014, "grad_norm": 0.08704584836959839, "learning_rate": 9.965924094493032e-06, "loss": 1.6598, "step": 146 }, { "epoch": 0.0569587203661632, "grad_norm": 0.09753448516130447, "learning_rate": 9.965196065500114e-06, "loss": 1.672, "step": 147 }, { "epoch": 0.057346194654368396, "grad_norm": 0.1020703986287117, "learning_rate": 9.964460368509868e-06, "loss": 1.6088, "step": 148 }, { "epoch": 0.057733668942573584, "grad_norm": 0.09907938539981842, "learning_rate": 9.963717004658468e-06, "loss": 1.6377, "step": 149 }, { "epoch": 0.05812114323077878, "grad_norm": 0.10045703500509262, "learning_rate": 9.962965975093926e-06, "loss": 1.6472, "step": 150 }, { "epoch": 0.05850861751898397, "grad_norm": 0.08324772119522095, "learning_rate": 9.962207280976097e-06, "loss": 1.6502, "step": 151 }, { "epoch": 0.05889609180718916, "grad_norm": 0.09664250165224075, "learning_rate": 9.961440923476666e-06, "loss": 1.637, "step": 152 }, { "epoch": 0.059283566095394355, "grad_norm": 0.11901500076055527, "learning_rate": 9.960666903779159e-06, "loss": 1.6379, "step": 153 }, { "epoch": 0.05967104038359954, "grad_norm": 0.09418801963329315, "learning_rate": 9.959885223078934e-06, "loss": 1.6059, "step": 154 }, { "epoch": 0.06005851467180474, "grad_norm": 0.09874877333641052, "learning_rate": 9.959095882583176e-06, "loss": 1.6699, "step": 155 }, { "epoch": 0.06044598896000993, "grad_norm": 0.13441435992717743, "learning_rate": 9.958298883510904e-06, "loss": 1.6976, "step": 156 }, { "epoch": 0.06083346324821512, "grad_norm": 0.09040838479995728, "learning_rate": 9.957494227092961e-06, "loss": 1.622, "step": 157 }, { "epoch": 0.061220937536420314, "grad_norm": 0.08333111554384232, "learning_rate": 9.956681914572021e-06, "loss": 1.6076, "step": 158 }, { "epoch": 0.0616084118246255, "grad_norm": 0.1243152767419815, "learning_rate": 9.955861947202578e-06, "loss": 1.6554, "step": 159 }, { "epoch": 0.061995886112830696, "grad_norm": 0.09115622192621231, "learning_rate": 9.955034326250947e-06, "loss": 1.6489, "step": 160 }, { "epoch": 0.06238336040103589, "grad_norm": 0.08486997336149216, "learning_rate": 9.954199052995261e-06, "loss": 1.6463, "step": 161 }, { "epoch": 0.06277083468924108, "grad_norm": 0.09140261262655258, "learning_rate": 9.953356128725479e-06, "loss": 1.6407, "step": 162 }, { "epoch": 0.06315830897744627, "grad_norm": 0.10655142366886139, "learning_rate": 9.952505554743368e-06, "loss": 1.6166, "step": 163 }, { "epoch": 0.06354578326565147, "grad_norm": 0.10238759219646454, "learning_rate": 9.951647332362511e-06, "loss": 1.685, "step": 164 }, { "epoch": 0.06393325755385666, "grad_norm": 0.1427789330482483, "learning_rate": 9.950781462908303e-06, "loss": 1.6619, "step": 165 }, { "epoch": 0.06432073184206184, "grad_norm": 0.13144421577453613, "learning_rate": 9.94990794771795e-06, "loss": 1.6697, "step": 166 }, { "epoch": 0.06470820613026704, "grad_norm": 0.0965215414762497, "learning_rate": 9.949026788140463e-06, "loss": 1.6376, "step": 167 }, { "epoch": 0.06509568041847223, "grad_norm": 0.08267915993928909, "learning_rate": 9.948137985536663e-06, "loss": 1.6131, "step": 168 }, { "epoch": 0.06548315470667743, "grad_norm": 0.1532767117023468, "learning_rate": 9.947241541279169e-06, "loss": 1.656, "step": 169 }, { "epoch": 0.06587062899488262, "grad_norm": 0.09536236524581909, "learning_rate": 9.946337456752404e-06, "loss": 1.6136, "step": 170 }, { "epoch": 0.0662581032830878, "grad_norm": 0.08837587386369705, "learning_rate": 9.945425733352594e-06, "loss": 1.6432, "step": 171 }, { "epoch": 0.066645577571293, "grad_norm": 0.10012009739875793, "learning_rate": 9.944506372487754e-06, "loss": 1.5856, "step": 172 }, { "epoch": 0.06703305185949819, "grad_norm": 0.10671480745077133, "learning_rate": 9.943579375577702e-06, "loss": 1.598, "step": 173 }, { "epoch": 0.06742052614770339, "grad_norm": 0.09142492711544037, "learning_rate": 9.942644744054043e-06, "loss": 1.6636, "step": 174 }, { "epoch": 0.06780800043590858, "grad_norm": 0.09933105111122131, "learning_rate": 9.941702479360176e-06, "loss": 1.6287, "step": 175 }, { "epoch": 0.06819547472411376, "grad_norm": 0.08849367499351501, "learning_rate": 9.940752582951283e-06, "loss": 1.6506, "step": 176 }, { "epoch": 0.06858294901231896, "grad_norm": 0.10706111788749695, "learning_rate": 9.93979505629434e-06, "loss": 1.6398, "step": 177 }, { "epoch": 0.06897042330052415, "grad_norm": 0.09320668131113052, "learning_rate": 9.938829900868103e-06, "loss": 1.6211, "step": 178 }, { "epoch": 0.06935789758872934, "grad_norm": 0.12434589862823486, "learning_rate": 9.937857118163106e-06, "loss": 1.6132, "step": 179 }, { "epoch": 0.06974537187693454, "grad_norm": 0.14551475644111633, "learning_rate": 9.936876709681668e-06, "loss": 1.5992, "step": 180 }, { "epoch": 0.07013284616513972, "grad_norm": 0.08847351372241974, "learning_rate": 9.93588867693788e-06, "loss": 1.6585, "step": 181 }, { "epoch": 0.07052032045334491, "grad_norm": 0.11845949292182922, "learning_rate": 9.934893021457612e-06, "loss": 1.6543, "step": 182 }, { "epoch": 0.07090779474155011, "grad_norm": 0.09296925365924835, "learning_rate": 9.933889744778503e-06, "loss": 1.6166, "step": 183 }, { "epoch": 0.0712952690297553, "grad_norm": 0.15323126316070557, "learning_rate": 9.93287884844996e-06, "loss": 1.601, "step": 184 }, { "epoch": 0.0716827433179605, "grad_norm": 0.10131455957889557, "learning_rate": 9.931860334033165e-06, "loss": 1.6588, "step": 185 }, { "epoch": 0.07207021760616568, "grad_norm": 0.09516572207212448, "learning_rate": 9.930834203101058e-06, "loss": 1.6835, "step": 186 }, { "epoch": 0.07245769189437087, "grad_norm": 0.13543346524238586, "learning_rate": 9.92980045723834e-06, "loss": 1.6107, "step": 187 }, { "epoch": 0.07284516618257607, "grad_norm": 0.10232547670602798, "learning_rate": 9.928759098041482e-06, "loss": 1.6022, "step": 188 }, { "epoch": 0.07323264047078126, "grad_norm": 0.08407041430473328, "learning_rate": 9.927710127118704e-06, "loss": 1.6123, "step": 189 }, { "epoch": 0.07362011475898646, "grad_norm": 0.0934591069817543, "learning_rate": 9.926653546089982e-06, "loss": 1.6048, "step": 190 }, { "epoch": 0.07400758904719164, "grad_norm": 0.119308702647686, "learning_rate": 9.925589356587046e-06, "loss": 1.6241, "step": 191 }, { "epoch": 0.07439506333539683, "grad_norm": 0.10647458583116531, "learning_rate": 9.924517560253378e-06, "loss": 1.6451, "step": 192 }, { "epoch": 0.07478253762360203, "grad_norm": 0.08708339929580688, "learning_rate": 9.923438158744206e-06, "loss": 1.6059, "step": 193 }, { "epoch": 0.07517001191180722, "grad_norm": 0.09105508029460907, "learning_rate": 9.922351153726505e-06, "loss": 1.6217, "step": 194 }, { "epoch": 0.07555748620001242, "grad_norm": 0.21438953280448914, "learning_rate": 9.921256546878983e-06, "loss": 1.5557, "step": 195 }, { "epoch": 0.0759449604882176, "grad_norm": 0.0810757726430893, "learning_rate": 9.920154339892104e-06, "loss": 1.5906, "step": 196 }, { "epoch": 0.07633243477642279, "grad_norm": 0.08726426213979721, "learning_rate": 9.919044534468057e-06, "loss": 1.6063, "step": 197 }, { "epoch": 0.07671990906462799, "grad_norm": 0.20424175262451172, "learning_rate": 9.917927132320769e-06, "loss": 1.5803, "step": 198 }, { "epoch": 0.07710738335283318, "grad_norm": 0.11510050296783447, "learning_rate": 9.916802135175898e-06, "loss": 1.5468, "step": 199 }, { "epoch": 0.07749485764103838, "grad_norm": 0.10130883008241653, "learning_rate": 9.915669544770837e-06, "loss": 1.5961, "step": 200 }, { "epoch": 0.07788233192924356, "grad_norm": 0.20077921450138092, "learning_rate": 9.914529362854697e-06, "loss": 1.6251, "step": 201 }, { "epoch": 0.07826980621744875, "grad_norm": 0.13915859162807465, "learning_rate": 9.913381591188318e-06, "loss": 1.5931, "step": 202 }, { "epoch": 0.07865728050565395, "grad_norm": 0.12666691839694977, "learning_rate": 9.912226231544262e-06, "loss": 1.616, "step": 203 }, { "epoch": 0.07904475479385914, "grad_norm": 0.15953180193901062, "learning_rate": 9.911063285706808e-06, "loss": 1.647, "step": 204 }, { "epoch": 0.07943222908206433, "grad_norm": 0.0889282152056694, "learning_rate": 9.909892755471948e-06, "loss": 1.6191, "step": 205 }, { "epoch": 0.07981970337026952, "grad_norm": 0.08901076018810272, "learning_rate": 9.908714642647392e-06, "loss": 1.6346, "step": 206 }, { "epoch": 0.08020717765847471, "grad_norm": 0.0844111293554306, "learning_rate": 9.907528949052558e-06, "loss": 1.6041, "step": 207 }, { "epoch": 0.0805946519466799, "grad_norm": 0.16716241836547852, "learning_rate": 9.90633567651857e-06, "loss": 1.5934, "step": 208 }, { "epoch": 0.0809821262348851, "grad_norm": 0.1390502154827118, "learning_rate": 9.905134826888258e-06, "loss": 1.6199, "step": 209 }, { "epoch": 0.0813696005230903, "grad_norm": 0.08139963448047638, "learning_rate": 9.903926402016153e-06, "loss": 1.5826, "step": 210 }, { "epoch": 0.08175707481129547, "grad_norm": 0.10778816044330597, "learning_rate": 9.902710403768485e-06, "loss": 1.5589, "step": 211 }, { "epoch": 0.08214454909950067, "grad_norm": 0.22404803335666656, "learning_rate": 9.901486834023182e-06, "loss": 1.6215, "step": 212 }, { "epoch": 0.08253202338770586, "grad_norm": 0.18316827714443207, "learning_rate": 9.900255694669861e-06, "loss": 1.5997, "step": 213 }, { "epoch": 0.08291949767591106, "grad_norm": 0.22765861451625824, "learning_rate": 9.89901698760983e-06, "loss": 1.5949, "step": 214 }, { "epoch": 0.08330697196411625, "grad_norm": 0.2155635952949524, "learning_rate": 9.89777071475609e-06, "loss": 1.5828, "step": 215 }, { "epoch": 0.08369444625232143, "grad_norm": 0.11268506199121475, "learning_rate": 9.896516878033318e-06, "loss": 1.6187, "step": 216 }, { "epoch": 0.08408192054052663, "grad_norm": 0.10832755267620087, "learning_rate": 9.895255479377878e-06, "loss": 1.5848, "step": 217 }, { "epoch": 0.08446939482873182, "grad_norm": 0.3066692650318146, "learning_rate": 9.893986520737808e-06, "loss": 1.6149, "step": 218 }, { "epoch": 0.08485686911693702, "grad_norm": 0.10997852683067322, "learning_rate": 9.892710004072826e-06, "loss": 1.5696, "step": 219 }, { "epoch": 0.08524434340514221, "grad_norm": 0.13622839748859406, "learning_rate": 9.891425931354316e-06, "loss": 1.5852, "step": 220 }, { "epoch": 0.0856318176933474, "grad_norm": 0.1692923754453659, "learning_rate": 9.89013430456534e-06, "loss": 1.6386, "step": 221 }, { "epoch": 0.08601929198155259, "grad_norm": 0.17006012797355652, "learning_rate": 9.888835125700616e-06, "loss": 1.6005, "step": 222 }, { "epoch": 0.08640676626975778, "grad_norm": 0.08118966966867447, "learning_rate": 9.887528396766532e-06, "loss": 1.5823, "step": 223 }, { "epoch": 0.08679424055796298, "grad_norm": 0.11570804566144943, "learning_rate": 9.886214119781137e-06, "loss": 1.5858, "step": 224 }, { "epoch": 0.08718171484616817, "grad_norm": 0.13321451842784882, "learning_rate": 9.88489229677413e-06, "loss": 1.619, "step": 225 }, { "epoch": 0.08756918913437337, "grad_norm": 0.09702785313129425, "learning_rate": 9.883562929786868e-06, "loss": 1.5916, "step": 226 }, { "epoch": 0.08795666342257855, "grad_norm": 0.11396509408950806, "learning_rate": 9.882226020872357e-06, "loss": 1.6007, "step": 227 }, { "epoch": 0.08834413771078374, "grad_norm": 0.07788241654634476, "learning_rate": 9.880881572095255e-06, "loss": 1.5711, "step": 228 }, { "epoch": 0.08873161199898894, "grad_norm": 0.19659921526908875, "learning_rate": 9.879529585531859e-06, "loss": 1.6034, "step": 229 }, { "epoch": 0.08911908628719413, "grad_norm": 0.16165152192115784, "learning_rate": 9.878170063270108e-06, "loss": 1.6028, "step": 230 }, { "epoch": 0.08950656057539932, "grad_norm": 0.08556222915649414, "learning_rate": 9.876803007409578e-06, "loss": 1.5583, "step": 231 }, { "epoch": 0.0898940348636045, "grad_norm": 0.13593269884586334, "learning_rate": 9.875428420061484e-06, "loss": 1.6275, "step": 232 }, { "epoch": 0.0902815091518097, "grad_norm": 0.13713160157203674, "learning_rate": 9.874046303348665e-06, "loss": 1.6036, "step": 233 }, { "epoch": 0.0906689834400149, "grad_norm": 0.09810218960046768, "learning_rate": 9.872656659405597e-06, "loss": 1.5525, "step": 234 }, { "epoch": 0.09105645772822009, "grad_norm": 0.09126561880111694, "learning_rate": 9.871259490378372e-06, "loss": 1.5599, "step": 235 }, { "epoch": 0.09144393201642528, "grad_norm": 0.16696327924728394, "learning_rate": 9.869854798424709e-06, "loss": 1.6291, "step": 236 }, { "epoch": 0.09183140630463046, "grad_norm": 0.15414024889469147, "learning_rate": 9.868442585713942e-06, "loss": 1.6116, "step": 237 }, { "epoch": 0.09221888059283566, "grad_norm": 0.1026405468583107, "learning_rate": 9.867022854427022e-06, "loss": 1.6276, "step": 238 }, { "epoch": 0.09260635488104085, "grad_norm": 0.10393257439136505, "learning_rate": 9.86559560675651e-06, "loss": 1.6374, "step": 239 }, { "epoch": 0.09299382916924605, "grad_norm": 0.12583240866661072, "learning_rate": 9.864160844906571e-06, "loss": 1.6279, "step": 240 }, { "epoch": 0.09338130345745124, "grad_norm": 0.12951582670211792, "learning_rate": 9.862718571092983e-06, "loss": 1.5654, "step": 241 }, { "epoch": 0.09376877774565642, "grad_norm": 0.09775348007678986, "learning_rate": 9.86126878754312e-06, "loss": 1.5652, "step": 242 }, { "epoch": 0.09415625203386162, "grad_norm": 0.08997101336717606, "learning_rate": 9.859811496495955e-06, "loss": 1.5634, "step": 243 }, { "epoch": 0.09454372632206681, "grad_norm": 0.1633729189634323, "learning_rate": 9.85834670020205e-06, "loss": 1.5701, "step": 244 }, { "epoch": 0.094931200610272, "grad_norm": 0.10564675182104111, "learning_rate": 9.856874400923568e-06, "loss": 1.57, "step": 245 }, { "epoch": 0.0953186748984772, "grad_norm": 0.08243391662836075, "learning_rate": 9.85539460093425e-06, "loss": 1.5575, "step": 246 }, { "epoch": 0.09570614918668238, "grad_norm": 0.1689504086971283, "learning_rate": 9.853907302519425e-06, "loss": 1.5511, "step": 247 }, { "epoch": 0.09609362347488758, "grad_norm": 0.0866553857922554, "learning_rate": 9.852412507976003e-06, "loss": 1.5692, "step": 248 }, { "epoch": 0.09648109776309277, "grad_norm": 0.08665693551301956, "learning_rate": 9.850910219612467e-06, "loss": 1.6033, "step": 249 }, { "epoch": 0.09686857205129797, "grad_norm": 0.13548393547534943, "learning_rate": 9.849400439748873e-06, "loss": 1.5728, "step": 250 }, { "epoch": 0.09725604633950316, "grad_norm": 0.08641160279512405, "learning_rate": 9.847883170716852e-06, "loss": 1.5731, "step": 251 }, { "epoch": 0.09764352062770834, "grad_norm": 0.1074226051568985, "learning_rate": 9.846358414859598e-06, "loss": 1.5649, "step": 252 }, { "epoch": 0.09803099491591354, "grad_norm": 0.08257234841585159, "learning_rate": 9.844826174531863e-06, "loss": 1.5609, "step": 253 }, { "epoch": 0.09841846920411873, "grad_norm": 0.16954340040683746, "learning_rate": 9.843286452099964e-06, "loss": 1.5876, "step": 254 }, { "epoch": 0.09880594349232392, "grad_norm": 0.08577932417392731, "learning_rate": 9.841739249941772e-06, "loss": 1.5812, "step": 255 }, { "epoch": 0.09919341778052912, "grad_norm": 0.13520435988903046, "learning_rate": 9.840184570446702e-06, "loss": 1.5758, "step": 256 }, { "epoch": 0.0995808920687343, "grad_norm": 0.13704164326190948, "learning_rate": 9.838622416015729e-06, "loss": 1.5863, "step": 257 }, { "epoch": 0.0999683663569395, "grad_norm": 0.16886204481124878, "learning_rate": 9.83705278906136e-06, "loss": 1.5744, "step": 258 }, { "epoch": 0.10035584064514469, "grad_norm": 0.09547556191682816, "learning_rate": 9.83547569200765e-06, "loss": 1.5653, "step": 259 }, { "epoch": 0.10074331493334988, "grad_norm": 0.16017162799835205, "learning_rate": 9.833891127290186e-06, "loss": 1.5659, "step": 260 }, { "epoch": 0.10113078922155508, "grad_norm": 0.15751776099205017, "learning_rate": 9.832299097356091e-06, "loss": 1.5334, "step": 261 }, { "epoch": 0.10151826350976026, "grad_norm": 0.12815676629543304, "learning_rate": 9.830699604664015e-06, "loss": 1.5578, "step": 262 }, { "epoch": 0.10190573779796545, "grad_norm": 0.14528295397758484, "learning_rate": 9.829092651684136e-06, "loss": 1.5973, "step": 263 }, { "epoch": 0.10229321208617065, "grad_norm": 0.19544392824172974, "learning_rate": 9.827478240898146e-06, "loss": 1.5871, "step": 264 }, { "epoch": 0.10268068637437584, "grad_norm": 0.1537884771823883, "learning_rate": 9.825856374799262e-06, "loss": 1.5618, "step": 265 }, { "epoch": 0.10306816066258104, "grad_norm": 0.14332221448421478, "learning_rate": 9.82422705589221e-06, "loss": 1.5998, "step": 266 }, { "epoch": 0.10345563495078622, "grad_norm": 0.13526982069015503, "learning_rate": 9.822590286693232e-06, "loss": 1.5723, "step": 267 }, { "epoch": 0.10384310923899141, "grad_norm": 0.20340216159820557, "learning_rate": 9.820946069730067e-06, "loss": 1.5104, "step": 268 }, { "epoch": 0.10423058352719661, "grad_norm": 0.12306951731443405, "learning_rate": 9.81929440754196e-06, "loss": 1.5829, "step": 269 }, { "epoch": 0.1046180578154018, "grad_norm": 0.15378281474113464, "learning_rate": 9.81763530267966e-06, "loss": 1.6172, "step": 270 }, { "epoch": 0.105005532103607, "grad_norm": 0.1677636355161667, "learning_rate": 9.815968757705398e-06, "loss": 1.6057, "step": 271 }, { "epoch": 0.10539300639181218, "grad_norm": 0.08368021249771118, "learning_rate": 9.814294775192905e-06, "loss": 1.5556, "step": 272 }, { "epoch": 0.10578048068001737, "grad_norm": 0.1396418958902359, "learning_rate": 9.812613357727395e-06, "loss": 1.6172, "step": 273 }, { "epoch": 0.10616795496822257, "grad_norm": 0.12649010121822357, "learning_rate": 9.810924507905562e-06, "loss": 1.573, "step": 274 }, { "epoch": 0.10655542925642776, "grad_norm": 0.12166839838027954, "learning_rate": 9.809228228335582e-06, "loss": 1.5711, "step": 275 }, { "epoch": 0.10694290354463296, "grad_norm": 0.1407092958688736, "learning_rate": 9.807524521637103e-06, "loss": 1.5902, "step": 276 }, { "epoch": 0.10733037783283814, "grad_norm": 0.08702956885099411, "learning_rate": 9.805813390441244e-06, "loss": 1.5821, "step": 277 }, { "epoch": 0.10771785212104333, "grad_norm": 0.0933813601732254, "learning_rate": 9.804094837390585e-06, "loss": 1.594, "step": 278 }, { "epoch": 0.10810532640924853, "grad_norm": 0.09233735501766205, "learning_rate": 9.802368865139178e-06, "loss": 1.5804, "step": 279 }, { "epoch": 0.10849280069745372, "grad_norm": 0.08909355103969574, "learning_rate": 9.800635476352525e-06, "loss": 1.5749, "step": 280 }, { "epoch": 0.10888027498565891, "grad_norm": 0.12215803563594818, "learning_rate": 9.798894673707584e-06, "loss": 1.5637, "step": 281 }, { "epoch": 0.10926774927386411, "grad_norm": 0.09839669615030289, "learning_rate": 9.797146459892762e-06, "loss": 1.5431, "step": 282 }, { "epoch": 0.10965522356206929, "grad_norm": 0.13411641120910645, "learning_rate": 9.79539083760791e-06, "loss": 1.5608, "step": 283 }, { "epoch": 0.11004269785027448, "grad_norm": 0.16516117751598358, "learning_rate": 9.793627809564324e-06, "loss": 1.5533, "step": 284 }, { "epoch": 0.11043017213847968, "grad_norm": 0.10852131247520447, "learning_rate": 9.791857378484737e-06, "loss": 1.5755, "step": 285 }, { "epoch": 0.11081764642668487, "grad_norm": 0.13034188747406006, "learning_rate": 9.790079547103311e-06, "loss": 1.5431, "step": 286 }, { "epoch": 0.11120512071489007, "grad_norm": 0.13477660715579987, "learning_rate": 9.788294318165639e-06, "loss": 1.601, "step": 287 }, { "epoch": 0.11159259500309525, "grad_norm": 0.10615213215351105, "learning_rate": 9.786501694428738e-06, "loss": 1.5344, "step": 288 }, { "epoch": 0.11198006929130044, "grad_norm": 0.12622405588626862, "learning_rate": 9.784701678661045e-06, "loss": 1.5873, "step": 289 }, { "epoch": 0.11236754357950564, "grad_norm": 0.11425310373306274, "learning_rate": 9.782894273642414e-06, "loss": 1.5184, "step": 290 }, { "epoch": 0.11275501786771083, "grad_norm": 0.2101593315601349, "learning_rate": 9.781079482164111e-06, "loss": 1.5374, "step": 291 }, { "epoch": 0.11314249215591603, "grad_norm": 0.1042136698961258, "learning_rate": 9.779257307028805e-06, "loss": 1.5696, "step": 292 }, { "epoch": 0.11352996644412121, "grad_norm": 0.11563464254140854, "learning_rate": 9.777427751050571e-06, "loss": 1.5688, "step": 293 }, { "epoch": 0.1139174407323264, "grad_norm": 0.1242155134677887, "learning_rate": 9.775590817054887e-06, "loss": 1.5754, "step": 294 }, { "epoch": 0.1143049150205316, "grad_norm": 0.13096097111701965, "learning_rate": 9.77374650787862e-06, "loss": 1.5851, "step": 295 }, { "epoch": 0.11469238930873679, "grad_norm": 0.08922680467367172, "learning_rate": 9.771894826370021e-06, "loss": 1.5833, "step": 296 }, { "epoch": 0.11507986359694199, "grad_norm": 0.10062846541404724, "learning_rate": 9.77003577538874e-06, "loss": 1.5794, "step": 297 }, { "epoch": 0.11546733788514717, "grad_norm": 0.10295723378658295, "learning_rate": 9.768169357805796e-06, "loss": 1.5617, "step": 298 }, { "epoch": 0.11585481217335236, "grad_norm": 0.12614895403385162, "learning_rate": 9.766295576503597e-06, "loss": 1.5741, "step": 299 }, { "epoch": 0.11624228646155756, "grad_norm": 0.14072567224502563, "learning_rate": 9.76441443437591e-06, "loss": 1.6001, "step": 300 }, { "epoch": 0.11662976074976275, "grad_norm": 0.10441557317972183, "learning_rate": 9.762525934327878e-06, "loss": 1.6309, "step": 301 }, { "epoch": 0.11701723503796795, "grad_norm": 0.1201942190527916, "learning_rate": 9.760630079276002e-06, "loss": 1.496, "step": 302 }, { "epoch": 0.11740470932617313, "grad_norm": 0.13539598882198334, "learning_rate": 9.758726872148148e-06, "loss": 1.5103, "step": 303 }, { "epoch": 0.11779218361437832, "grad_norm": 0.17220677435398102, "learning_rate": 9.756816315883531e-06, "loss": 1.5762, "step": 304 }, { "epoch": 0.11817965790258352, "grad_norm": 0.10578922927379608, "learning_rate": 9.754898413432719e-06, "loss": 1.5344, "step": 305 }, { "epoch": 0.11856713219078871, "grad_norm": 0.09817967563867569, "learning_rate": 9.75297316775762e-06, "loss": 1.5667, "step": 306 }, { "epoch": 0.1189546064789939, "grad_norm": 0.14822445809841156, "learning_rate": 9.75104058183149e-06, "loss": 1.5663, "step": 307 }, { "epoch": 0.11934208076719909, "grad_norm": 0.10673946887254715, "learning_rate": 9.749100658638914e-06, "loss": 1.5615, "step": 308 }, { "epoch": 0.11972955505540428, "grad_norm": 0.10701555013656616, "learning_rate": 9.747153401175812e-06, "loss": 1.5415, "step": 309 }, { "epoch": 0.12011702934360947, "grad_norm": 0.10496748983860016, "learning_rate": 9.74519881244943e-06, "loss": 1.5981, "step": 310 }, { "epoch": 0.12050450363181467, "grad_norm": 0.10002732276916504, "learning_rate": 9.743236895478335e-06, "loss": 1.5715, "step": 311 }, { "epoch": 0.12089197792001986, "grad_norm": 0.10935622453689575, "learning_rate": 9.741267653292413e-06, "loss": 1.5523, "step": 312 }, { "epoch": 0.12127945220822504, "grad_norm": 0.10152000933885574, "learning_rate": 9.739291088932861e-06, "loss": 1.5803, "step": 313 }, { "epoch": 0.12166692649643024, "grad_norm": 0.10715696960687637, "learning_rate": 9.737307205452187e-06, "loss": 1.5434, "step": 314 }, { "epoch": 0.12205440078463543, "grad_norm": 0.11353219300508499, "learning_rate": 9.735316005914198e-06, "loss": 1.5566, "step": 315 }, { "epoch": 0.12244187507284063, "grad_norm": 0.0952208861708641, "learning_rate": 9.733317493394004e-06, "loss": 1.528, "step": 316 }, { "epoch": 0.12282934936104582, "grad_norm": 0.1462145745754242, "learning_rate": 9.731311670978003e-06, "loss": 1.5883, "step": 317 }, { "epoch": 0.123216823649251, "grad_norm": 0.11364693194627762, "learning_rate": 9.729298541763887e-06, "loss": 1.5374, "step": 318 }, { "epoch": 0.1236042979374562, "grad_norm": 0.1391831487417221, "learning_rate": 9.727278108860633e-06, "loss": 1.5512, "step": 319 }, { "epoch": 0.12399177222566139, "grad_norm": 0.11221762746572495, "learning_rate": 9.725250375388493e-06, "loss": 1.5806, "step": 320 }, { "epoch": 0.12437924651386659, "grad_norm": 0.09370563924312592, "learning_rate": 9.723215344478996e-06, "loss": 1.5893, "step": 321 }, { "epoch": 0.12476672080207178, "grad_norm": 0.12864139676094055, "learning_rate": 9.721173019274942e-06, "loss": 1.5632, "step": 322 }, { "epoch": 0.12515419509027698, "grad_norm": 0.10838036984205246, "learning_rate": 9.719123402930393e-06, "loss": 1.5821, "step": 323 }, { "epoch": 0.12554166937848216, "grad_norm": 0.10382389277219772, "learning_rate": 9.717066498610673e-06, "loss": 1.5288, "step": 324 }, { "epoch": 0.12592914366668737, "grad_norm": 0.09821972250938416, "learning_rate": 9.715002309492362e-06, "loss": 1.541, "step": 325 }, { "epoch": 0.12631661795489255, "grad_norm": 0.14227209985256195, "learning_rate": 9.712930838763289e-06, "loss": 1.5418, "step": 326 }, { "epoch": 0.12670409224309773, "grad_norm": 0.10564311593770981, "learning_rate": 9.710852089622528e-06, "loss": 1.5857, "step": 327 }, { "epoch": 0.12709156653130294, "grad_norm": 0.10986583679914474, "learning_rate": 9.708766065280392e-06, "loss": 1.584, "step": 328 }, { "epoch": 0.12747904081950812, "grad_norm": 0.11950498074293137, "learning_rate": 9.706672768958434e-06, "loss": 1.5471, "step": 329 }, { "epoch": 0.12786651510771332, "grad_norm": 0.1180163249373436, "learning_rate": 9.704572203889433e-06, "loss": 1.5379, "step": 330 }, { "epoch": 0.1282539893959185, "grad_norm": 0.12115409970283508, "learning_rate": 9.702464373317397e-06, "loss": 1.5822, "step": 331 }, { "epoch": 0.12864146368412369, "grad_norm": 0.09491932392120361, "learning_rate": 9.700349280497552e-06, "loss": 1.5163, "step": 332 }, { "epoch": 0.1290289379723289, "grad_norm": 0.10658363252878189, "learning_rate": 9.69822692869634e-06, "loss": 1.5097, "step": 333 }, { "epoch": 0.12941641226053407, "grad_norm": 0.12077996134757996, "learning_rate": 9.696097321191414e-06, "loss": 1.5314, "step": 334 }, { "epoch": 0.12980388654873928, "grad_norm": 0.13343365490436554, "learning_rate": 9.693960461271632e-06, "loss": 1.5478, "step": 335 }, { "epoch": 0.13019136083694446, "grad_norm": 0.1273120790719986, "learning_rate": 9.691816352237052e-06, "loss": 1.5913, "step": 336 }, { "epoch": 0.13057883512514964, "grad_norm": 0.11023461073637009, "learning_rate": 9.689664997398927e-06, "loss": 1.4887, "step": 337 }, { "epoch": 0.13096630941335485, "grad_norm": 0.12722419202327728, "learning_rate": 9.687506400079702e-06, "loss": 1.5388, "step": 338 }, { "epoch": 0.13135378370156003, "grad_norm": 0.12310048192739487, "learning_rate": 9.685340563613004e-06, "loss": 1.5477, "step": 339 }, { "epoch": 0.13174125798976524, "grad_norm": 0.10562162101268768, "learning_rate": 9.68316749134364e-06, "loss": 1.5537, "step": 340 }, { "epoch": 0.13212873227797042, "grad_norm": 0.09771284461021423, "learning_rate": 9.680987186627595e-06, "loss": 1.5869, "step": 341 }, { "epoch": 0.1325162065661756, "grad_norm": 0.10731471329927444, "learning_rate": 9.67879965283202e-06, "loss": 1.5925, "step": 342 }, { "epoch": 0.1329036808543808, "grad_norm": 0.1427149772644043, "learning_rate": 9.676604893335233e-06, "loss": 1.5757, "step": 343 }, { "epoch": 0.133291155142586, "grad_norm": 0.1510029137134552, "learning_rate": 9.674402911526707e-06, "loss": 1.5762, "step": 344 }, { "epoch": 0.1336786294307912, "grad_norm": 0.1269102394580841, "learning_rate": 9.672193710807072e-06, "loss": 1.5012, "step": 345 }, { "epoch": 0.13406610371899638, "grad_norm": 0.13056881725788116, "learning_rate": 9.669977294588107e-06, "loss": 1.5504, "step": 346 }, { "epoch": 0.13445357800720156, "grad_norm": 0.11616942286491394, "learning_rate": 9.667753666292732e-06, "loss": 1.5286, "step": 347 }, { "epoch": 0.13484105229540677, "grad_norm": 0.11927933245897293, "learning_rate": 9.665522829355005e-06, "loss": 1.5317, "step": 348 }, { "epoch": 0.13522852658361195, "grad_norm": 0.1476513147354126, "learning_rate": 9.66328478722012e-06, "loss": 1.5288, "step": 349 }, { "epoch": 0.13561600087181716, "grad_norm": 0.12203975766897202, "learning_rate": 9.661039543344393e-06, "loss": 1.532, "step": 350 }, { "epoch": 0.13600347516002234, "grad_norm": 0.1342812478542328, "learning_rate": 9.658787101195266e-06, "loss": 1.5289, "step": 351 }, { "epoch": 0.13639094944822752, "grad_norm": 0.10929471999406815, "learning_rate": 9.656527464251298e-06, "loss": 1.5421, "step": 352 }, { "epoch": 0.13677842373643273, "grad_norm": 0.15174749493598938, "learning_rate": 9.654260636002157e-06, "loss": 1.5454, "step": 353 }, { "epoch": 0.1371658980246379, "grad_norm": 0.12124531716108322, "learning_rate": 9.651986619948617e-06, "loss": 1.5181, "step": 354 }, { "epoch": 0.13755337231284312, "grad_norm": 0.11283097416162491, "learning_rate": 9.649705419602553e-06, "loss": 1.5182, "step": 355 }, { "epoch": 0.1379408466010483, "grad_norm": 0.12486822158098221, "learning_rate": 9.647417038486936e-06, "loss": 1.5654, "step": 356 }, { "epoch": 0.13832832088925348, "grad_norm": 0.10720991343259811, "learning_rate": 9.645121480135826e-06, "loss": 1.5527, "step": 357 }, { "epoch": 0.1387157951774587, "grad_norm": 0.11325196921825409, "learning_rate": 9.642818748094367e-06, "loss": 1.5462, "step": 358 }, { "epoch": 0.13910326946566387, "grad_norm": 0.13190177083015442, "learning_rate": 9.640508845918783e-06, "loss": 1.5211, "step": 359 }, { "epoch": 0.13949074375386908, "grad_norm": 0.1377810835838318, "learning_rate": 9.638191777176367e-06, "loss": 1.5109, "step": 360 }, { "epoch": 0.13987821804207426, "grad_norm": 0.114166758954525, "learning_rate": 9.635867545445486e-06, "loss": 1.5779, "step": 361 }, { "epoch": 0.14026569233027944, "grad_norm": 0.10922057181596756, "learning_rate": 9.633536154315561e-06, "loss": 1.5769, "step": 362 }, { "epoch": 0.14065316661848465, "grad_norm": 0.12680812180042267, "learning_rate": 9.63119760738708e-06, "loss": 1.5326, "step": 363 }, { "epoch": 0.14104064090668983, "grad_norm": 0.18183670938014984, "learning_rate": 9.628851908271572e-06, "loss": 1.5656, "step": 364 }, { "epoch": 0.14142811519489504, "grad_norm": 0.12218398600816727, "learning_rate": 9.626499060591618e-06, "loss": 1.4835, "step": 365 }, { "epoch": 0.14181558948310022, "grad_norm": 0.12000227719545364, "learning_rate": 9.624139067980838e-06, "loss": 1.5707, "step": 366 }, { "epoch": 0.1422030637713054, "grad_norm": 0.11143898218870163, "learning_rate": 9.621771934083882e-06, "loss": 1.5807, "step": 367 }, { "epoch": 0.1425905380595106, "grad_norm": 0.1680416464805603, "learning_rate": 9.619397662556434e-06, "loss": 1.511, "step": 368 }, { "epoch": 0.1429780123477158, "grad_norm": 0.18268853425979614, "learning_rate": 9.617016257065197e-06, "loss": 1.5241, "step": 369 }, { "epoch": 0.143365486635921, "grad_norm": 0.11445441842079163, "learning_rate": 9.614627721287897e-06, "loss": 1.5613, "step": 370 }, { "epoch": 0.14375296092412618, "grad_norm": 0.11772032082080841, "learning_rate": 9.612232058913263e-06, "loss": 1.5806, "step": 371 }, { "epoch": 0.14414043521233136, "grad_norm": 0.19190987944602966, "learning_rate": 9.609829273641034e-06, "loss": 1.5324, "step": 372 }, { "epoch": 0.14452790950053657, "grad_norm": 0.14022958278656006, "learning_rate": 9.607419369181954e-06, "loss": 1.5465, "step": 373 }, { "epoch": 0.14491538378874175, "grad_norm": 0.13393110036849976, "learning_rate": 9.605002349257755e-06, "loss": 1.5576, "step": 374 }, { "epoch": 0.14530285807694696, "grad_norm": 0.11381831020116806, "learning_rate": 9.602578217601162e-06, "loss": 1.5752, "step": 375 }, { "epoch": 0.14569033236515214, "grad_norm": 0.15356788039207458, "learning_rate": 9.60014697795588e-06, "loss": 1.5501, "step": 376 }, { "epoch": 0.14607780665335732, "grad_norm": 0.1350572109222412, "learning_rate": 9.59770863407659e-06, "loss": 1.5507, "step": 377 }, { "epoch": 0.14646528094156253, "grad_norm": 0.1601293534040451, "learning_rate": 9.59526318972895e-06, "loss": 1.5451, "step": 378 }, { "epoch": 0.1468527552297677, "grad_norm": 0.10903230309486389, "learning_rate": 9.59281064868958e-06, "loss": 1.5094, "step": 379 }, { "epoch": 0.14724022951797291, "grad_norm": 0.11794281005859375, "learning_rate": 9.590351014746059e-06, "loss": 1.5767, "step": 380 }, { "epoch": 0.1476277038061781, "grad_norm": 0.1676027774810791, "learning_rate": 9.587884291696922e-06, "loss": 1.5396, "step": 381 }, { "epoch": 0.14801517809438328, "grad_norm": 0.13041293621063232, "learning_rate": 9.58541048335165e-06, "loss": 1.523, "step": 382 }, { "epoch": 0.14840265238258848, "grad_norm": 0.11311472952365875, "learning_rate": 9.58292959353067e-06, "loss": 1.486, "step": 383 }, { "epoch": 0.14879012667079367, "grad_norm": 0.09928450733423233, "learning_rate": 9.580441626065339e-06, "loss": 1.5855, "step": 384 }, { "epoch": 0.14917760095899887, "grad_norm": 0.1560458093881607, "learning_rate": 9.57794658479795e-06, "loss": 1.5327, "step": 385 }, { "epoch": 0.14956507524720405, "grad_norm": 0.10907760262489319, "learning_rate": 9.575444473581718e-06, "loss": 1.5392, "step": 386 }, { "epoch": 0.14995254953540924, "grad_norm": 0.10431972146034241, "learning_rate": 9.572935296280778e-06, "loss": 1.45, "step": 387 }, { "epoch": 0.15034002382361444, "grad_norm": 0.10802378505468369, "learning_rate": 9.570419056770174e-06, "loss": 1.5488, "step": 388 }, { "epoch": 0.15072749811181962, "grad_norm": 0.12495695054531097, "learning_rate": 9.56789575893586e-06, "loss": 1.5704, "step": 389 }, { "epoch": 0.15111497240002483, "grad_norm": 0.13028475642204285, "learning_rate": 9.56536540667469e-06, "loss": 1.5659, "step": 390 }, { "epoch": 0.15150244668823, "grad_norm": 0.11326300352811813, "learning_rate": 9.562828003894411e-06, "loss": 1.5104, "step": 391 }, { "epoch": 0.1518899209764352, "grad_norm": 0.11274281144142151, "learning_rate": 9.56028355451366e-06, "loss": 1.5329, "step": 392 }, { "epoch": 0.1522773952646404, "grad_norm": 0.12883849442005157, "learning_rate": 9.557732062461955e-06, "loss": 1.5737, "step": 393 }, { "epoch": 0.15266486955284558, "grad_norm": 0.12740831077098846, "learning_rate": 9.555173531679692e-06, "loss": 1.5171, "step": 394 }, { "epoch": 0.1530523438410508, "grad_norm": 0.11495451629161835, "learning_rate": 9.552607966118136e-06, "loss": 1.5147, "step": 395 }, { "epoch": 0.15343981812925597, "grad_norm": 0.11138264089822769, "learning_rate": 9.550035369739416e-06, "loss": 1.4926, "step": 396 }, { "epoch": 0.15382729241746115, "grad_norm": 0.15966415405273438, "learning_rate": 9.54745574651652e-06, "loss": 1.5254, "step": 397 }, { "epoch": 0.15421476670566636, "grad_norm": 0.14715728163719177, "learning_rate": 9.544869100433287e-06, "loss": 1.5588, "step": 398 }, { "epoch": 0.15460224099387154, "grad_norm": 0.12210125476121902, "learning_rate": 9.542275435484402e-06, "loss": 1.5958, "step": 399 }, { "epoch": 0.15498971528207675, "grad_norm": 0.11355980485677719, "learning_rate": 9.539674755675392e-06, "loss": 1.5778, "step": 400 }, { "epoch": 0.15537718957028193, "grad_norm": 0.12520012259483337, "learning_rate": 9.537067065022613e-06, "loss": 1.523, "step": 401 }, { "epoch": 0.1557646638584871, "grad_norm": 0.13665682077407837, "learning_rate": 9.534452367553251e-06, "loss": 1.5374, "step": 402 }, { "epoch": 0.15615213814669232, "grad_norm": 0.1255849450826645, "learning_rate": 9.531830667305312e-06, "loss": 1.5196, "step": 403 }, { "epoch": 0.1565396124348975, "grad_norm": 0.12233111262321472, "learning_rate": 9.529201968327618e-06, "loss": 1.4766, "step": 404 }, { "epoch": 0.1569270867231027, "grad_norm": 0.14514026045799255, "learning_rate": 9.526566274679796e-06, "loss": 1.5376, "step": 405 }, { "epoch": 0.1573145610113079, "grad_norm": 0.12989237904548645, "learning_rate": 9.52392359043228e-06, "loss": 1.5005, "step": 406 }, { "epoch": 0.15770203529951307, "grad_norm": 0.10963813960552216, "learning_rate": 9.521273919666298e-06, "loss": 1.5413, "step": 407 }, { "epoch": 0.15808950958771828, "grad_norm": 0.1274612993001938, "learning_rate": 9.518617266473869e-06, "loss": 1.5695, "step": 408 }, { "epoch": 0.15847698387592346, "grad_norm": 0.12194512784481049, "learning_rate": 9.515953634957788e-06, "loss": 1.5031, "step": 409 }, { "epoch": 0.15886445816412867, "grad_norm": 0.12384016811847687, "learning_rate": 9.513283029231638e-06, "loss": 1.5204, "step": 410 }, { "epoch": 0.15925193245233385, "grad_norm": 0.1136811152100563, "learning_rate": 9.510605453419764e-06, "loss": 1.5247, "step": 411 }, { "epoch": 0.15963940674053903, "grad_norm": 0.10937613248825073, "learning_rate": 9.50792091165728e-06, "loss": 1.554, "step": 412 }, { "epoch": 0.16002688102874424, "grad_norm": 0.13042767345905304, "learning_rate": 9.505229408090057e-06, "loss": 1.4949, "step": 413 }, { "epoch": 0.16041435531694942, "grad_norm": 0.128926083445549, "learning_rate": 9.502530946874714e-06, "loss": 1.4875, "step": 414 }, { "epoch": 0.16080182960515463, "grad_norm": 0.14757691323757172, "learning_rate": 9.499825532178622e-06, "loss": 1.5847, "step": 415 }, { "epoch": 0.1611893038933598, "grad_norm": 0.10990522801876068, "learning_rate": 9.49711316817988e-06, "loss": 1.4871, "step": 416 }, { "epoch": 0.161576778181565, "grad_norm": 0.10873179882764816, "learning_rate": 9.494393859067333e-06, "loss": 1.4942, "step": 417 }, { "epoch": 0.1619642524697702, "grad_norm": 0.14251823723316193, "learning_rate": 9.49166760904054e-06, "loss": 1.5326, "step": 418 }, { "epoch": 0.16235172675797538, "grad_norm": 0.12142035365104675, "learning_rate": 9.488934422309785e-06, "loss": 1.5601, "step": 419 }, { "epoch": 0.1627392010461806, "grad_norm": 0.1317966729402542, "learning_rate": 9.486194303096062e-06, "loss": 1.5734, "step": 420 }, { "epoch": 0.16312667533438577, "grad_norm": 0.1172923743724823, "learning_rate": 9.483447255631072e-06, "loss": 1.4892, "step": 421 }, { "epoch": 0.16351414962259095, "grad_norm": 0.11974377930164337, "learning_rate": 9.480693284157219e-06, "loss": 1.5246, "step": 422 }, { "epoch": 0.16390162391079616, "grad_norm": 0.11286862939596176, "learning_rate": 9.477932392927594e-06, "loss": 1.496, "step": 423 }, { "epoch": 0.16428909819900134, "grad_norm": 0.13607139885425568, "learning_rate": 9.475164586205977e-06, "loss": 1.4736, "step": 424 }, { "epoch": 0.16467657248720655, "grad_norm": 0.1372896134853363, "learning_rate": 9.472389868266831e-06, "loss": 1.5546, "step": 425 }, { "epoch": 0.16506404677541173, "grad_norm": 0.1263563185930252, "learning_rate": 9.469608243395289e-06, "loss": 1.555, "step": 426 }, { "epoch": 0.1654515210636169, "grad_norm": 0.1109975278377533, "learning_rate": 9.46681971588715e-06, "loss": 1.4692, "step": 427 }, { "epoch": 0.16583899535182212, "grad_norm": 0.12901054322719574, "learning_rate": 9.464024290048879e-06, "loss": 1.5192, "step": 428 }, { "epoch": 0.1662264696400273, "grad_norm": 0.11917410045862198, "learning_rate": 9.461221970197585e-06, "loss": 1.5378, "step": 429 }, { "epoch": 0.1666139439282325, "grad_norm": 0.12285107374191284, "learning_rate": 9.458412760661032e-06, "loss": 1.5511, "step": 430 }, { "epoch": 0.16700141821643769, "grad_norm": 0.12914107739925385, "learning_rate": 9.455596665777618e-06, "loss": 1.5131, "step": 431 }, { "epoch": 0.16738889250464287, "grad_norm": 0.12333719432353973, "learning_rate": 9.45277368989638e-06, "loss": 1.5632, "step": 432 }, { "epoch": 0.16777636679284808, "grad_norm": 0.12848618626594543, "learning_rate": 9.449943837376978e-06, "loss": 1.5436, "step": 433 }, { "epoch": 0.16816384108105326, "grad_norm": 0.12534552812576294, "learning_rate": 9.447107112589695e-06, "loss": 1.4861, "step": 434 }, { "epoch": 0.16855131536925846, "grad_norm": 0.12732192873954773, "learning_rate": 9.444263519915422e-06, "loss": 1.5749, "step": 435 }, { "epoch": 0.16893878965746364, "grad_norm": 0.11534284800291061, "learning_rate": 9.44141306374566e-06, "loss": 1.5371, "step": 436 }, { "epoch": 0.16932626394566885, "grad_norm": 0.11830921471118927, "learning_rate": 9.43855574848251e-06, "loss": 1.5203, "step": 437 }, { "epoch": 0.16971373823387403, "grad_norm": 0.1291223019361496, "learning_rate": 9.435691578538665e-06, "loss": 1.5136, "step": 438 }, { "epoch": 0.17010121252207921, "grad_norm": 0.11445064097642899, "learning_rate": 9.432820558337405e-06, "loss": 1.5161, "step": 439 }, { "epoch": 0.17048868681028442, "grad_norm": 0.11787880957126617, "learning_rate": 9.429942692312585e-06, "loss": 1.5285, "step": 440 }, { "epoch": 0.1708761610984896, "grad_norm": 0.15770497918128967, "learning_rate": 9.42705798490864e-06, "loss": 1.5411, "step": 441 }, { "epoch": 0.1712636353866948, "grad_norm": 0.13816951215267181, "learning_rate": 9.424166440580563e-06, "loss": 1.5323, "step": 442 }, { "epoch": 0.1716511096749, "grad_norm": 0.10934319347143173, "learning_rate": 9.421268063793907e-06, "loss": 1.4856, "step": 443 }, { "epoch": 0.17203858396310517, "grad_norm": 0.12055516242980957, "learning_rate": 9.418362859024781e-06, "loss": 1.5306, "step": 444 }, { "epoch": 0.17242605825131038, "grad_norm": 0.14012396335601807, "learning_rate": 9.415450830759836e-06, "loss": 1.5295, "step": 445 }, { "epoch": 0.17281353253951556, "grad_norm": 0.12220621109008789, "learning_rate": 9.412531983496262e-06, "loss": 1.5042, "step": 446 }, { "epoch": 0.17320100682772077, "grad_norm": 0.11504226922988892, "learning_rate": 9.409606321741776e-06, "loss": 1.5075, "step": 447 }, { "epoch": 0.17358848111592595, "grad_norm": 0.11852091550827026, "learning_rate": 9.406673850014622e-06, "loss": 1.5083, "step": 448 }, { "epoch": 0.17397595540413113, "grad_norm": 0.12373681366443634, "learning_rate": 9.403734572843565e-06, "loss": 1.5622, "step": 449 }, { "epoch": 0.17436342969233634, "grad_norm": 0.13900652527809143, "learning_rate": 9.400788494767872e-06, "loss": 1.5114, "step": 450 }, { "epoch": 0.17475090398054152, "grad_norm": 0.13485068082809448, "learning_rate": 9.397835620337317e-06, "loss": 1.4476, "step": 451 }, { "epoch": 0.17513837826874673, "grad_norm": 0.13181337714195251, "learning_rate": 9.39487595411217e-06, "loss": 1.5587, "step": 452 }, { "epoch": 0.1755258525569519, "grad_norm": 0.12997408211231232, "learning_rate": 9.39190950066319e-06, "loss": 1.5105, "step": 453 }, { "epoch": 0.1759133268451571, "grad_norm": 0.09897690266370773, "learning_rate": 9.388936264571618e-06, "loss": 1.5378, "step": 454 }, { "epoch": 0.1763008011333623, "grad_norm": 0.11829683184623718, "learning_rate": 9.385956250429168e-06, "loss": 1.5202, "step": 455 }, { "epoch": 0.17668827542156748, "grad_norm": 0.15152840316295624, "learning_rate": 9.382969462838023e-06, "loss": 1.5217, "step": 456 }, { "epoch": 0.1770757497097727, "grad_norm": 0.13857117295265198, "learning_rate": 9.37997590641083e-06, "loss": 1.5199, "step": 457 }, { "epoch": 0.17746322399797787, "grad_norm": 0.1214204803109169, "learning_rate": 9.37697558577068e-06, "loss": 1.4421, "step": 458 }, { "epoch": 0.17785069828618305, "grad_norm": 0.12047615647315979, "learning_rate": 9.37396850555112e-06, "loss": 1.5244, "step": 459 }, { "epoch": 0.17823817257438826, "grad_norm": 0.13479046523571014, "learning_rate": 9.37095467039613e-06, "loss": 1.4873, "step": 460 }, { "epoch": 0.17862564686259344, "grad_norm": 0.1418432891368866, "learning_rate": 9.367934084960129e-06, "loss": 1.514, "step": 461 }, { "epoch": 0.17901312115079865, "grad_norm": 0.11953197419643402, "learning_rate": 9.364906753907952e-06, "loss": 1.5415, "step": 462 }, { "epoch": 0.17940059543900383, "grad_norm": 0.1286998987197876, "learning_rate": 9.361872681914856e-06, "loss": 1.5396, "step": 463 }, { "epoch": 0.179788069727209, "grad_norm": 0.12686249613761902, "learning_rate": 9.35883187366651e-06, "loss": 1.5034, "step": 464 }, { "epoch": 0.18017554401541422, "grad_norm": 0.12360259145498276, "learning_rate": 9.355784333858982e-06, "loss": 1.4916, "step": 465 }, { "epoch": 0.1805630183036194, "grad_norm": 0.13778156042099, "learning_rate": 9.35273006719874e-06, "loss": 1.5307, "step": 466 }, { "epoch": 0.1809504925918246, "grad_norm": 0.13866668939590454, "learning_rate": 9.349669078402638e-06, "loss": 1.5008, "step": 467 }, { "epoch": 0.1813379668800298, "grad_norm": 0.1540013700723648, "learning_rate": 9.346601372197914e-06, "loss": 1.5404, "step": 468 }, { "epoch": 0.18172544116823497, "grad_norm": 0.13482119143009186, "learning_rate": 9.343526953322174e-06, "loss": 1.5176, "step": 469 }, { "epoch": 0.18211291545644018, "grad_norm": 0.14030858874320984, "learning_rate": 9.340445826523399e-06, "loss": 1.5123, "step": 470 }, { "epoch": 0.18250038974464536, "grad_norm": 0.12236494570970535, "learning_rate": 9.337357996559921e-06, "loss": 1.4698, "step": 471 }, { "epoch": 0.18288786403285057, "grad_norm": 0.11665984243154526, "learning_rate": 9.334263468200431e-06, "loss": 1.5088, "step": 472 }, { "epoch": 0.18327533832105575, "grad_norm": 0.13434793055057526, "learning_rate": 9.33116224622396e-06, "loss": 1.5428, "step": 473 }, { "epoch": 0.18366281260926093, "grad_norm": 0.1302552968263626, "learning_rate": 9.328054335419884e-06, "loss": 1.5295, "step": 474 }, { "epoch": 0.18405028689746614, "grad_norm": 0.1329590231180191, "learning_rate": 9.324939740587895e-06, "loss": 1.5237, "step": 475 }, { "epoch": 0.18443776118567132, "grad_norm": 0.12155267596244812, "learning_rate": 9.32181846653802e-06, "loss": 1.5023, "step": 476 }, { "epoch": 0.18482523547387653, "grad_norm": 0.16110683977603912, "learning_rate": 9.318690518090594e-06, "loss": 1.5055, "step": 477 }, { "epoch": 0.1852127097620817, "grad_norm": 0.14214929938316345, "learning_rate": 9.315555900076266e-06, "loss": 1.5092, "step": 478 }, { "epoch": 0.1856001840502869, "grad_norm": 0.11600382626056671, "learning_rate": 9.312414617335978e-06, "loss": 1.5159, "step": 479 }, { "epoch": 0.1859876583384921, "grad_norm": 0.12991060316562653, "learning_rate": 9.30926667472097e-06, "loss": 1.5089, "step": 480 }, { "epoch": 0.18637513262669728, "grad_norm": 0.13809935748577118, "learning_rate": 9.306112077092763e-06, "loss": 1.5505, "step": 481 }, { "epoch": 0.18676260691490248, "grad_norm": 0.14999495446681976, "learning_rate": 9.302950829323159e-06, "loss": 1.5501, "step": 482 }, { "epoch": 0.18715008120310767, "grad_norm": 0.11384769529104233, "learning_rate": 9.299782936294229e-06, "loss": 1.4875, "step": 483 }, { "epoch": 0.18753755549131285, "grad_norm": 0.14327402412891388, "learning_rate": 9.296608402898306e-06, "loss": 1.5231, "step": 484 }, { "epoch": 0.18792502977951805, "grad_norm": 0.1839590221643448, "learning_rate": 9.29342723403798e-06, "loss": 1.5427, "step": 485 }, { "epoch": 0.18831250406772324, "grad_norm": 0.15044182538986206, "learning_rate": 9.290239434626085e-06, "loss": 1.5198, "step": 486 }, { "epoch": 0.18869997835592844, "grad_norm": 0.13686273992061615, "learning_rate": 9.287045009585698e-06, "loss": 1.483, "step": 487 }, { "epoch": 0.18908745264413362, "grad_norm": 0.13232392072677612, "learning_rate": 9.283843963850127e-06, "loss": 1.5788, "step": 488 }, { "epoch": 0.1894749269323388, "grad_norm": 0.15623566508293152, "learning_rate": 9.280636302362906e-06, "loss": 1.453, "step": 489 }, { "epoch": 0.189862401220544, "grad_norm": 0.16038848459720612, "learning_rate": 9.277422030077785e-06, "loss": 1.5599, "step": 490 }, { "epoch": 0.1902498755087492, "grad_norm": 0.1575280874967575, "learning_rate": 9.274201151958722e-06, "loss": 1.4537, "step": 491 }, { "epoch": 0.1906373497969544, "grad_norm": 0.12420469522476196, "learning_rate": 9.270973672979877e-06, "loss": 1.5198, "step": 492 }, { "epoch": 0.19102482408515958, "grad_norm": 0.11954915523529053, "learning_rate": 9.267739598125608e-06, "loss": 1.4995, "step": 493 }, { "epoch": 0.19141229837336476, "grad_norm": 0.10513021051883698, "learning_rate": 9.264498932390455e-06, "loss": 1.5727, "step": 494 }, { "epoch": 0.19179977266156997, "grad_norm": 0.14376065135002136, "learning_rate": 9.261251680779138e-06, "loss": 1.5122, "step": 495 }, { "epoch": 0.19218724694977515, "grad_norm": 0.14796210825443268, "learning_rate": 9.257997848306548e-06, "loss": 1.5332, "step": 496 }, { "epoch": 0.19257472123798036, "grad_norm": 0.12249498814344406, "learning_rate": 9.254737439997738e-06, "loss": 1.5168, "step": 497 }, { "epoch": 0.19296219552618554, "grad_norm": 0.1320476084947586, "learning_rate": 9.251470460887919e-06, "loss": 1.5003, "step": 498 }, { "epoch": 0.19334966981439072, "grad_norm": 0.12523126602172852, "learning_rate": 9.248196916022447e-06, "loss": 1.4654, "step": 499 }, { "epoch": 0.19373714410259593, "grad_norm": 0.14723511040210724, "learning_rate": 9.244916810456822e-06, "loss": 1.5276, "step": 500 }, { "epoch": 0.1941246183908011, "grad_norm": 0.16401898860931396, "learning_rate": 9.241630149256667e-06, "loss": 1.5151, "step": 501 }, { "epoch": 0.19451209267900632, "grad_norm": 0.12988775968551636, "learning_rate": 9.23833693749774e-06, "loss": 1.4706, "step": 502 }, { "epoch": 0.1948995669672115, "grad_norm": 0.14938020706176758, "learning_rate": 9.235037180265908e-06, "loss": 1.5282, "step": 503 }, { "epoch": 0.19528704125541668, "grad_norm": 0.11702051013708115, "learning_rate": 9.231730882657151e-06, "loss": 1.5249, "step": 504 }, { "epoch": 0.1956745155436219, "grad_norm": 0.12596414983272552, "learning_rate": 9.228418049777545e-06, "loss": 1.5534, "step": 505 }, { "epoch": 0.19606198983182707, "grad_norm": 0.12838870286941528, "learning_rate": 9.225098686743263e-06, "loss": 1.4687, "step": 506 }, { "epoch": 0.19644946412003228, "grad_norm": 0.15541799366474152, "learning_rate": 9.221772798680561e-06, "loss": 1.512, "step": 507 }, { "epoch": 0.19683693840823746, "grad_norm": 0.15415669977664948, "learning_rate": 9.218440390725772e-06, "loss": 1.533, "step": 508 }, { "epoch": 0.19722441269644264, "grad_norm": 0.12590350210666656, "learning_rate": 9.215101468025297e-06, "loss": 1.5172, "step": 509 }, { "epoch": 0.19761188698464785, "grad_norm": 0.12386880815029144, "learning_rate": 9.211756035735604e-06, "loss": 1.4874, "step": 510 }, { "epoch": 0.19799936127285303, "grad_norm": 0.11833976954221725, "learning_rate": 9.208404099023202e-06, "loss": 1.4768, "step": 511 }, { "epoch": 0.19838683556105824, "grad_norm": 0.1225903183221817, "learning_rate": 9.205045663064656e-06, "loss": 1.5405, "step": 512 }, { "epoch": 0.19877430984926342, "grad_norm": 0.1387735903263092, "learning_rate": 9.201680733046564e-06, "loss": 1.5322, "step": 513 }, { "epoch": 0.1991617841374686, "grad_norm": 0.12843471765518188, "learning_rate": 9.198309314165553e-06, "loss": 1.5276, "step": 514 }, { "epoch": 0.1995492584256738, "grad_norm": 0.13656151294708252, "learning_rate": 9.194931411628272e-06, "loss": 1.5117, "step": 515 }, { "epoch": 0.199936732713879, "grad_norm": 0.13175992667675018, "learning_rate": 9.191547030651383e-06, "loss": 1.5093, "step": 516 }, { "epoch": 0.2003242070020842, "grad_norm": 0.16088847815990448, "learning_rate": 9.188156176461553e-06, "loss": 1.4848, "step": 517 }, { "epoch": 0.20071168129028938, "grad_norm": 0.16518940031528473, "learning_rate": 9.184758854295444e-06, "loss": 1.5616, "step": 518 }, { "epoch": 0.20109915557849456, "grad_norm": 0.12913592159748077, "learning_rate": 9.181355069399708e-06, "loss": 1.5382, "step": 519 }, { "epoch": 0.20148662986669977, "grad_norm": 0.15035729110240936, "learning_rate": 9.17794482703098e-06, "loss": 1.4807, "step": 520 }, { "epoch": 0.20187410415490495, "grad_norm": 0.20702490210533142, "learning_rate": 9.174528132455866e-06, "loss": 1.4851, "step": 521 }, { "epoch": 0.20226157844311016, "grad_norm": 0.1552804559469223, "learning_rate": 9.171104990950932e-06, "loss": 1.5045, "step": 522 }, { "epoch": 0.20264905273131534, "grad_norm": 0.14050818979740143, "learning_rate": 9.167675407802708e-06, "loss": 1.4749, "step": 523 }, { "epoch": 0.20303652701952052, "grad_norm": 0.12388381361961365, "learning_rate": 9.164239388307668e-06, "loss": 1.5261, "step": 524 }, { "epoch": 0.20342400130772573, "grad_norm": 0.15349926054477692, "learning_rate": 9.160796937772225e-06, "loss": 1.535, "step": 525 }, { "epoch": 0.2038114755959309, "grad_norm": 0.14674416184425354, "learning_rate": 9.157348061512728e-06, "loss": 1.4884, "step": 526 }, { "epoch": 0.20419894988413612, "grad_norm": 0.141618549823761, "learning_rate": 9.153892764855442e-06, "loss": 1.4824, "step": 527 }, { "epoch": 0.2045864241723413, "grad_norm": 0.13604393601417542, "learning_rate": 9.150431053136557e-06, "loss": 1.5261, "step": 528 }, { "epoch": 0.20497389846054648, "grad_norm": 0.15577775239944458, "learning_rate": 9.146962931702164e-06, "loss": 1.5962, "step": 529 }, { "epoch": 0.20536137274875169, "grad_norm": 0.1435626894235611, "learning_rate": 9.143488405908253e-06, "loss": 1.5033, "step": 530 }, { "epoch": 0.20574884703695687, "grad_norm": 0.13713568449020386, "learning_rate": 9.140007481120706e-06, "loss": 1.4706, "step": 531 }, { "epoch": 0.20613632132516208, "grad_norm": 0.1451202780008316, "learning_rate": 9.136520162715288e-06, "loss": 1.5145, "step": 532 }, { "epoch": 0.20652379561336726, "grad_norm": 0.13581117987632751, "learning_rate": 9.133026456077633e-06, "loss": 1.4757, "step": 533 }, { "epoch": 0.20691126990157244, "grad_norm": 0.12992233037948608, "learning_rate": 9.129526366603252e-06, "loss": 1.5211, "step": 534 }, { "epoch": 0.20729874418977765, "grad_norm": 0.13466329872608185, "learning_rate": 9.126019899697498e-06, "loss": 1.5032, "step": 535 }, { "epoch": 0.20768621847798283, "grad_norm": 0.15325535833835602, "learning_rate": 9.122507060775587e-06, "loss": 1.4814, "step": 536 }, { "epoch": 0.20807369276618803, "grad_norm": 0.12059670686721802, "learning_rate": 9.118987855262565e-06, "loss": 1.5288, "step": 537 }, { "epoch": 0.20846116705439321, "grad_norm": 0.1276988685131073, "learning_rate": 9.115462288593318e-06, "loss": 1.4577, "step": 538 }, { "epoch": 0.2088486413425984, "grad_norm": 0.15648451447486877, "learning_rate": 9.111930366212552e-06, "loss": 1.5353, "step": 539 }, { "epoch": 0.2092361156308036, "grad_norm": 0.15963676571846008, "learning_rate": 9.108392093574785e-06, "loss": 1.4433, "step": 540 }, { "epoch": 0.20962358991900878, "grad_norm": 0.12452239543199539, "learning_rate": 9.104847476144352e-06, "loss": 1.5372, "step": 541 }, { "epoch": 0.210011064207214, "grad_norm": 0.16492298245429993, "learning_rate": 9.101296519395377e-06, "loss": 1.4595, "step": 542 }, { "epoch": 0.21039853849541917, "grad_norm": 0.14817661046981812, "learning_rate": 9.09773922881178e-06, "loss": 1.4825, "step": 543 }, { "epoch": 0.21078601278362435, "grad_norm": 0.1377200037240982, "learning_rate": 9.09417560988726e-06, "loss": 1.4557, "step": 544 }, { "epoch": 0.21117348707182956, "grad_norm": 0.11971425265073776, "learning_rate": 9.090605668125285e-06, "loss": 1.474, "step": 545 }, { "epoch": 0.21156096136003474, "grad_norm": 0.15476329624652863, "learning_rate": 9.0870294090391e-06, "loss": 1.4978, "step": 546 }, { "epoch": 0.21194843564823995, "grad_norm": 0.14965951442718506, "learning_rate": 9.083446838151693e-06, "loss": 1.5159, "step": 547 }, { "epoch": 0.21233590993644513, "grad_norm": 0.13011735677719116, "learning_rate": 9.079857960995806e-06, "loss": 1.5109, "step": 548 }, { "epoch": 0.2127233842246503, "grad_norm": 0.15410985052585602, "learning_rate": 9.076262783113921e-06, "loss": 1.4461, "step": 549 }, { "epoch": 0.21311085851285552, "grad_norm": 0.15295490622520447, "learning_rate": 9.072661310058245e-06, "loss": 1.4978, "step": 550 }, { "epoch": 0.2134983328010607, "grad_norm": 0.1336027830839157, "learning_rate": 9.069053547390715e-06, "loss": 1.4465, "step": 551 }, { "epoch": 0.2138858070892659, "grad_norm": 0.1493641883134842, "learning_rate": 9.065439500682972e-06, "loss": 1.5224, "step": 552 }, { "epoch": 0.2142732813774711, "grad_norm": 0.1289372444152832, "learning_rate": 9.06181917551637e-06, "loss": 1.5143, "step": 553 }, { "epoch": 0.21466075566567627, "grad_norm": 0.12323719263076782, "learning_rate": 9.058192577481956e-06, "loss": 1.488, "step": 554 }, { "epoch": 0.21504822995388148, "grad_norm": 0.1621454507112503, "learning_rate": 9.054559712180465e-06, "loss": 1.4945, "step": 555 }, { "epoch": 0.21543570424208666, "grad_norm": 0.1371450275182724, "learning_rate": 9.050920585222309e-06, "loss": 1.4409, "step": 556 }, { "epoch": 0.21582317853029187, "grad_norm": 0.13742724061012268, "learning_rate": 9.047275202227568e-06, "loss": 1.5142, "step": 557 }, { "epoch": 0.21621065281849705, "grad_norm": 0.12594209611415863, "learning_rate": 9.043623568825993e-06, "loss": 1.4921, "step": 558 }, { "epoch": 0.21659812710670226, "grad_norm": 0.12614323198795319, "learning_rate": 9.039965690656976e-06, "loss": 1.4612, "step": 559 }, { "epoch": 0.21698560139490744, "grad_norm": 0.14451727271080017, "learning_rate": 9.036301573369563e-06, "loss": 1.4695, "step": 560 }, { "epoch": 0.21737307568311262, "grad_norm": 0.13207532465457916, "learning_rate": 9.032631222622429e-06, "loss": 1.482, "step": 561 }, { "epoch": 0.21776054997131783, "grad_norm": 0.14257046580314636, "learning_rate": 9.028954644083879e-06, "loss": 1.4466, "step": 562 }, { "epoch": 0.218148024259523, "grad_norm": 0.1421336531639099, "learning_rate": 9.025271843431835e-06, "loss": 1.4904, "step": 563 }, { "epoch": 0.21853549854772822, "grad_norm": 0.12187008559703827, "learning_rate": 9.021582826353825e-06, "loss": 1.4526, "step": 564 }, { "epoch": 0.2189229728359334, "grad_norm": 0.14461256563663483, "learning_rate": 9.017887598546982e-06, "loss": 1.4939, "step": 565 }, { "epoch": 0.21931044712413858, "grad_norm": 0.13996057212352753, "learning_rate": 9.01418616571803e-06, "loss": 1.4837, "step": 566 }, { "epoch": 0.2196979214123438, "grad_norm": 0.1380947083234787, "learning_rate": 9.010478533583271e-06, "loss": 1.472, "step": 567 }, { "epoch": 0.22008539570054897, "grad_norm": 0.1269470900297165, "learning_rate": 9.00676470786859e-06, "loss": 1.514, "step": 568 }, { "epoch": 0.22047286998875418, "grad_norm": 0.12724709510803223, "learning_rate": 9.003044694309424e-06, "loss": 1.5507, "step": 569 }, { "epoch": 0.22086034427695936, "grad_norm": 0.13857878744602203, "learning_rate": 8.99931849865078e-06, "loss": 1.4561, "step": 570 }, { "epoch": 0.22124781856516454, "grad_norm": 0.13088543713092804, "learning_rate": 8.9955861266472e-06, "loss": 1.5093, "step": 571 }, { "epoch": 0.22163529285336975, "grad_norm": 0.14374110102653503, "learning_rate": 8.991847584062776e-06, "loss": 1.4929, "step": 572 }, { "epoch": 0.22202276714157493, "grad_norm": 0.1413789838552475, "learning_rate": 8.988102876671121e-06, "loss": 1.4787, "step": 573 }, { "epoch": 0.22241024142978014, "grad_norm": 0.13590063154697418, "learning_rate": 8.98435201025537e-06, "loss": 1.459, "step": 574 }, { "epoch": 0.22279771571798532, "grad_norm": 0.16497163474559784, "learning_rate": 8.980594990608173e-06, "loss": 1.4812, "step": 575 }, { "epoch": 0.2231851900061905, "grad_norm": 0.15340548753738403, "learning_rate": 8.976831823531683e-06, "loss": 1.4963, "step": 576 }, { "epoch": 0.2235726642943957, "grad_norm": 0.1470334529876709, "learning_rate": 8.97306251483754e-06, "loss": 1.5081, "step": 577 }, { "epoch": 0.2239601385826009, "grad_norm": 0.13579712808132172, "learning_rate": 8.969287070346876e-06, "loss": 1.5039, "step": 578 }, { "epoch": 0.2243476128708061, "grad_norm": 0.1342238187789917, "learning_rate": 8.965505495890297e-06, "loss": 1.5045, "step": 579 }, { "epoch": 0.22473508715901128, "grad_norm": 0.1456836313009262, "learning_rate": 8.961717797307872e-06, "loss": 1.5026, "step": 580 }, { "epoch": 0.22512256144721646, "grad_norm": 0.14345291256904602, "learning_rate": 8.957923980449135e-06, "loss": 1.4872, "step": 581 }, { "epoch": 0.22551003573542167, "grad_norm": 0.14995050430297852, "learning_rate": 8.95412405117306e-06, "loss": 1.4621, "step": 582 }, { "epoch": 0.22589751002362685, "grad_norm": 0.1419757902622223, "learning_rate": 8.95031801534807e-06, "loss": 1.5154, "step": 583 }, { "epoch": 0.22628498431183205, "grad_norm": 0.14974752068519592, "learning_rate": 8.946505878852011e-06, "loss": 1.5098, "step": 584 }, { "epoch": 0.22667245860003724, "grad_norm": 0.13485178351402283, "learning_rate": 8.942687647572154e-06, "loss": 1.4898, "step": 585 }, { "epoch": 0.22705993288824242, "grad_norm": 0.14423887431621552, "learning_rate": 8.938863327405182e-06, "loss": 1.4586, "step": 586 }, { "epoch": 0.22744740717644762, "grad_norm": 0.1345190405845642, "learning_rate": 8.935032924257182e-06, "loss": 1.4575, "step": 587 }, { "epoch": 0.2278348814646528, "grad_norm": 0.13593006134033203, "learning_rate": 8.931196444043635e-06, "loss": 1.4776, "step": 588 }, { "epoch": 0.228222355752858, "grad_norm": 0.15527686476707458, "learning_rate": 8.927353892689406e-06, "loss": 1.4544, "step": 589 }, { "epoch": 0.2286098300410632, "grad_norm": 0.16446349024772644, "learning_rate": 8.923505276128735e-06, "loss": 1.4621, "step": 590 }, { "epoch": 0.22899730432926838, "grad_norm": 0.12487782537937164, "learning_rate": 8.919650600305233e-06, "loss": 1.4394, "step": 591 }, { "epoch": 0.22938477861747358, "grad_norm": 0.1349981427192688, "learning_rate": 8.915789871171865e-06, "loss": 1.4505, "step": 592 }, { "epoch": 0.22977225290567876, "grad_norm": 0.14325951039791107, "learning_rate": 8.911923094690946e-06, "loss": 1.5293, "step": 593 }, { "epoch": 0.23015972719388397, "grad_norm": 0.1573743373155594, "learning_rate": 8.90805027683413e-06, "loss": 1.4621, "step": 594 }, { "epoch": 0.23054720148208915, "grad_norm": 0.1543186455965042, "learning_rate": 8.904171423582399e-06, "loss": 1.462, "step": 595 }, { "epoch": 0.23093467577029433, "grad_norm": 0.14742733538150787, "learning_rate": 8.900286540926062e-06, "loss": 1.4823, "step": 596 }, { "epoch": 0.23132215005849954, "grad_norm": 0.16544435918331146, "learning_rate": 8.89639563486473e-06, "loss": 1.4439, "step": 597 }, { "epoch": 0.23170962434670472, "grad_norm": 0.1458619385957718, "learning_rate": 8.892498711407322e-06, "loss": 1.5196, "step": 598 }, { "epoch": 0.23209709863490993, "grad_norm": 0.13703875243663788, "learning_rate": 8.88859577657205e-06, "loss": 1.4744, "step": 599 }, { "epoch": 0.2324845729231151, "grad_norm": 0.13998126983642578, "learning_rate": 8.884686836386412e-06, "loss": 1.5568, "step": 600 }, { "epoch": 0.2328720472113203, "grad_norm": 0.16143262386322021, "learning_rate": 8.880771896887172e-06, "loss": 1.5163, "step": 601 }, { "epoch": 0.2332595214995255, "grad_norm": 0.15794111788272858, "learning_rate": 8.876850964120368e-06, "loss": 1.5142, "step": 602 }, { "epoch": 0.23364699578773068, "grad_norm": 0.14832884073257446, "learning_rate": 8.872924044141289e-06, "loss": 1.427, "step": 603 }, { "epoch": 0.2340344700759359, "grad_norm": 0.1527061015367508, "learning_rate": 8.868991143014469e-06, "loss": 1.4305, "step": 604 }, { "epoch": 0.23442194436414107, "grad_norm": 0.16056810319423676, "learning_rate": 8.865052266813686e-06, "loss": 1.5049, "step": 605 }, { "epoch": 0.23480941865234625, "grad_norm": 0.14100827276706696, "learning_rate": 8.861107421621937e-06, "loss": 1.5467, "step": 606 }, { "epoch": 0.23519689294055146, "grad_norm": 0.13917477428913116, "learning_rate": 8.857156613531441e-06, "loss": 1.5104, "step": 607 }, { "epoch": 0.23558436722875664, "grad_norm": 0.12366581708192825, "learning_rate": 8.85319984864363e-06, "loss": 1.4233, "step": 608 }, { "epoch": 0.23597184151696185, "grad_norm": 0.1443452388048172, "learning_rate": 8.849237133069126e-06, "loss": 1.4769, "step": 609 }, { "epoch": 0.23635931580516703, "grad_norm": 0.12975351512432098, "learning_rate": 8.84526847292775e-06, "loss": 1.5083, "step": 610 }, { "epoch": 0.2367467900933722, "grad_norm": 0.1500852406024933, "learning_rate": 8.841293874348498e-06, "loss": 1.4746, "step": 611 }, { "epoch": 0.23713426438157742, "grad_norm": 0.1399647295475006, "learning_rate": 8.83731334346954e-06, "loss": 1.45, "step": 612 }, { "epoch": 0.2375217386697826, "grad_norm": 0.1576841026544571, "learning_rate": 8.833326886438206e-06, "loss": 1.5056, "step": 613 }, { "epoch": 0.2379092129579878, "grad_norm": 0.13910789787769318, "learning_rate": 8.829334509410978e-06, "loss": 1.4708, "step": 614 }, { "epoch": 0.238296687246193, "grad_norm": 0.15307946503162384, "learning_rate": 8.82533621855348e-06, "loss": 1.5386, "step": 615 }, { "epoch": 0.23868416153439817, "grad_norm": 0.14970731735229492, "learning_rate": 8.82133202004047e-06, "loss": 1.507, "step": 616 }, { "epoch": 0.23907163582260338, "grad_norm": 0.17096832394599915, "learning_rate": 8.81732192005583e-06, "loss": 1.502, "step": 617 }, { "epoch": 0.23945911011080856, "grad_norm": 0.13916252553462982, "learning_rate": 8.813305924792557e-06, "loss": 1.4829, "step": 618 }, { "epoch": 0.23984658439901377, "grad_norm": 0.1557036191225052, "learning_rate": 8.809284040452747e-06, "loss": 1.4667, "step": 619 }, { "epoch": 0.24023405868721895, "grad_norm": 0.1555638313293457, "learning_rate": 8.805256273247597e-06, "loss": 1.4985, "step": 620 }, { "epoch": 0.24062153297542413, "grad_norm": 0.15950115025043488, "learning_rate": 8.801222629397388e-06, "loss": 1.5124, "step": 621 }, { "epoch": 0.24100900726362934, "grad_norm": 0.15124039351940155, "learning_rate": 8.79718311513147e-06, "loss": 1.4665, "step": 622 }, { "epoch": 0.24139648155183452, "grad_norm": 0.1373637616634369, "learning_rate": 8.793137736688271e-06, "loss": 1.4819, "step": 623 }, { "epoch": 0.24178395584003973, "grad_norm": 0.1423335075378418, "learning_rate": 8.789086500315264e-06, "loss": 1.505, "step": 624 }, { "epoch": 0.2421714301282449, "grad_norm": 0.13600526750087738, "learning_rate": 8.785029412268975e-06, "loss": 1.4627, "step": 625 }, { "epoch": 0.2425589044164501, "grad_norm": 0.151510089635849, "learning_rate": 8.780966478814963e-06, "loss": 1.4534, "step": 626 }, { "epoch": 0.2429463787046553, "grad_norm": 0.14740657806396484, "learning_rate": 8.77689770622782e-06, "loss": 1.4648, "step": 627 }, { "epoch": 0.24333385299286048, "grad_norm": 0.16135695576667786, "learning_rate": 8.772823100791152e-06, "loss": 1.4972, "step": 628 }, { "epoch": 0.24372132728106569, "grad_norm": 0.12826986610889435, "learning_rate": 8.76874266879757e-06, "loss": 1.4632, "step": 629 }, { "epoch": 0.24410880156927087, "grad_norm": 0.19150403141975403, "learning_rate": 8.76465641654869e-06, "loss": 1.4829, "step": 630 }, { "epoch": 0.24449627585747605, "grad_norm": 0.13565745949745178, "learning_rate": 8.76056435035511e-06, "loss": 1.4806, "step": 631 }, { "epoch": 0.24488375014568126, "grad_norm": 0.14396914839744568, "learning_rate": 8.756466476536413e-06, "loss": 1.4662, "step": 632 }, { "epoch": 0.24527122443388644, "grad_norm": 0.14467595517635345, "learning_rate": 8.752362801421145e-06, "loss": 1.5059, "step": 633 }, { "epoch": 0.24565869872209165, "grad_norm": 0.13789378106594086, "learning_rate": 8.748253331346816e-06, "loss": 1.4691, "step": 634 }, { "epoch": 0.24604617301029683, "grad_norm": 0.1595793217420578, "learning_rate": 8.744138072659881e-06, "loss": 1.5174, "step": 635 }, { "epoch": 0.246433647298502, "grad_norm": 0.14735355973243713, "learning_rate": 8.74001703171574e-06, "loss": 1.4815, "step": 636 }, { "epoch": 0.24682112158670722, "grad_norm": 0.12479910254478455, "learning_rate": 8.735890214878718e-06, "loss": 1.4527, "step": 637 }, { "epoch": 0.2472085958749124, "grad_norm": 0.13751129806041718, "learning_rate": 8.731757628522065e-06, "loss": 1.4714, "step": 638 }, { "epoch": 0.2475960701631176, "grad_norm": 0.1430538296699524, "learning_rate": 8.727619279027933e-06, "loss": 1.4882, "step": 639 }, { "epoch": 0.24798354445132278, "grad_norm": 0.13666033744812012, "learning_rate": 8.723475172787381e-06, "loss": 1.5187, "step": 640 }, { "epoch": 0.24837101873952797, "grad_norm": 0.14606216549873352, "learning_rate": 8.71932531620036e-06, "loss": 1.5195, "step": 641 }, { "epoch": 0.24875849302773317, "grad_norm": 0.13632288575172424, "learning_rate": 8.715169715675694e-06, "loss": 1.4182, "step": 642 }, { "epoch": 0.24914596731593835, "grad_norm": 0.15077637135982513, "learning_rate": 8.711008377631085e-06, "loss": 1.5027, "step": 643 }, { "epoch": 0.24953344160414356, "grad_norm": 0.15140017867088318, "learning_rate": 8.706841308493092e-06, "loss": 1.5414, "step": 644 }, { "epoch": 0.24992091589234874, "grad_norm": 0.1380661129951477, "learning_rate": 8.702668514697124e-06, "loss": 1.4854, "step": 645 }, { "epoch": 0.25030839018055395, "grad_norm": 0.154111847281456, "learning_rate": 8.698490002687435e-06, "loss": 1.4683, "step": 646 }, { "epoch": 0.25069586446875913, "grad_norm": 0.1327594816684723, "learning_rate": 8.694305778917107e-06, "loss": 1.4647, "step": 647 }, { "epoch": 0.2510833387569643, "grad_norm": 0.13395848870277405, "learning_rate": 8.69011584984804e-06, "loss": 1.5006, "step": 648 }, { "epoch": 0.2514708130451695, "grad_norm": 0.13487151265144348, "learning_rate": 8.685920221950951e-06, "loss": 1.4665, "step": 649 }, { "epoch": 0.25185828733337473, "grad_norm": 0.1310337781906128, "learning_rate": 8.681718901705354e-06, "loss": 1.4608, "step": 650 }, { "epoch": 0.2522457616215799, "grad_norm": 0.15542523562908173, "learning_rate": 8.677511895599558e-06, "loss": 1.4795, "step": 651 }, { "epoch": 0.2526332359097851, "grad_norm": 0.1488679051399231, "learning_rate": 8.673299210130647e-06, "loss": 1.4902, "step": 652 }, { "epoch": 0.2530207101979903, "grad_norm": 0.1472606062889099, "learning_rate": 8.66908085180448e-06, "loss": 1.5003, "step": 653 }, { "epoch": 0.25340818448619545, "grad_norm": 0.1243843138217926, "learning_rate": 8.664856827135678e-06, "loss": 1.4621, "step": 654 }, { "epoch": 0.2537956587744007, "grad_norm": 0.16722583770751953, "learning_rate": 8.660627142647606e-06, "loss": 1.4907, "step": 655 }, { "epoch": 0.25418313306260587, "grad_norm": 0.14107263088226318, "learning_rate": 8.656391804872376e-06, "loss": 1.4863, "step": 656 }, { "epoch": 0.25457060735081105, "grad_norm": 0.15278343856334686, "learning_rate": 8.652150820350835e-06, "loss": 1.4617, "step": 657 }, { "epoch": 0.25495808163901623, "grad_norm": 0.1562173217535019, "learning_rate": 8.647904195632537e-06, "loss": 1.4604, "step": 658 }, { "epoch": 0.2553455559272214, "grad_norm": 0.14969122409820557, "learning_rate": 8.643651937275759e-06, "loss": 1.4158, "step": 659 }, { "epoch": 0.25573303021542665, "grad_norm": 0.16635915637016296, "learning_rate": 8.639394051847472e-06, "loss": 1.4725, "step": 660 }, { "epoch": 0.25612050450363183, "grad_norm": 0.1554764211177826, "learning_rate": 8.635130545923338e-06, "loss": 1.4855, "step": 661 }, { "epoch": 0.256507978791837, "grad_norm": 0.13997481763362885, "learning_rate": 8.630861426087697e-06, "loss": 1.4782, "step": 662 }, { "epoch": 0.2568954530800422, "grad_norm": 0.1723310798406601, "learning_rate": 8.626586698933568e-06, "loss": 1.4422, "step": 663 }, { "epoch": 0.25728292736824737, "grad_norm": 0.16526159644126892, "learning_rate": 8.622306371062619e-06, "loss": 1.4388, "step": 664 }, { "epoch": 0.2576704016564526, "grad_norm": 0.15530291199684143, "learning_rate": 8.618020449085172e-06, "loss": 1.4628, "step": 665 }, { "epoch": 0.2580578759446578, "grad_norm": 0.15494416654109955, "learning_rate": 8.613728939620188e-06, "loss": 1.431, "step": 666 }, { "epoch": 0.25844535023286297, "grad_norm": 0.15847663581371307, "learning_rate": 8.609431849295256e-06, "loss": 1.4463, "step": 667 }, { "epoch": 0.25883282452106815, "grad_norm": 0.1586245596408844, "learning_rate": 8.605129184746586e-06, "loss": 1.504, "step": 668 }, { "epoch": 0.25922029880927333, "grad_norm": 0.14400990307331085, "learning_rate": 8.600820952618993e-06, "loss": 1.4396, "step": 669 }, { "epoch": 0.25960777309747857, "grad_norm": 0.1336282640695572, "learning_rate": 8.596507159565897e-06, "loss": 1.4535, "step": 670 }, { "epoch": 0.25999524738568375, "grad_norm": 0.15047763288021088, "learning_rate": 8.592187812249297e-06, "loss": 1.4932, "step": 671 }, { "epoch": 0.26038272167388893, "grad_norm": 0.12953871488571167, "learning_rate": 8.587862917339776e-06, "loss": 1.4636, "step": 672 }, { "epoch": 0.2607701959620941, "grad_norm": 0.14853544533252716, "learning_rate": 8.583532481516483e-06, "loss": 1.4269, "step": 673 }, { "epoch": 0.2611576702502993, "grad_norm": 0.14879341423511505, "learning_rate": 8.579196511467123e-06, "loss": 1.453, "step": 674 }, { "epoch": 0.2615451445385045, "grad_norm": 0.14248977601528168, "learning_rate": 8.574855013887949e-06, "loss": 1.4624, "step": 675 }, { "epoch": 0.2619326188267097, "grad_norm": 0.13511423766613007, "learning_rate": 8.57050799548375e-06, "loss": 1.4452, "step": 676 }, { "epoch": 0.2623200931149149, "grad_norm": 0.1529817432165146, "learning_rate": 8.566155462967841e-06, "loss": 1.4839, "step": 677 }, { "epoch": 0.26270756740312007, "grad_norm": 0.1388455331325531, "learning_rate": 8.561797423062052e-06, "loss": 1.4892, "step": 678 }, { "epoch": 0.26309504169132525, "grad_norm": 0.15706799924373627, "learning_rate": 8.55743388249672e-06, "loss": 1.5413, "step": 679 }, { "epoch": 0.2634825159795305, "grad_norm": 0.1351846605539322, "learning_rate": 8.553064848010677e-06, "loss": 1.5188, "step": 680 }, { "epoch": 0.26386999026773567, "grad_norm": 0.1319943219423294, "learning_rate": 8.548690326351235e-06, "loss": 1.4954, "step": 681 }, { "epoch": 0.26425746455594085, "grad_norm": 0.13971370458602905, "learning_rate": 8.544310324274188e-06, "loss": 1.472, "step": 682 }, { "epoch": 0.264644938844146, "grad_norm": 0.1496502012014389, "learning_rate": 8.539924848543786e-06, "loss": 1.4922, "step": 683 }, { "epoch": 0.2650324131323512, "grad_norm": 0.18633179366588593, "learning_rate": 8.535533905932739e-06, "loss": 1.509, "step": 684 }, { "epoch": 0.26541988742055644, "grad_norm": 0.1360493004322052, "learning_rate": 8.531137503222193e-06, "loss": 1.425, "step": 685 }, { "epoch": 0.2658073617087616, "grad_norm": 0.1508435159921646, "learning_rate": 8.526735647201732e-06, "loss": 1.5021, "step": 686 }, { "epoch": 0.2661948359969668, "grad_norm": 0.1471691131591797, "learning_rate": 8.522328344669358e-06, "loss": 1.4936, "step": 687 }, { "epoch": 0.266582310285172, "grad_norm": 0.1516815423965454, "learning_rate": 8.517915602431485e-06, "loss": 1.438, "step": 688 }, { "epoch": 0.26696978457337717, "grad_norm": 0.14342458546161652, "learning_rate": 8.513497427302932e-06, "loss": 1.4689, "step": 689 }, { "epoch": 0.2673572588615824, "grad_norm": 0.1402633935213089, "learning_rate": 8.509073826106902e-06, "loss": 1.4916, "step": 690 }, { "epoch": 0.2677447331497876, "grad_norm": 0.13995106518268585, "learning_rate": 8.504644805674983e-06, "loss": 1.4851, "step": 691 }, { "epoch": 0.26813220743799276, "grad_norm": 0.14614573121070862, "learning_rate": 8.500210372847128e-06, "loss": 1.5058, "step": 692 }, { "epoch": 0.26851968172619795, "grad_norm": 0.15533696115016937, "learning_rate": 8.495770534471651e-06, "loss": 1.4716, "step": 693 }, { "epoch": 0.2689071560144031, "grad_norm": 0.13189584016799927, "learning_rate": 8.491325297405212e-06, "loss": 1.4996, "step": 694 }, { "epoch": 0.26929463030260836, "grad_norm": 0.14488151669502258, "learning_rate": 8.486874668512812e-06, "loss": 1.4022, "step": 695 }, { "epoch": 0.26968210459081354, "grad_norm": 0.14596489071846008, "learning_rate": 8.482418654667777e-06, "loss": 1.5014, "step": 696 }, { "epoch": 0.2700695788790187, "grad_norm": 0.15570686757564545, "learning_rate": 8.477957262751748e-06, "loss": 1.5297, "step": 697 }, { "epoch": 0.2704570531672239, "grad_norm": 0.15529954433441162, "learning_rate": 8.473490499654672e-06, "loss": 1.4749, "step": 698 }, { "epoch": 0.2708445274554291, "grad_norm": 0.17491869628429413, "learning_rate": 8.469018372274793e-06, "loss": 1.4836, "step": 699 }, { "epoch": 0.2712320017436343, "grad_norm": 0.1494663506746292, "learning_rate": 8.464540887518638e-06, "loss": 1.4703, "step": 700 }, { "epoch": 0.2716194760318395, "grad_norm": 0.15739183127880096, "learning_rate": 8.460058052301007e-06, "loss": 1.4974, "step": 701 }, { "epoch": 0.2720069503200447, "grad_norm": 0.15465249121189117, "learning_rate": 8.455569873544966e-06, "loss": 1.4977, "step": 702 }, { "epoch": 0.27239442460824986, "grad_norm": 0.16172809898853302, "learning_rate": 8.451076358181828e-06, "loss": 1.4444, "step": 703 }, { "epoch": 0.27278189889645504, "grad_norm": 0.14356407523155212, "learning_rate": 8.44657751315115e-06, "loss": 1.4505, "step": 704 }, { "epoch": 0.2731693731846603, "grad_norm": 0.1387721598148346, "learning_rate": 8.442073345400725e-06, "loss": 1.4988, "step": 705 }, { "epoch": 0.27355684747286546, "grad_norm": 0.16691885888576508, "learning_rate": 8.437563861886557e-06, "loss": 1.472, "step": 706 }, { "epoch": 0.27394432176107064, "grad_norm": 0.14912067353725433, "learning_rate": 8.433049069572865e-06, "loss": 1.4839, "step": 707 }, { "epoch": 0.2743317960492758, "grad_norm": 0.144108846783638, "learning_rate": 8.428528975432067e-06, "loss": 1.4443, "step": 708 }, { "epoch": 0.274719270337481, "grad_norm": 0.16965490579605103, "learning_rate": 8.424003586444766e-06, "loss": 1.4257, "step": 709 }, { "epoch": 0.27510674462568624, "grad_norm": 0.1590406894683838, "learning_rate": 8.419472909599744e-06, "loss": 1.4589, "step": 710 }, { "epoch": 0.2754942189138914, "grad_norm": 0.1538558304309845, "learning_rate": 8.414936951893949e-06, "loss": 1.4689, "step": 711 }, { "epoch": 0.2758816932020966, "grad_norm": 0.19440090656280518, "learning_rate": 8.410395720332485e-06, "loss": 1.4534, "step": 712 }, { "epoch": 0.2762691674903018, "grad_norm": 0.15675346553325653, "learning_rate": 8.4058492219286e-06, "loss": 1.4634, "step": 713 }, { "epoch": 0.27665664177850696, "grad_norm": 0.1466962993144989, "learning_rate": 8.401297463703673e-06, "loss": 1.4272, "step": 714 }, { "epoch": 0.2770441160667122, "grad_norm": 0.1594041883945465, "learning_rate": 8.39674045268721e-06, "loss": 1.5013, "step": 715 }, { "epoch": 0.2774315903549174, "grad_norm": 0.16060969233512878, "learning_rate": 8.392178195916832e-06, "loss": 1.5296, "step": 716 }, { "epoch": 0.27781906464312256, "grad_norm": 0.13957712054252625, "learning_rate": 8.387610700438254e-06, "loss": 1.4974, "step": 717 }, { "epoch": 0.27820653893132774, "grad_norm": 0.16962507367134094, "learning_rate": 8.383037973305284e-06, "loss": 1.4671, "step": 718 }, { "epoch": 0.2785940132195329, "grad_norm": 0.14929309487342834, "learning_rate": 8.378460021579811e-06, "loss": 1.4672, "step": 719 }, { "epoch": 0.27898148750773816, "grad_norm": 0.1428220570087433, "learning_rate": 8.373876852331793e-06, "loss": 1.4501, "step": 720 }, { "epoch": 0.27936896179594334, "grad_norm": 0.1678919792175293, "learning_rate": 8.369288472639243e-06, "loss": 1.4031, "step": 721 }, { "epoch": 0.2797564360841485, "grad_norm": 0.1527702659368515, "learning_rate": 8.364694889588223e-06, "loss": 1.4674, "step": 722 }, { "epoch": 0.2801439103723537, "grad_norm": 0.13663198053836823, "learning_rate": 8.36009611027283e-06, "loss": 1.4498, "step": 723 }, { "epoch": 0.2805313846605589, "grad_norm": 0.16356690227985382, "learning_rate": 8.355492141795185e-06, "loss": 1.4737, "step": 724 }, { "epoch": 0.2809188589487641, "grad_norm": 0.13592801988124847, "learning_rate": 8.350882991265423e-06, "loss": 1.4724, "step": 725 }, { "epoch": 0.2813063332369693, "grad_norm": 0.14242208003997803, "learning_rate": 8.346268665801687e-06, "loss": 1.4536, "step": 726 }, { "epoch": 0.2816938075251745, "grad_norm": 0.14208894968032837, "learning_rate": 8.341649172530102e-06, "loss": 1.4936, "step": 727 }, { "epoch": 0.28208128181337966, "grad_norm": 0.148577481508255, "learning_rate": 8.337024518584782e-06, "loss": 1.4909, "step": 728 }, { "epoch": 0.28246875610158484, "grad_norm": 0.15973429381847382, "learning_rate": 8.332394711107809e-06, "loss": 1.4916, "step": 729 }, { "epoch": 0.2828562303897901, "grad_norm": 0.1549791693687439, "learning_rate": 8.327759757249221e-06, "loss": 1.4049, "step": 730 }, { "epoch": 0.28324370467799526, "grad_norm": 0.15074998140335083, "learning_rate": 8.32311966416701e-06, "loss": 1.5095, "step": 731 }, { "epoch": 0.28363117896620044, "grad_norm": 0.14616377651691437, "learning_rate": 8.318474439027096e-06, "loss": 1.4872, "step": 732 }, { "epoch": 0.2840186532544056, "grad_norm": 0.14417921006679535, "learning_rate": 8.313824089003331e-06, "loss": 1.4424, "step": 733 }, { "epoch": 0.2844061275426108, "grad_norm": 0.15317051112651825, "learning_rate": 8.309168621277482e-06, "loss": 1.428, "step": 734 }, { "epoch": 0.28479360183081603, "grad_norm": 0.15252573788166046, "learning_rate": 8.304508043039216e-06, "loss": 1.4946, "step": 735 }, { "epoch": 0.2851810761190212, "grad_norm": 0.14861930906772614, "learning_rate": 8.299842361486094e-06, "loss": 1.4475, "step": 736 }, { "epoch": 0.2855685504072264, "grad_norm": 0.13927814364433289, "learning_rate": 8.295171583823558e-06, "loss": 1.461, "step": 737 }, { "epoch": 0.2859560246954316, "grad_norm": 0.15012970566749573, "learning_rate": 8.29049571726492e-06, "loss": 1.5051, "step": 738 }, { "epoch": 0.28634349898363676, "grad_norm": 0.1529163271188736, "learning_rate": 8.285814769031355e-06, "loss": 1.4347, "step": 739 }, { "epoch": 0.286730973271842, "grad_norm": 0.15904228389263153, "learning_rate": 8.281128746351878e-06, "loss": 1.4359, "step": 740 }, { "epoch": 0.2871184475600472, "grad_norm": 0.16519121825695038, "learning_rate": 8.276437656463347e-06, "loss": 1.4979, "step": 741 }, { "epoch": 0.28750592184825235, "grad_norm": 0.1469881236553192, "learning_rate": 8.271741506610444e-06, "loss": 1.459, "step": 742 }, { "epoch": 0.28789339613645754, "grad_norm": 0.1401979774236679, "learning_rate": 8.267040304045666e-06, "loss": 1.4732, "step": 743 }, { "epoch": 0.2882808704246627, "grad_norm": 0.1600959748029709, "learning_rate": 8.262334056029309e-06, "loss": 1.4553, "step": 744 }, { "epoch": 0.28866834471286795, "grad_norm": 0.1479867696762085, "learning_rate": 8.257622769829466e-06, "loss": 1.5212, "step": 745 }, { "epoch": 0.28905581900107313, "grad_norm": 0.1361788809299469, "learning_rate": 8.252906452722007e-06, "loss": 1.4459, "step": 746 }, { "epoch": 0.2894432932892783, "grad_norm": 0.1438966691493988, "learning_rate": 8.248185111990575e-06, "loss": 1.4408, "step": 747 }, { "epoch": 0.2898307675774835, "grad_norm": 0.1523556411266327, "learning_rate": 8.24345875492657e-06, "loss": 1.4307, "step": 748 }, { "epoch": 0.2902182418656887, "grad_norm": 0.14753277599811554, "learning_rate": 8.238727388829138e-06, "loss": 1.4394, "step": 749 }, { "epoch": 0.2906057161538939, "grad_norm": 0.14122191071510315, "learning_rate": 8.233991021005162e-06, "loss": 1.4957, "step": 750 }, { "epoch": 0.2909931904420991, "grad_norm": 0.1722385734319687, "learning_rate": 8.229249658769244e-06, "loss": 1.4016, "step": 751 }, { "epoch": 0.2913806647303043, "grad_norm": 0.1475449949502945, "learning_rate": 8.22450330944371e-06, "loss": 1.408, "step": 752 }, { "epoch": 0.29176813901850945, "grad_norm": 0.15873020887374878, "learning_rate": 8.219751980358577e-06, "loss": 1.4723, "step": 753 }, { "epoch": 0.29215561330671463, "grad_norm": 0.13740164041519165, "learning_rate": 8.21499567885156e-06, "loss": 1.4608, "step": 754 }, { "epoch": 0.29254308759491987, "grad_norm": 0.1414264440536499, "learning_rate": 8.210234412268048e-06, "loss": 1.4438, "step": 755 }, { "epoch": 0.29293056188312505, "grad_norm": 0.1648436039686203, "learning_rate": 8.2054681879611e-06, "loss": 1.434, "step": 756 }, { "epoch": 0.29331803617133023, "grad_norm": 0.1731117218732834, "learning_rate": 8.200697013291434e-06, "loss": 1.445, "step": 757 }, { "epoch": 0.2937055104595354, "grad_norm": 0.17596910893917084, "learning_rate": 8.195920895627409e-06, "loss": 1.4813, "step": 758 }, { "epoch": 0.2940929847477406, "grad_norm": 0.14132454991340637, "learning_rate": 8.191139842345017e-06, "loss": 1.4939, "step": 759 }, { "epoch": 0.29448045903594583, "grad_norm": 0.14825333654880524, "learning_rate": 8.186353860827878e-06, "loss": 1.4468, "step": 760 }, { "epoch": 0.294867933324151, "grad_norm": 0.16069425642490387, "learning_rate": 8.181562958467217e-06, "loss": 1.4257, "step": 761 }, { "epoch": 0.2952554076123562, "grad_norm": 0.15000644326210022, "learning_rate": 8.176767142661862e-06, "loss": 1.4828, "step": 762 }, { "epoch": 0.29564288190056137, "grad_norm": 0.170976459980011, "learning_rate": 8.171966420818227e-06, "loss": 1.472, "step": 763 }, { "epoch": 0.29603035618876655, "grad_norm": 0.16561229526996613, "learning_rate": 8.167160800350306e-06, "loss": 1.4414, "step": 764 }, { "epoch": 0.2964178304769718, "grad_norm": 0.15158337354660034, "learning_rate": 8.162350288679655e-06, "loss": 1.4449, "step": 765 }, { "epoch": 0.29680530476517697, "grad_norm": 0.1524280607700348, "learning_rate": 8.157534893235383e-06, "loss": 1.45, "step": 766 }, { "epoch": 0.29719277905338215, "grad_norm": 0.16068504750728607, "learning_rate": 8.152714621454144e-06, "loss": 1.49, "step": 767 }, { "epoch": 0.29758025334158733, "grad_norm": 0.14728783071041107, "learning_rate": 8.147889480780121e-06, "loss": 1.4576, "step": 768 }, { "epoch": 0.2979677276297925, "grad_norm": 0.1427835077047348, "learning_rate": 8.143059478665018e-06, "loss": 1.4045, "step": 769 }, { "epoch": 0.29835520191799775, "grad_norm": 0.1369456797838211, "learning_rate": 8.138224622568046e-06, "loss": 1.4361, "step": 770 }, { "epoch": 0.29874267620620293, "grad_norm": 0.16593009233474731, "learning_rate": 8.13338491995591e-06, "loss": 1.4876, "step": 771 }, { "epoch": 0.2991301504944081, "grad_norm": 0.16165055334568024, "learning_rate": 8.1285403783028e-06, "loss": 1.4822, "step": 772 }, { "epoch": 0.2995176247826133, "grad_norm": 0.15383972227573395, "learning_rate": 8.123691005090385e-06, "loss": 1.48, "step": 773 }, { "epoch": 0.29990509907081847, "grad_norm": 0.1597556620836258, "learning_rate": 8.118836807807791e-06, "loss": 1.4365, "step": 774 }, { "epoch": 0.3002925733590237, "grad_norm": 0.14866268634796143, "learning_rate": 8.11397779395159e-06, "loss": 1.4114, "step": 775 }, { "epoch": 0.3006800476472289, "grad_norm": 0.1493736058473587, "learning_rate": 8.109113971025803e-06, "loss": 1.4799, "step": 776 }, { "epoch": 0.30106752193543407, "grad_norm": 0.1422111839056015, "learning_rate": 8.104245346541868e-06, "loss": 1.4611, "step": 777 }, { "epoch": 0.30145499622363925, "grad_norm": 0.15822738409042358, "learning_rate": 8.099371928018643e-06, "loss": 1.4846, "step": 778 }, { "epoch": 0.30184247051184443, "grad_norm": 0.15854236483573914, "learning_rate": 8.094493722982386e-06, "loss": 1.4834, "step": 779 }, { "epoch": 0.30222994480004967, "grad_norm": 0.14811210334300995, "learning_rate": 8.089610738966754e-06, "loss": 1.4584, "step": 780 }, { "epoch": 0.30261741908825485, "grad_norm": 0.13197796046733856, "learning_rate": 8.084722983512778e-06, "loss": 1.4617, "step": 781 }, { "epoch": 0.30300489337646, "grad_norm": 0.16184119880199432, "learning_rate": 8.079830464168862e-06, "loss": 1.4427, "step": 782 }, { "epoch": 0.3033923676646652, "grad_norm": 0.16323141753673553, "learning_rate": 8.074933188490763e-06, "loss": 1.4924, "step": 783 }, { "epoch": 0.3037798419528704, "grad_norm": 0.1657770276069641, "learning_rate": 8.070031164041585e-06, "loss": 1.4553, "step": 784 }, { "epoch": 0.3041673162410756, "grad_norm": 0.15447205305099487, "learning_rate": 8.065124398391768e-06, "loss": 1.4676, "step": 785 }, { "epoch": 0.3045547905292808, "grad_norm": 0.14583422243595123, "learning_rate": 8.060212899119072e-06, "loss": 1.4893, "step": 786 }, { "epoch": 0.304942264817486, "grad_norm": 0.15187671780586243, "learning_rate": 8.055296673808567e-06, "loss": 1.533, "step": 787 }, { "epoch": 0.30532973910569117, "grad_norm": 0.16394977271556854, "learning_rate": 8.050375730052622e-06, "loss": 1.4094, "step": 788 }, { "epoch": 0.30571721339389635, "grad_norm": 0.14317701756954193, "learning_rate": 8.045450075450896e-06, "loss": 1.4553, "step": 789 }, { "epoch": 0.3061046876821016, "grad_norm": 0.1455092579126358, "learning_rate": 8.040519717610318e-06, "loss": 1.4047, "step": 790 }, { "epoch": 0.30649216197030676, "grad_norm": 0.138349711894989, "learning_rate": 8.035584664145082e-06, "loss": 1.4208, "step": 791 }, { "epoch": 0.30687963625851195, "grad_norm": 0.16322718560695648, "learning_rate": 8.030644922676638e-06, "loss": 1.4371, "step": 792 }, { "epoch": 0.3072671105467171, "grad_norm": 0.14605118334293365, "learning_rate": 8.02570050083367e-06, "loss": 1.4725, "step": 793 }, { "epoch": 0.3076545848349223, "grad_norm": 0.15201842784881592, "learning_rate": 8.020751406252093e-06, "loss": 1.4441, "step": 794 }, { "epoch": 0.30804205912312754, "grad_norm": 0.15420253574848175, "learning_rate": 8.015797646575039e-06, "loss": 1.4452, "step": 795 }, { "epoch": 0.3084295334113327, "grad_norm": 0.14655013382434845, "learning_rate": 8.010839229452843e-06, "loss": 1.4023, "step": 796 }, { "epoch": 0.3088170076995379, "grad_norm": 0.14309237897396088, "learning_rate": 8.005876162543032e-06, "loss": 1.41, "step": 797 }, { "epoch": 0.3092044819877431, "grad_norm": 0.14741793274879456, "learning_rate": 8.000908453510317e-06, "loss": 1.417, "step": 798 }, { "epoch": 0.30959195627594827, "grad_norm": 0.15891531109809875, "learning_rate": 7.995936110026577e-06, "loss": 1.4813, "step": 799 }, { "epoch": 0.3099794305641535, "grad_norm": 0.15796200931072235, "learning_rate": 7.990959139770844e-06, "loss": 1.4495, "step": 800 }, { "epoch": 0.3103669048523587, "grad_norm": 0.1586228609085083, "learning_rate": 7.985977550429302e-06, "loss": 1.4387, "step": 801 }, { "epoch": 0.31075437914056386, "grad_norm": 0.14945408701896667, "learning_rate": 7.980991349695264e-06, "loss": 1.4633, "step": 802 }, { "epoch": 0.31114185342876904, "grad_norm": 0.15251071751117706, "learning_rate": 7.976000545269167e-06, "loss": 1.4236, "step": 803 }, { "epoch": 0.3115293277169742, "grad_norm": 0.14501605927944183, "learning_rate": 7.971005144858554e-06, "loss": 1.4588, "step": 804 }, { "epoch": 0.31191680200517946, "grad_norm": 0.1415509283542633, "learning_rate": 7.96600515617807e-06, "loss": 1.4233, "step": 805 }, { "epoch": 0.31230427629338464, "grad_norm": 0.14208824932575226, "learning_rate": 7.961000586949445e-06, "loss": 1.4263, "step": 806 }, { "epoch": 0.3126917505815898, "grad_norm": 0.1521013081073761, "learning_rate": 7.95599144490148e-06, "loss": 1.3987, "step": 807 }, { "epoch": 0.313079224869795, "grad_norm": 0.17135606706142426, "learning_rate": 7.950977737770043e-06, "loss": 1.4364, "step": 808 }, { "epoch": 0.3134666991580002, "grad_norm": 0.16234120726585388, "learning_rate": 7.945959473298048e-06, "loss": 1.4415, "step": 809 }, { "epoch": 0.3138541734462054, "grad_norm": 0.1472405195236206, "learning_rate": 7.940936659235447e-06, "loss": 1.4756, "step": 810 }, { "epoch": 0.3142416477344106, "grad_norm": 0.15172179043293, "learning_rate": 7.935909303339225e-06, "loss": 1.4102, "step": 811 }, { "epoch": 0.3146291220226158, "grad_norm": 0.16304846107959747, "learning_rate": 7.930877413373369e-06, "loss": 1.4271, "step": 812 }, { "epoch": 0.31501659631082096, "grad_norm": 0.16580542922019958, "learning_rate": 7.925840997108878e-06, "loss": 1.4487, "step": 813 }, { "epoch": 0.31540407059902614, "grad_norm": 0.15764565765857697, "learning_rate": 7.92080006232374e-06, "loss": 1.4227, "step": 814 }, { "epoch": 0.3157915448872314, "grad_norm": 0.17322389781475067, "learning_rate": 7.915754616802915e-06, "loss": 1.5054, "step": 815 }, { "epoch": 0.31617901917543656, "grad_norm": 0.13678540289402008, "learning_rate": 7.910704668338338e-06, "loss": 1.4941, "step": 816 }, { "epoch": 0.31656649346364174, "grad_norm": 0.14693835377693176, "learning_rate": 7.90565022472889e-06, "loss": 1.4279, "step": 817 }, { "epoch": 0.3169539677518469, "grad_norm": 0.16043202579021454, "learning_rate": 7.900591293780404e-06, "loss": 1.4588, "step": 818 }, { "epoch": 0.3173414420400521, "grad_norm": 0.14665043354034424, "learning_rate": 7.895527883305629e-06, "loss": 1.4781, "step": 819 }, { "epoch": 0.31772891632825734, "grad_norm": 0.18899346888065338, "learning_rate": 7.890460001124242e-06, "loss": 1.4412, "step": 820 }, { "epoch": 0.3181163906164625, "grad_norm": 0.15513849258422852, "learning_rate": 7.885387655062827e-06, "loss": 1.4504, "step": 821 }, { "epoch": 0.3185038649046677, "grad_norm": 0.16910052299499512, "learning_rate": 7.880310852954853e-06, "loss": 1.4098, "step": 822 }, { "epoch": 0.3188913391928729, "grad_norm": 0.16828958690166473, "learning_rate": 7.875229602640682e-06, "loss": 1.4355, "step": 823 }, { "epoch": 0.31927881348107806, "grad_norm": 0.16144761443138123, "learning_rate": 7.870143911967534e-06, "loss": 1.4623, "step": 824 }, { "epoch": 0.3196662877692833, "grad_norm": 0.20135493576526642, "learning_rate": 7.865053788789496e-06, "loss": 1.4433, "step": 825 }, { "epoch": 0.3200537620574885, "grad_norm": 0.14852716028690338, "learning_rate": 7.859959240967494e-06, "loss": 1.4072, "step": 826 }, { "epoch": 0.32044123634569366, "grad_norm": 0.14671224355697632, "learning_rate": 7.854860276369292e-06, "loss": 1.4403, "step": 827 }, { "epoch": 0.32082871063389884, "grad_norm": 0.15859992802143097, "learning_rate": 7.849756902869471e-06, "loss": 1.376, "step": 828 }, { "epoch": 0.321216184922104, "grad_norm": 0.1696745902299881, "learning_rate": 7.844649128349424e-06, "loss": 1.4575, "step": 829 }, { "epoch": 0.32160365921030926, "grad_norm": 0.17247240245342255, "learning_rate": 7.839536960697335e-06, "loss": 1.4806, "step": 830 }, { "epoch": 0.32199113349851444, "grad_norm": 0.16279096901416779, "learning_rate": 7.834420407808184e-06, "loss": 1.4545, "step": 831 }, { "epoch": 0.3223786077867196, "grad_norm": 0.15555816888809204, "learning_rate": 7.82929947758371e-06, "loss": 1.4298, "step": 832 }, { "epoch": 0.3227660820749248, "grad_norm": 0.16524650156497955, "learning_rate": 7.82417417793242e-06, "loss": 1.4883, "step": 833 }, { "epoch": 0.32315355636313, "grad_norm": 0.16012325882911682, "learning_rate": 7.819044516769568e-06, "loss": 1.41, "step": 834 }, { "epoch": 0.3235410306513352, "grad_norm": 0.1543051302433014, "learning_rate": 7.813910502017141e-06, "loss": 1.4923, "step": 835 }, { "epoch": 0.3239285049395404, "grad_norm": 0.1507895141839981, "learning_rate": 7.808772141603855e-06, "loss": 1.4801, "step": 836 }, { "epoch": 0.3243159792277456, "grad_norm": 0.17335234582424164, "learning_rate": 7.80362944346513e-06, "loss": 1.4044, "step": 837 }, { "epoch": 0.32470345351595076, "grad_norm": 0.15458303689956665, "learning_rate": 7.798482415543092e-06, "loss": 1.4722, "step": 838 }, { "epoch": 0.32509092780415594, "grad_norm": 0.16532739996910095, "learning_rate": 7.793331065786547e-06, "loss": 1.4457, "step": 839 }, { "epoch": 0.3254784020923612, "grad_norm": 0.15705567598342896, "learning_rate": 7.78817540215098e-06, "loss": 1.4206, "step": 840 }, { "epoch": 0.32586587638056635, "grad_norm": 0.15737512707710266, "learning_rate": 7.783015432598536e-06, "loss": 1.4145, "step": 841 }, { "epoch": 0.32625335066877154, "grad_norm": 0.16128292679786682, "learning_rate": 7.777851165098012e-06, "loss": 1.4377, "step": 842 }, { "epoch": 0.3266408249569767, "grad_norm": 0.16689883172512054, "learning_rate": 7.77268260762484e-06, "loss": 1.4742, "step": 843 }, { "epoch": 0.3270282992451819, "grad_norm": 0.15718883275985718, "learning_rate": 7.767509768161079e-06, "loss": 1.4508, "step": 844 }, { "epoch": 0.32741577353338713, "grad_norm": 0.15396955609321594, "learning_rate": 7.762332654695402e-06, "loss": 1.452, "step": 845 }, { "epoch": 0.3278032478215923, "grad_norm": 0.1585526019334793, "learning_rate": 7.757151275223077e-06, "loss": 1.4237, "step": 846 }, { "epoch": 0.3281907221097975, "grad_norm": 0.17759980261325836, "learning_rate": 7.751965637745965e-06, "loss": 1.427, "step": 847 }, { "epoch": 0.3285781963980027, "grad_norm": 0.1641695648431778, "learning_rate": 7.746775750272504e-06, "loss": 1.5025, "step": 848 }, { "epoch": 0.32896567068620786, "grad_norm": 0.1522056609392166, "learning_rate": 7.741581620817693e-06, "loss": 1.4491, "step": 849 }, { "epoch": 0.3293531449744131, "grad_norm": 0.15956749022006989, "learning_rate": 7.73638325740308e-06, "loss": 1.3924, "step": 850 }, { "epoch": 0.3297406192626183, "grad_norm": 0.1445731222629547, "learning_rate": 7.731180668056758e-06, "loss": 1.4311, "step": 851 }, { "epoch": 0.33012809355082345, "grad_norm": 0.15257970988750458, "learning_rate": 7.725973860813338e-06, "loss": 1.4147, "step": 852 }, { "epoch": 0.33051556783902863, "grad_norm": 0.1839246153831482, "learning_rate": 7.720762843713954e-06, "loss": 1.4368, "step": 853 }, { "epoch": 0.3309030421272338, "grad_norm": 0.16418370604515076, "learning_rate": 7.715547624806233e-06, "loss": 1.4008, "step": 854 }, { "epoch": 0.33129051641543905, "grad_norm": 0.14454062283039093, "learning_rate": 7.710328212144297e-06, "loss": 1.4627, "step": 855 }, { "epoch": 0.33167799070364423, "grad_norm": 0.16399945318698883, "learning_rate": 7.705104613788743e-06, "loss": 1.4304, "step": 856 }, { "epoch": 0.3320654649918494, "grad_norm": 0.14636695384979248, "learning_rate": 7.69987683780663e-06, "loss": 1.477, "step": 857 }, { "epoch": 0.3324529392800546, "grad_norm": 0.14961768686771393, "learning_rate": 7.694644892271472e-06, "loss": 1.4125, "step": 858 }, { "epoch": 0.3328404135682598, "grad_norm": 0.14095452427864075, "learning_rate": 7.68940878526322e-06, "loss": 1.4627, "step": 859 }, { "epoch": 0.333227887856465, "grad_norm": 0.18338559567928314, "learning_rate": 7.684168524868253e-06, "loss": 1.4869, "step": 860 }, { "epoch": 0.3336153621446702, "grad_norm": 0.1554839164018631, "learning_rate": 7.678924119179361e-06, "loss": 1.4227, "step": 861 }, { "epoch": 0.33400283643287537, "grad_norm": 0.16991862654685974, "learning_rate": 7.673675576295743e-06, "loss": 1.4503, "step": 862 }, { "epoch": 0.33439031072108055, "grad_norm": 0.15350666642189026, "learning_rate": 7.668422904322979e-06, "loss": 1.4595, "step": 863 }, { "epoch": 0.33477778500928573, "grad_norm": 0.1718270182609558, "learning_rate": 7.66316611137303e-06, "loss": 1.4878, "step": 864 }, { "epoch": 0.33516525929749097, "grad_norm": 0.15605993568897247, "learning_rate": 7.657905205564224e-06, "loss": 1.4204, "step": 865 }, { "epoch": 0.33555273358569615, "grad_norm": 0.1629057377576828, "learning_rate": 7.652640195021235e-06, "loss": 1.4415, "step": 866 }, { "epoch": 0.33594020787390133, "grad_norm": 0.15075266361236572, "learning_rate": 7.647371087875079e-06, "loss": 1.4364, "step": 867 }, { "epoch": 0.3363276821621065, "grad_norm": 0.16332507133483887, "learning_rate": 7.642097892263098e-06, "loss": 1.4437, "step": 868 }, { "epoch": 0.3367151564503117, "grad_norm": 0.1527612954378128, "learning_rate": 7.636820616328947e-06, "loss": 1.4795, "step": 869 }, { "epoch": 0.33710263073851693, "grad_norm": 0.1605743020772934, "learning_rate": 7.631539268222588e-06, "loss": 1.4956, "step": 870 }, { "epoch": 0.3374901050267221, "grad_norm": 0.17839299142360687, "learning_rate": 7.626253856100263e-06, "loss": 1.4617, "step": 871 }, { "epoch": 0.3378775793149273, "grad_norm": 0.1663694679737091, "learning_rate": 7.6209643881244964e-06, "loss": 1.4342, "step": 872 }, { "epoch": 0.33826505360313247, "grad_norm": 0.15347841382026672, "learning_rate": 7.615670872464076e-06, "loss": 1.4388, "step": 873 }, { "epoch": 0.3386525278913377, "grad_norm": 0.16536401212215424, "learning_rate": 7.610373317294038e-06, "loss": 1.4561, "step": 874 }, { "epoch": 0.3390400021795429, "grad_norm": 0.18227246403694153, "learning_rate": 7.605071730795657e-06, "loss": 1.4293, "step": 875 }, { "epoch": 0.33942747646774807, "grad_norm": 0.16656138002872467, "learning_rate": 7.599766121156436e-06, "loss": 1.3798, "step": 876 }, { "epoch": 0.33981495075595325, "grad_norm": 0.1530277281999588, "learning_rate": 7.594456496570088e-06, "loss": 1.4698, "step": 877 }, { "epoch": 0.34020242504415843, "grad_norm": 0.1542046070098877, "learning_rate": 7.589142865236534e-06, "loss": 1.388, "step": 878 }, { "epoch": 0.34058989933236367, "grad_norm": 0.14951136708259583, "learning_rate": 7.5838252353618706e-06, "loss": 1.5064, "step": 879 }, { "epoch": 0.34097737362056885, "grad_norm": 0.15820246934890747, "learning_rate": 7.578503615158379e-06, "loss": 1.4446, "step": 880 }, { "epoch": 0.341364847908774, "grad_norm": 0.17398501932621002, "learning_rate": 7.5731780128444995e-06, "loss": 1.426, "step": 881 }, { "epoch": 0.3417523221969792, "grad_norm": 0.15991035103797913, "learning_rate": 7.567848436644824e-06, "loss": 1.4507, "step": 882 }, { "epoch": 0.3421397964851844, "grad_norm": 0.1490257978439331, "learning_rate": 7.562514894790076e-06, "loss": 1.47, "step": 883 }, { "epoch": 0.3425272707733896, "grad_norm": 0.13392885029315948, "learning_rate": 7.5571773955171124e-06, "loss": 1.4659, "step": 884 }, { "epoch": 0.3429147450615948, "grad_norm": 0.16413800418376923, "learning_rate": 7.551835947068893e-06, "loss": 1.4898, "step": 885 }, { "epoch": 0.3433022193498, "grad_norm": 0.14323648810386658, "learning_rate": 7.5464905576944815e-06, "loss": 1.4284, "step": 886 }, { "epoch": 0.34368969363800517, "grad_norm": 0.16930046677589417, "learning_rate": 7.541141235649027e-06, "loss": 1.3869, "step": 887 }, { "epoch": 0.34407716792621035, "grad_norm": 0.14766676723957062, "learning_rate": 7.535787989193752e-06, "loss": 1.4402, "step": 888 }, { "epoch": 0.3444646422144156, "grad_norm": 0.15954647958278656, "learning_rate": 7.530430826595938e-06, "loss": 1.4456, "step": 889 }, { "epoch": 0.34485211650262076, "grad_norm": 0.15348728001117706, "learning_rate": 7.525069756128915e-06, "loss": 1.4799, "step": 890 }, { "epoch": 0.34523959079082595, "grad_norm": 0.15276850759983063, "learning_rate": 7.519704786072051e-06, "loss": 1.4075, "step": 891 }, { "epoch": 0.3456270650790311, "grad_norm": 0.15454278886318207, "learning_rate": 7.5143359247107314e-06, "loss": 1.4484, "step": 892 }, { "epoch": 0.3460145393672363, "grad_norm": 0.15512150526046753, "learning_rate": 7.508963180336355e-06, "loss": 1.4385, "step": 893 }, { "epoch": 0.34640201365544154, "grad_norm": 0.14882658421993256, "learning_rate": 7.5035865612463175e-06, "loss": 1.4477, "step": 894 }, { "epoch": 0.3467894879436467, "grad_norm": 0.1688275784254074, "learning_rate": 7.4982060757439945e-06, "loss": 1.4326, "step": 895 }, { "epoch": 0.3471769622318519, "grad_norm": 0.1520664393901825, "learning_rate": 7.492821732138737e-06, "loss": 1.4882, "step": 896 }, { "epoch": 0.3475644365200571, "grad_norm": 0.1724391132593155, "learning_rate": 7.487433538745853e-06, "loss": 1.4655, "step": 897 }, { "epoch": 0.34795191080826227, "grad_norm": 0.1600770354270935, "learning_rate": 7.4820415038865945e-06, "loss": 1.4022, "step": 898 }, { "epoch": 0.3483393850964675, "grad_norm": 0.1727876216173172, "learning_rate": 7.476645635888146e-06, "loss": 1.4397, "step": 899 }, { "epoch": 0.3487268593846727, "grad_norm": 0.15318836271762848, "learning_rate": 7.471245943083615e-06, "loss": 1.4326, "step": 900 }, { "epoch": 0.34911433367287786, "grad_norm": 0.15994839370250702, "learning_rate": 7.465842433812011e-06, "loss": 1.3928, "step": 901 }, { "epoch": 0.34950180796108304, "grad_norm": 0.14651064574718475, "learning_rate": 7.460435116418243e-06, "loss": 1.4435, "step": 902 }, { "epoch": 0.3498892822492882, "grad_norm": 0.16392751038074493, "learning_rate": 7.455023999253097e-06, "loss": 1.4555, "step": 903 }, { "epoch": 0.35027675653749346, "grad_norm": 0.1640239953994751, "learning_rate": 7.449609090673226e-06, "loss": 1.4416, "step": 904 }, { "epoch": 0.35066423082569864, "grad_norm": 0.17586208879947662, "learning_rate": 7.444190399041145e-06, "loss": 1.3895, "step": 905 }, { "epoch": 0.3510517051139038, "grad_norm": 0.15483367443084717, "learning_rate": 7.4387679327252025e-06, "loss": 1.4101, "step": 906 }, { "epoch": 0.351439179402109, "grad_norm": 0.15885113179683685, "learning_rate": 7.433341700099584e-06, "loss": 1.4513, "step": 907 }, { "epoch": 0.3518266536903142, "grad_norm": 0.15789271891117096, "learning_rate": 7.427911709544288e-06, "loss": 1.4349, "step": 908 }, { "epoch": 0.3522141279785194, "grad_norm": 0.17059139907360077, "learning_rate": 7.422477969445115e-06, "loss": 1.4308, "step": 909 }, { "epoch": 0.3526016022667246, "grad_norm": 0.17578917741775513, "learning_rate": 7.417040488193658e-06, "loss": 1.4327, "step": 910 }, { "epoch": 0.3529890765549298, "grad_norm": 0.17245271801948547, "learning_rate": 7.411599274187289e-06, "loss": 1.4327, "step": 911 }, { "epoch": 0.35337655084313496, "grad_norm": 0.17725500464439392, "learning_rate": 7.406154335829142e-06, "loss": 1.4483, "step": 912 }, { "epoch": 0.35376402513134014, "grad_norm": 0.15762491524219513, "learning_rate": 7.400705681528106e-06, "loss": 1.435, "step": 913 }, { "epoch": 0.3541514994195454, "grad_norm": 0.18917882442474365, "learning_rate": 7.395253319698806e-06, "loss": 1.416, "step": 914 }, { "epoch": 0.35453897370775056, "grad_norm": 0.1628836989402771, "learning_rate": 7.389797258761593e-06, "loss": 1.4691, "step": 915 }, { "epoch": 0.35492644799595574, "grad_norm": 0.14232923090457916, "learning_rate": 7.3843375071425315e-06, "loss": 1.4604, "step": 916 }, { "epoch": 0.3553139222841609, "grad_norm": 0.16134363412857056, "learning_rate": 7.378874073273386e-06, "loss": 1.4682, "step": 917 }, { "epoch": 0.3557013965723661, "grad_norm": 0.17269288003444672, "learning_rate": 7.373406965591604e-06, "loss": 1.4517, "step": 918 }, { "epoch": 0.35608887086057134, "grad_norm": 0.14228790998458862, "learning_rate": 7.367936192540314e-06, "loss": 1.4392, "step": 919 }, { "epoch": 0.3564763451487765, "grad_norm": 0.14950601756572723, "learning_rate": 7.362461762568298e-06, "loss": 1.4472, "step": 920 }, { "epoch": 0.3568638194369817, "grad_norm": 0.16685424745082855, "learning_rate": 7.3569836841299905e-06, "loss": 1.4445, "step": 921 }, { "epoch": 0.3572512937251869, "grad_norm": 0.17290189862251282, "learning_rate": 7.351501965685455e-06, "loss": 1.4388, "step": 922 }, { "epoch": 0.35763876801339206, "grad_norm": 0.16835904121398926, "learning_rate": 7.346016615700382e-06, "loss": 1.4189, "step": 923 }, { "epoch": 0.3580262423015973, "grad_norm": 0.16141045093536377, "learning_rate": 7.340527642646069e-06, "loss": 1.4009, "step": 924 }, { "epoch": 0.3584137165898025, "grad_norm": 0.16227784752845764, "learning_rate": 7.335035054999408e-06, "loss": 1.5106, "step": 925 }, { "epoch": 0.35880119087800766, "grad_norm": 0.15463976562023163, "learning_rate": 7.329538861242871e-06, "loss": 1.3876, "step": 926 }, { "epoch": 0.35918866516621284, "grad_norm": 0.17115172743797302, "learning_rate": 7.324039069864503e-06, "loss": 1.4277, "step": 927 }, { "epoch": 0.359576139454418, "grad_norm": 0.16652178764343262, "learning_rate": 7.318535689357903e-06, "loss": 1.4473, "step": 928 }, { "epoch": 0.35996361374262326, "grad_norm": 0.1604868471622467, "learning_rate": 7.3130287282222125e-06, "loss": 1.4647, "step": 929 }, { "epoch": 0.36035108803082844, "grad_norm": 0.18412534892559052, "learning_rate": 7.3075181949621045e-06, "loss": 1.4551, "step": 930 }, { "epoch": 0.3607385623190336, "grad_norm": 0.15662308037281036, "learning_rate": 7.302004098087766e-06, "loss": 1.3987, "step": 931 }, { "epoch": 0.3611260366072388, "grad_norm": 0.16243167221546173, "learning_rate": 7.2964864461148895e-06, "loss": 1.4802, "step": 932 }, { "epoch": 0.361513510895444, "grad_norm": 0.1471649706363678, "learning_rate": 7.290965247564658e-06, "loss": 1.4448, "step": 933 }, { "epoch": 0.3619009851836492, "grad_norm": 0.17024336755275726, "learning_rate": 7.285440510963731e-06, "loss": 1.456, "step": 934 }, { "epoch": 0.3622884594718544, "grad_norm": 0.1894913911819458, "learning_rate": 7.2799122448442304e-06, "loss": 1.4579, "step": 935 }, { "epoch": 0.3626759337600596, "grad_norm": 0.17906716465950012, "learning_rate": 7.274380457743731e-06, "loss": 1.4796, "step": 936 }, { "epoch": 0.36306340804826476, "grad_norm": 0.16443364322185516, "learning_rate": 7.268845158205246e-06, "loss": 1.4627, "step": 937 }, { "epoch": 0.36345088233646994, "grad_norm": 0.1734602004289627, "learning_rate": 7.26330635477721e-06, "loss": 1.4487, "step": 938 }, { "epoch": 0.3638383566246752, "grad_norm": 0.1699114441871643, "learning_rate": 7.25776405601347e-06, "loss": 1.4497, "step": 939 }, { "epoch": 0.36422583091288036, "grad_norm": 0.168977290391922, "learning_rate": 7.252218270473274e-06, "loss": 1.4063, "step": 940 }, { "epoch": 0.36461330520108554, "grad_norm": 0.15939922630786896, "learning_rate": 7.246669006721249e-06, "loss": 1.4552, "step": 941 }, { "epoch": 0.3650007794892907, "grad_norm": 0.160956472158432, "learning_rate": 7.2411162733273995e-06, "loss": 1.4168, "step": 942 }, { "epoch": 0.3653882537774959, "grad_norm": 0.1612844616174698, "learning_rate": 7.235560078867086e-06, "loss": 1.4484, "step": 943 }, { "epoch": 0.36577572806570113, "grad_norm": 0.16947251558303833, "learning_rate": 7.230000431921012e-06, "loss": 1.4171, "step": 944 }, { "epoch": 0.3661632023539063, "grad_norm": 0.1917184591293335, "learning_rate": 7.2244373410752145e-06, "loss": 1.4427, "step": 945 }, { "epoch": 0.3665506766421115, "grad_norm": 0.15174360573291779, "learning_rate": 7.218870814921052e-06, "loss": 1.3863, "step": 946 }, { "epoch": 0.3669381509303167, "grad_norm": 0.18260155618190765, "learning_rate": 7.213300862055182e-06, "loss": 1.4541, "step": 947 }, { "epoch": 0.36732562521852186, "grad_norm": 0.17407847940921783, "learning_rate": 7.2077274910795605e-06, "loss": 1.435, "step": 948 }, { "epoch": 0.3677130995067271, "grad_norm": 0.15096981823444366, "learning_rate": 7.202150710601417e-06, "loss": 1.4229, "step": 949 }, { "epoch": 0.3681005737949323, "grad_norm": 0.14988911151885986, "learning_rate": 7.196570529233251e-06, "loss": 1.4207, "step": 950 }, { "epoch": 0.36848804808313745, "grad_norm": 0.1553945690393448, "learning_rate": 7.19098695559281e-06, "loss": 1.4488, "step": 951 }, { "epoch": 0.36887552237134263, "grad_norm": 0.14816579222679138, "learning_rate": 7.1853999983030845e-06, "loss": 1.433, "step": 952 }, { "epoch": 0.3692629966595478, "grad_norm": 0.16012710332870483, "learning_rate": 7.179809665992286e-06, "loss": 1.4276, "step": 953 }, { "epoch": 0.36965047094775305, "grad_norm": 0.17249585688114166, "learning_rate": 7.174215967293842e-06, "loss": 1.4252, "step": 954 }, { "epoch": 0.37003794523595823, "grad_norm": 0.16195714473724365, "learning_rate": 7.168618910846378e-06, "loss": 1.4502, "step": 955 }, { "epoch": 0.3704254195241634, "grad_norm": 0.15032973885536194, "learning_rate": 7.163018505293703e-06, "loss": 1.3817, "step": 956 }, { "epoch": 0.3708128938123686, "grad_norm": 0.17819739878177643, "learning_rate": 7.1574147592848e-06, "loss": 1.4587, "step": 957 }, { "epoch": 0.3712003681005738, "grad_norm": 0.1799643337726593, "learning_rate": 7.151807681473812e-06, "loss": 1.4539, "step": 958 }, { "epoch": 0.371587842388779, "grad_norm": 0.14966720342636108, "learning_rate": 7.146197280520024e-06, "loss": 1.4471, "step": 959 }, { "epoch": 0.3719753166769842, "grad_norm": 0.17094524204730988, "learning_rate": 7.140583565087856e-06, "loss": 1.4397, "step": 960 }, { "epoch": 0.37236279096518937, "grad_norm": 0.1586240828037262, "learning_rate": 7.134966543846847e-06, "loss": 1.3909, "step": 961 }, { "epoch": 0.37275026525339455, "grad_norm": 0.15047770738601685, "learning_rate": 7.129346225471638e-06, "loss": 1.4978, "step": 962 }, { "epoch": 0.37313773954159973, "grad_norm": 0.14690403640270233, "learning_rate": 7.123722618641965e-06, "loss": 1.4483, "step": 963 }, { "epoch": 0.37352521382980497, "grad_norm": 0.15526287257671356, "learning_rate": 7.118095732042643e-06, "loss": 1.4234, "step": 964 }, { "epoch": 0.37391268811801015, "grad_norm": 0.16999071836471558, "learning_rate": 7.1124655743635465e-06, "loss": 1.419, "step": 965 }, { "epoch": 0.37430016240621533, "grad_norm": 0.14455309510231018, "learning_rate": 7.10683215429961e-06, "loss": 1.4416, "step": 966 }, { "epoch": 0.3746876366944205, "grad_norm": 0.17007213830947876, "learning_rate": 7.1011954805508e-06, "loss": 1.4661, "step": 967 }, { "epoch": 0.3750751109826257, "grad_norm": 0.1723259538412094, "learning_rate": 7.09555556182211e-06, "loss": 1.4113, "step": 968 }, { "epoch": 0.37546258527083093, "grad_norm": 0.1349567025899887, "learning_rate": 7.089912406823547e-06, "loss": 1.4668, "step": 969 }, { "epoch": 0.3758500595590361, "grad_norm": 0.15725009143352509, "learning_rate": 7.0842660242701125e-06, "loss": 1.4228, "step": 970 }, { "epoch": 0.3762375338472413, "grad_norm": 0.15955470502376556, "learning_rate": 7.0786164228817926e-06, "loss": 1.4183, "step": 971 }, { "epoch": 0.37662500813544647, "grad_norm": 0.15735258162021637, "learning_rate": 7.072963611383545e-06, "loss": 1.4608, "step": 972 }, { "epoch": 0.37701248242365165, "grad_norm": 0.15757252275943756, "learning_rate": 7.067307598505289e-06, "loss": 1.4002, "step": 973 }, { "epoch": 0.3773999567118569, "grad_norm": 0.1753820925951004, "learning_rate": 7.06164839298188e-06, "loss": 1.3881, "step": 974 }, { "epoch": 0.37778743100006207, "grad_norm": 0.16401535272598267, "learning_rate": 7.0559860035531125e-06, "loss": 1.4508, "step": 975 }, { "epoch": 0.37817490528826725, "grad_norm": 0.16342082619667053, "learning_rate": 7.050320438963691e-06, "loss": 1.4881, "step": 976 }, { "epoch": 0.37856237957647243, "grad_norm": 0.16982649266719818, "learning_rate": 7.04465170796323e-06, "loss": 1.5007, "step": 977 }, { "epoch": 0.3789498538646776, "grad_norm": 0.15949921309947968, "learning_rate": 7.038979819306226e-06, "loss": 1.4776, "step": 978 }, { "epoch": 0.37933732815288285, "grad_norm": 0.159776970744133, "learning_rate": 7.03330478175206e-06, "loss": 1.4461, "step": 979 }, { "epoch": 0.379724802441088, "grad_norm": 0.16660724580287933, "learning_rate": 7.02762660406497e-06, "loss": 1.4346, "step": 980 }, { "epoch": 0.3801122767292932, "grad_norm": 0.17782586812973022, "learning_rate": 7.021945295014047e-06, "loss": 1.4224, "step": 981 }, { "epoch": 0.3804997510174984, "grad_norm": 0.16744264960289001, "learning_rate": 7.016260863373219e-06, "loss": 1.4256, "step": 982 }, { "epoch": 0.38088722530570357, "grad_norm": 0.17790211737155914, "learning_rate": 7.010573317921232e-06, "loss": 1.4479, "step": 983 }, { "epoch": 0.3812746995939088, "grad_norm": 0.16855962574481964, "learning_rate": 7.004882667441643e-06, "loss": 1.3674, "step": 984 }, { "epoch": 0.381662173882114, "grad_norm": 0.1667313128709793, "learning_rate": 6.999188920722807e-06, "loss": 1.4049, "step": 985 }, { "epoch": 0.38204964817031917, "grad_norm": 0.15845279395580292, "learning_rate": 6.9934920865578545e-06, "loss": 1.455, "step": 986 }, { "epoch": 0.38243712245852435, "grad_norm": 0.1656493842601776, "learning_rate": 6.987792173744692e-06, "loss": 1.4087, "step": 987 }, { "epoch": 0.38282459674672953, "grad_norm": 0.1601235717535019, "learning_rate": 6.982089191085971e-06, "loss": 1.4459, "step": 988 }, { "epoch": 0.38321207103493476, "grad_norm": 0.147563174366951, "learning_rate": 6.976383147389092e-06, "loss": 1.4278, "step": 989 }, { "epoch": 0.38359954532313995, "grad_norm": 0.17684006690979004, "learning_rate": 6.97067405146618e-06, "loss": 1.4163, "step": 990 }, { "epoch": 0.3839870196113451, "grad_norm": 0.16054895520210266, "learning_rate": 6.964961912134073e-06, "loss": 1.4542, "step": 991 }, { "epoch": 0.3843744938995503, "grad_norm": 0.18154625594615936, "learning_rate": 6.959246738214309e-06, "loss": 1.3944, "step": 992 }, { "epoch": 0.3847619681877555, "grad_norm": 0.1616104394197464, "learning_rate": 6.9535285385331145e-06, "loss": 1.4643, "step": 993 }, { "epoch": 0.3851494424759607, "grad_norm": 0.1622704416513443, "learning_rate": 6.947807321921387e-06, "loss": 1.4602, "step": 994 }, { "epoch": 0.3855369167641659, "grad_norm": 0.15224257111549377, "learning_rate": 6.942083097214682e-06, "loss": 1.4505, "step": 995 }, { "epoch": 0.3859243910523711, "grad_norm": 0.1545490026473999, "learning_rate": 6.936355873253207e-06, "loss": 1.4204, "step": 996 }, { "epoch": 0.38631186534057627, "grad_norm": 0.17009851336479187, "learning_rate": 6.930625658881791e-06, "loss": 1.3571, "step": 997 }, { "epoch": 0.38669933962878145, "grad_norm": 0.15686728060245514, "learning_rate": 6.924892462949889e-06, "loss": 1.3845, "step": 998 }, { "epoch": 0.3870868139169867, "grad_norm": 0.16369551420211792, "learning_rate": 6.91915629431156e-06, "loss": 1.4704, "step": 999 }, { "epoch": 0.38747428820519186, "grad_norm": 0.16760091483592987, "learning_rate": 6.913417161825449e-06, "loss": 1.4682, "step": 1000 }, { "epoch": 0.38786176249339704, "grad_norm": 0.15764391422271729, "learning_rate": 6.907675074354785e-06, "loss": 1.4556, "step": 1001 }, { "epoch": 0.3882492367816022, "grad_norm": 0.16626544296741486, "learning_rate": 6.901930040767355e-06, "loss": 1.4141, "step": 1002 }, { "epoch": 0.3886367110698074, "grad_norm": 0.18187381327152252, "learning_rate": 6.8961820699355e-06, "loss": 1.4298, "step": 1003 }, { "epoch": 0.38902418535801264, "grad_norm": 0.1614377200603485, "learning_rate": 6.8904311707360914e-06, "loss": 1.3976, "step": 1004 }, { "epoch": 0.3894116596462178, "grad_norm": 0.18347837030887604, "learning_rate": 6.884677352050531e-06, "loss": 1.4254, "step": 1005 }, { "epoch": 0.389799133934423, "grad_norm": 0.17910531163215637, "learning_rate": 6.878920622764721e-06, "loss": 1.3851, "step": 1006 }, { "epoch": 0.3901866082226282, "grad_norm": 0.17418889701366425, "learning_rate": 6.873160991769065e-06, "loss": 1.4475, "step": 1007 }, { "epoch": 0.39057408251083336, "grad_norm": 0.16390106081962585, "learning_rate": 6.867398467958444e-06, "loss": 1.4093, "step": 1008 }, { "epoch": 0.3909615567990386, "grad_norm": 0.1632574051618576, "learning_rate": 6.861633060232209e-06, "loss": 1.431, "step": 1009 }, { "epoch": 0.3913490310872438, "grad_norm": 0.16226711869239807, "learning_rate": 6.855864777494162e-06, "loss": 1.4324, "step": 1010 }, { "epoch": 0.39173650537544896, "grad_norm": 0.16087520122528076, "learning_rate": 6.85009362865255e-06, "loss": 1.4463, "step": 1011 }, { "epoch": 0.39212397966365414, "grad_norm": 0.1625560224056244, "learning_rate": 6.844319622620039e-06, "loss": 1.4246, "step": 1012 }, { "epoch": 0.3925114539518593, "grad_norm": 0.16229349374771118, "learning_rate": 6.838542768313715e-06, "loss": 1.4238, "step": 1013 }, { "epoch": 0.39289892824006456, "grad_norm": 0.14039869606494904, "learning_rate": 6.832763074655058e-06, "loss": 1.4137, "step": 1014 }, { "epoch": 0.39328640252826974, "grad_norm": 0.16233877837657928, "learning_rate": 6.826980550569933e-06, "loss": 1.4216, "step": 1015 }, { "epoch": 0.3936738768164749, "grad_norm": 0.1470850259065628, "learning_rate": 6.821195204988578e-06, "loss": 1.4108, "step": 1016 }, { "epoch": 0.3940613511046801, "grad_norm": 0.14053499698638916, "learning_rate": 6.815407046845588e-06, "loss": 1.4362, "step": 1017 }, { "epoch": 0.3944488253928853, "grad_norm": 0.15767896175384521, "learning_rate": 6.809616085079901e-06, "loss": 1.4675, "step": 1018 }, { "epoch": 0.3948362996810905, "grad_norm": 0.17567338049411774, "learning_rate": 6.803822328634785e-06, "loss": 1.4449, "step": 1019 }, { "epoch": 0.3952237739692957, "grad_norm": 0.14876675605773926, "learning_rate": 6.798025786457825e-06, "loss": 1.4285, "step": 1020 }, { "epoch": 0.3956112482575009, "grad_norm": 0.1644352227449417, "learning_rate": 6.792226467500905e-06, "loss": 1.448, "step": 1021 }, { "epoch": 0.39599872254570606, "grad_norm": 0.15590433776378632, "learning_rate": 6.7864243807202015e-06, "loss": 1.3931, "step": 1022 }, { "epoch": 0.39638619683391124, "grad_norm": 0.1698845624923706, "learning_rate": 6.780619535076163e-06, "loss": 1.4629, "step": 1023 }, { "epoch": 0.3967736711221165, "grad_norm": 0.15523579716682434, "learning_rate": 6.774811939533498e-06, "loss": 1.4853, "step": 1024 }, { "epoch": 0.39716114541032166, "grad_norm": 0.1804419308900833, "learning_rate": 6.769001603061164e-06, "loss": 1.4139, "step": 1025 }, { "epoch": 0.39754861969852684, "grad_norm": 0.14946512877941132, "learning_rate": 6.76318853463235e-06, "loss": 1.447, "step": 1026 }, { "epoch": 0.397936093986732, "grad_norm": 0.16372789442539215, "learning_rate": 6.757372743224464e-06, "loss": 1.4171, "step": 1027 }, { "epoch": 0.3983235682749372, "grad_norm": 0.15804488956928253, "learning_rate": 6.751554237819122e-06, "loss": 1.4179, "step": 1028 }, { "epoch": 0.39871104256314244, "grad_norm": 0.17804661393165588, "learning_rate": 6.745733027402126e-06, "loss": 1.4142, "step": 1029 }, { "epoch": 0.3990985168513476, "grad_norm": 0.15130841732025146, "learning_rate": 6.739909120963461e-06, "loss": 1.4515, "step": 1030 }, { "epoch": 0.3994859911395528, "grad_norm": 0.17838387191295624, "learning_rate": 6.734082527497271e-06, "loss": 1.4166, "step": 1031 }, { "epoch": 0.399873465427758, "grad_norm": 0.1704101711511612, "learning_rate": 6.728253256001854e-06, "loss": 1.4036, "step": 1032 }, { "epoch": 0.40026093971596316, "grad_norm": 0.18352718651294708, "learning_rate": 6.722421315479639e-06, "loss": 1.494, "step": 1033 }, { "epoch": 0.4006484140041684, "grad_norm": 0.1683838963508606, "learning_rate": 6.716586714937184e-06, "loss": 1.47, "step": 1034 }, { "epoch": 0.4010358882923736, "grad_norm": 0.1464998722076416, "learning_rate": 6.7107494633851445e-06, "loss": 1.4825, "step": 1035 }, { "epoch": 0.40142336258057876, "grad_norm": 0.17667463421821594, "learning_rate": 6.704909569838281e-06, "loss": 1.4505, "step": 1036 }, { "epoch": 0.40181083686878394, "grad_norm": 0.14623914659023285, "learning_rate": 6.699067043315427e-06, "loss": 1.4149, "step": 1037 }, { "epoch": 0.4021983111569891, "grad_norm": 0.14629803597927094, "learning_rate": 6.693221892839486e-06, "loss": 1.4528, "step": 1038 }, { "epoch": 0.40258578544519436, "grad_norm": 0.154071643948555, "learning_rate": 6.687374127437409e-06, "loss": 1.3992, "step": 1039 }, { "epoch": 0.40297325973339954, "grad_norm": 0.15823394060134888, "learning_rate": 6.6815237561401955e-06, "loss": 1.4378, "step": 1040 }, { "epoch": 0.4033607340216047, "grad_norm": 0.1675831377506256, "learning_rate": 6.675670787982855e-06, "loss": 1.4037, "step": 1041 }, { "epoch": 0.4037482083098099, "grad_norm": 0.15216083824634552, "learning_rate": 6.669815232004419e-06, "loss": 1.4285, "step": 1042 }, { "epoch": 0.4041356825980151, "grad_norm": 0.1563536524772644, "learning_rate": 6.663957097247913e-06, "loss": 1.3979, "step": 1043 }, { "epoch": 0.4045231568862203, "grad_norm": 0.16344080865383148, "learning_rate": 6.65809639276034e-06, "loss": 1.4218, "step": 1044 }, { "epoch": 0.4049106311744255, "grad_norm": 0.1691950112581253, "learning_rate": 6.652233127592677e-06, "loss": 1.4681, "step": 1045 }, { "epoch": 0.4052981054626307, "grad_norm": 0.16229641437530518, "learning_rate": 6.646367310799854e-06, "loss": 1.4666, "step": 1046 }, { "epoch": 0.40568557975083586, "grad_norm": 0.16147230565547943, "learning_rate": 6.64049895144074e-06, "loss": 1.4591, "step": 1047 }, { "epoch": 0.40607305403904104, "grad_norm": 0.16081230342388153, "learning_rate": 6.634628058578135e-06, "loss": 1.42, "step": 1048 }, { "epoch": 0.4064605283272463, "grad_norm": 0.18392443656921387, "learning_rate": 6.628754641278744e-06, "loss": 1.4454, "step": 1049 }, { "epoch": 0.40684800261545145, "grad_norm": 0.16093522310256958, "learning_rate": 6.622878708613178e-06, "loss": 1.46, "step": 1050 }, { "epoch": 0.40723547690365663, "grad_norm": 0.1506911963224411, "learning_rate": 6.617000269655931e-06, "loss": 1.4424, "step": 1051 }, { "epoch": 0.4076229511918618, "grad_norm": 0.17088399827480316, "learning_rate": 6.611119333485364e-06, "loss": 1.3859, "step": 1052 }, { "epoch": 0.408010425480067, "grad_norm": 0.16328437626361847, "learning_rate": 6.605235909183698e-06, "loss": 1.3955, "step": 1053 }, { "epoch": 0.40839789976827223, "grad_norm": 0.16427358984947205, "learning_rate": 6.599350005836995e-06, "loss": 1.4349, "step": 1054 }, { "epoch": 0.4087853740564774, "grad_norm": 0.1548972874879837, "learning_rate": 6.593461632535147e-06, "loss": 1.3909, "step": 1055 }, { "epoch": 0.4091728483446826, "grad_norm": 0.16179370880126953, "learning_rate": 6.58757079837186e-06, "loss": 1.4157, "step": 1056 }, { "epoch": 0.4095603226328878, "grad_norm": 0.15379148721694946, "learning_rate": 6.581677512444638e-06, "loss": 1.4077, "step": 1057 }, { "epoch": 0.40994779692109296, "grad_norm": 0.14562982320785522, "learning_rate": 6.5757817838547734e-06, "loss": 1.4456, "step": 1058 }, { "epoch": 0.4103352712092982, "grad_norm": 0.18258197605609894, "learning_rate": 6.569883621707331e-06, "loss": 1.4488, "step": 1059 }, { "epoch": 0.41072274549750337, "grad_norm": 0.16730335354804993, "learning_rate": 6.563983035111136e-06, "loss": 1.4719, "step": 1060 }, { "epoch": 0.41111021978570855, "grad_norm": 0.16625116765499115, "learning_rate": 6.5580800331787534e-06, "loss": 1.4573, "step": 1061 }, { "epoch": 0.41149769407391373, "grad_norm": 0.17769338190555573, "learning_rate": 6.55217462502648e-06, "loss": 1.4009, "step": 1062 }, { "epoch": 0.4118851683621189, "grad_norm": 0.1571877896785736, "learning_rate": 6.546266819774331e-06, "loss": 1.4367, "step": 1063 }, { "epoch": 0.41227264265032415, "grad_norm": 0.16129718720912933, "learning_rate": 6.5403566265460196e-06, "loss": 1.5198, "step": 1064 }, { "epoch": 0.41266011693852933, "grad_norm": 0.18157969415187836, "learning_rate": 6.53444405446895e-06, "loss": 1.4118, "step": 1065 }, { "epoch": 0.4130475912267345, "grad_norm": 0.16661176085472107, "learning_rate": 6.5285291126742e-06, "loss": 1.4589, "step": 1066 }, { "epoch": 0.4134350655149397, "grad_norm": 0.16733871400356293, "learning_rate": 6.522611810296503e-06, "loss": 1.4288, "step": 1067 }, { "epoch": 0.4138225398031449, "grad_norm": 0.16039817035198212, "learning_rate": 6.516692156474243e-06, "loss": 1.4144, "step": 1068 }, { "epoch": 0.4142100140913501, "grad_norm": 0.17110982537269592, "learning_rate": 6.5107701603494356e-06, "loss": 1.4235, "step": 1069 }, { "epoch": 0.4145974883795553, "grad_norm": 0.16129432618618011, "learning_rate": 6.5048458310677075e-06, "loss": 1.436, "step": 1070 }, { "epoch": 0.41498496266776047, "grad_norm": 0.16394788026809692, "learning_rate": 6.4989191777782955e-06, "loss": 1.3975, "step": 1071 }, { "epoch": 0.41537243695596565, "grad_norm": 0.1724306344985962, "learning_rate": 6.492990209634023e-06, "loss": 1.4417, "step": 1072 }, { "epoch": 0.41575991124417083, "grad_norm": 0.15363602340221405, "learning_rate": 6.487058935791286e-06, "loss": 1.3873, "step": 1073 }, { "epoch": 0.41614738553237607, "grad_norm": 0.1798592209815979, "learning_rate": 6.481125365410046e-06, "loss": 1.4218, "step": 1074 }, { "epoch": 0.41653485982058125, "grad_norm": 0.1702873557806015, "learning_rate": 6.4751895076538065e-06, "loss": 1.4447, "step": 1075 }, { "epoch": 0.41692233410878643, "grad_norm": 0.15276753902435303, "learning_rate": 6.469251371689606e-06, "loss": 1.4325, "step": 1076 }, { "epoch": 0.4173098083969916, "grad_norm": 0.16717717051506042, "learning_rate": 6.4633109666880014e-06, "loss": 1.4179, "step": 1077 }, { "epoch": 0.4176972826851968, "grad_norm": 0.17639610171318054, "learning_rate": 6.457368301823054e-06, "loss": 1.4066, "step": 1078 }, { "epoch": 0.418084756973402, "grad_norm": 0.15627701580524445, "learning_rate": 6.451423386272312e-06, "loss": 1.3776, "step": 1079 }, { "epoch": 0.4184722312616072, "grad_norm": 0.15705382823944092, "learning_rate": 6.445476229216805e-06, "loss": 1.3681, "step": 1080 }, { "epoch": 0.4188597055498124, "grad_norm": 0.16354140639305115, "learning_rate": 6.439526839841019e-06, "loss": 1.4339, "step": 1081 }, { "epoch": 0.41924717983801757, "grad_norm": 0.16098952293395996, "learning_rate": 6.433575227332891e-06, "loss": 1.4298, "step": 1082 }, { "epoch": 0.41963465412622275, "grad_norm": 0.17020434141159058, "learning_rate": 6.427621400883789e-06, "loss": 1.4323, "step": 1083 }, { "epoch": 0.420022128414428, "grad_norm": 0.19614115357398987, "learning_rate": 6.421665369688501e-06, "loss": 1.4344, "step": 1084 }, { "epoch": 0.42040960270263317, "grad_norm": 0.18635059893131256, "learning_rate": 6.415707142945221e-06, "loss": 1.5216, "step": 1085 }, { "epoch": 0.42079707699083835, "grad_norm": 0.16742843389511108, "learning_rate": 6.409746729855531e-06, "loss": 1.4144, "step": 1086 }, { "epoch": 0.42118455127904353, "grad_norm": 0.18507635593414307, "learning_rate": 6.403784139624392e-06, "loss": 1.3993, "step": 1087 }, { "epoch": 0.4215720255672487, "grad_norm": 0.15949174761772156, "learning_rate": 6.397819381460127e-06, "loss": 1.4383, "step": 1088 }, { "epoch": 0.42195949985545395, "grad_norm": 0.18002521991729736, "learning_rate": 6.391852464574404e-06, "loss": 1.4946, "step": 1089 }, { "epoch": 0.4223469741436591, "grad_norm": 0.19626915454864502, "learning_rate": 6.385883398182231e-06, "loss": 1.3898, "step": 1090 }, { "epoch": 0.4227344484318643, "grad_norm": 0.17218096554279327, "learning_rate": 6.379912191501928e-06, "loss": 1.4045, "step": 1091 }, { "epoch": 0.4231219227200695, "grad_norm": 0.1733282208442688, "learning_rate": 6.373938853755126e-06, "loss": 1.3895, "step": 1092 }, { "epoch": 0.42350939700827467, "grad_norm": 0.18145468831062317, "learning_rate": 6.367963394166748e-06, "loss": 1.4193, "step": 1093 }, { "epoch": 0.4238968712964799, "grad_norm": 0.18641622364521027, "learning_rate": 6.361985821964985e-06, "loss": 1.4115, "step": 1094 }, { "epoch": 0.4242843455846851, "grad_norm": 0.16818645596504211, "learning_rate": 6.356006146381302e-06, "loss": 1.4015, "step": 1095 }, { "epoch": 0.42467181987289027, "grad_norm": 0.15998797118663788, "learning_rate": 6.350024376650403e-06, "loss": 1.3976, "step": 1096 }, { "epoch": 0.42505929416109545, "grad_norm": 0.17177380621433258, "learning_rate": 6.344040522010233e-06, "loss": 1.4506, "step": 1097 }, { "epoch": 0.4254467684493006, "grad_norm": 0.18556781113147736, "learning_rate": 6.3380545917019545e-06, "loss": 1.4152, "step": 1098 }, { "epoch": 0.42583424273750586, "grad_norm": 0.16724736988544464, "learning_rate": 6.332066594969934e-06, "loss": 1.4133, "step": 1099 }, { "epoch": 0.42622171702571104, "grad_norm": 0.18675531446933746, "learning_rate": 6.326076541061729e-06, "loss": 1.3778, "step": 1100 }, { "epoch": 0.4266091913139162, "grad_norm": 0.15763436257839203, "learning_rate": 6.320084439228078e-06, "loss": 1.4708, "step": 1101 }, { "epoch": 0.4269966656021214, "grad_norm": 0.16645574569702148, "learning_rate": 6.314090298722877e-06, "loss": 1.4198, "step": 1102 }, { "epoch": 0.4273841398903266, "grad_norm": 0.16112612187862396, "learning_rate": 6.308094128803175e-06, "loss": 1.3557, "step": 1103 }, { "epoch": 0.4277716141785318, "grad_norm": 0.15866170823574066, "learning_rate": 6.3020959387291536e-06, "loss": 1.4572, "step": 1104 }, { "epoch": 0.428159088466737, "grad_norm": 0.17627714574337006, "learning_rate": 6.296095737764111e-06, "loss": 1.3775, "step": 1105 }, { "epoch": 0.4285465627549422, "grad_norm": 0.18380136787891388, "learning_rate": 6.290093535174457e-06, "loss": 1.4624, "step": 1106 }, { "epoch": 0.42893403704314736, "grad_norm": 0.15969452261924744, "learning_rate": 6.284089340229685e-06, "loss": 1.4452, "step": 1107 }, { "epoch": 0.42932151133135255, "grad_norm": 0.17875613272190094, "learning_rate": 6.278083162202374e-06, "loss": 1.4231, "step": 1108 }, { "epoch": 0.4297089856195578, "grad_norm": 0.17075638473033905, "learning_rate": 6.27207501036816e-06, "loss": 1.4037, "step": 1109 }, { "epoch": 0.43009645990776296, "grad_norm": 0.16351713240146637, "learning_rate": 6.266064894005726e-06, "loss": 1.4152, "step": 1110 }, { "epoch": 0.43048393419596814, "grad_norm": 0.18713587522506714, "learning_rate": 6.260052822396794e-06, "loss": 1.4104, "step": 1111 }, { "epoch": 0.4308714084841733, "grad_norm": 0.1878727376461029, "learning_rate": 6.254038804826101e-06, "loss": 1.3991, "step": 1112 }, { "epoch": 0.4312588827723785, "grad_norm": 0.18700483441352844, "learning_rate": 6.248022850581392e-06, "loss": 1.4101, "step": 1113 }, { "epoch": 0.43164635706058374, "grad_norm": 0.16843660175800323, "learning_rate": 6.242004968953403e-06, "loss": 1.4493, "step": 1114 }, { "epoch": 0.4320338313487889, "grad_norm": 0.1765844076871872, "learning_rate": 6.235985169235845e-06, "loss": 1.4208, "step": 1115 }, { "epoch": 0.4324213056369941, "grad_norm": 0.15852834284305573, "learning_rate": 6.22996346072539e-06, "loss": 1.4176, "step": 1116 }, { "epoch": 0.4328087799251993, "grad_norm": 0.18474209308624268, "learning_rate": 6.2239398527216636e-06, "loss": 1.4017, "step": 1117 }, { "epoch": 0.4331962542134045, "grad_norm": 0.17207136750221252, "learning_rate": 6.217914354527216e-06, "loss": 1.3957, "step": 1118 }, { "epoch": 0.4335837285016097, "grad_norm": 0.17393407225608826, "learning_rate": 6.211886975447525e-06, "loss": 1.3936, "step": 1119 }, { "epoch": 0.4339712027898149, "grad_norm": 0.16261471807956696, "learning_rate": 6.205857724790969e-06, "loss": 1.4282, "step": 1120 }, { "epoch": 0.43435867707802006, "grad_norm": 0.1681116223335266, "learning_rate": 6.199826611868816e-06, "loss": 1.4592, "step": 1121 }, { "epoch": 0.43474615136622524, "grad_norm": 0.17131589353084564, "learning_rate": 6.193793645995212e-06, "loss": 1.4608, "step": 1122 }, { "epoch": 0.4351336256544305, "grad_norm": 0.183954119682312, "learning_rate": 6.187758836487163e-06, "loss": 1.4043, "step": 1123 }, { "epoch": 0.43552109994263566, "grad_norm": 0.16685621440410614, "learning_rate": 6.181722192664526e-06, "loss": 1.409, "step": 1124 }, { "epoch": 0.43590857423084084, "grad_norm": 0.16816650331020355, "learning_rate": 6.175683723849985e-06, "loss": 1.4243, "step": 1125 }, { "epoch": 0.436296048519046, "grad_norm": 0.15208658576011658, "learning_rate": 6.169643439369047e-06, "loss": 1.4381, "step": 1126 }, { "epoch": 0.4366835228072512, "grad_norm": 0.16500847041606903, "learning_rate": 6.163601348550021e-06, "loss": 1.3726, "step": 1127 }, { "epoch": 0.43707099709545644, "grad_norm": 0.15438148379325867, "learning_rate": 6.157557460724006e-06, "loss": 1.3778, "step": 1128 }, { "epoch": 0.4374584713836616, "grad_norm": 0.15452925860881805, "learning_rate": 6.151511785224877e-06, "loss": 1.4467, "step": 1129 }, { "epoch": 0.4378459456718668, "grad_norm": 0.15694154798984528, "learning_rate": 6.145464331389268e-06, "loss": 1.4105, "step": 1130 }, { "epoch": 0.438233419960072, "grad_norm": 0.17021729052066803, "learning_rate": 6.139415108556561e-06, "loss": 1.3861, "step": 1131 }, { "epoch": 0.43862089424827716, "grad_norm": 0.19350208342075348, "learning_rate": 6.133364126068867e-06, "loss": 1.3987, "step": 1132 }, { "epoch": 0.4390083685364824, "grad_norm": 0.17959317564964294, "learning_rate": 6.127311393271021e-06, "loss": 1.3935, "step": 1133 }, { "epoch": 0.4393958428246876, "grad_norm": 0.17524205148220062, "learning_rate": 6.1212569195105545e-06, "loss": 1.417, "step": 1134 }, { "epoch": 0.43978331711289276, "grad_norm": 0.1575501710176468, "learning_rate": 6.115200714137691e-06, "loss": 1.4485, "step": 1135 }, { "epoch": 0.44017079140109794, "grad_norm": 0.1845422089099884, "learning_rate": 6.109142786505327e-06, "loss": 1.4208, "step": 1136 }, { "epoch": 0.4405582656893031, "grad_norm": 0.17654171586036682, "learning_rate": 6.103083145969019e-06, "loss": 1.4033, "step": 1137 }, { "epoch": 0.44094573997750836, "grad_norm": 0.1816931664943695, "learning_rate": 6.097021801886969e-06, "loss": 1.3819, "step": 1138 }, { "epoch": 0.44133321426571354, "grad_norm": 0.16762346029281616, "learning_rate": 6.09095876362001e-06, "loss": 1.4463, "step": 1139 }, { "epoch": 0.4417206885539187, "grad_norm": 0.17286045849323273, "learning_rate": 6.084894040531591e-06, "loss": 1.4419, "step": 1140 }, { "epoch": 0.4421081628421239, "grad_norm": 0.1722288280725479, "learning_rate": 6.078827641987762e-06, "loss": 1.3761, "step": 1141 }, { "epoch": 0.4424956371303291, "grad_norm": 0.1533670574426651, "learning_rate": 6.072759577357165e-06, "loss": 1.4368, "step": 1142 }, { "epoch": 0.4428831114185343, "grad_norm": 0.17432500422000885, "learning_rate": 6.0666898560110075e-06, "loss": 1.426, "step": 1143 }, { "epoch": 0.4432705857067395, "grad_norm": 0.1712101548910141, "learning_rate": 6.060618487323063e-06, "loss": 1.428, "step": 1144 }, { "epoch": 0.4436580599949447, "grad_norm": 0.1676892191171646, "learning_rate": 6.054545480669646e-06, "loss": 1.41, "step": 1145 }, { "epoch": 0.44404553428314986, "grad_norm": 0.17208708822727203, "learning_rate": 6.048470845429597e-06, "loss": 1.4363, "step": 1146 }, { "epoch": 0.44443300857135504, "grad_norm": 0.17240700125694275, "learning_rate": 6.042394590984278e-06, "loss": 1.3946, "step": 1147 }, { "epoch": 0.4448204828595603, "grad_norm": 0.1492319107055664, "learning_rate": 6.036316726717546e-06, "loss": 1.4366, "step": 1148 }, { "epoch": 0.44520795714776545, "grad_norm": 0.190591499209404, "learning_rate": 6.030237262015749e-06, "loss": 1.3574, "step": 1149 }, { "epoch": 0.44559543143597063, "grad_norm": 0.18184834718704224, "learning_rate": 6.0241562062677035e-06, "loss": 1.438, "step": 1150 }, { "epoch": 0.4459829057241758, "grad_norm": 0.19512149691581726, "learning_rate": 6.018073568864685e-06, "loss": 1.4303, "step": 1151 }, { "epoch": 0.446370380012381, "grad_norm": 0.17334552109241486, "learning_rate": 6.01198935920041e-06, "loss": 1.4846, "step": 1152 }, { "epoch": 0.44675785430058623, "grad_norm": 0.16575270891189575, "learning_rate": 6.005903586671023e-06, "loss": 1.3923, "step": 1153 }, { "epoch": 0.4471453285887914, "grad_norm": 0.17477364838123322, "learning_rate": 5.999816260675084e-06, "loss": 1.4201, "step": 1154 }, { "epoch": 0.4475328028769966, "grad_norm": 0.17594224214553833, "learning_rate": 5.99372739061355e-06, "loss": 1.4447, "step": 1155 }, { "epoch": 0.4479202771652018, "grad_norm": 0.19594331085681915, "learning_rate": 5.987636985889764e-06, "loss": 1.4065, "step": 1156 }, { "epoch": 0.44830775145340696, "grad_norm": 0.16264590620994568, "learning_rate": 5.981545055909439e-06, "loss": 1.4459, "step": 1157 }, { "epoch": 0.4486952257416122, "grad_norm": 0.15787331759929657, "learning_rate": 5.975451610080643e-06, "loss": 1.3919, "step": 1158 }, { "epoch": 0.44908270002981737, "grad_norm": 0.15090268850326538, "learning_rate": 5.969356657813783e-06, "loss": 1.4025, "step": 1159 }, { "epoch": 0.44947017431802255, "grad_norm": 0.16826654970645905, "learning_rate": 5.963260208521598e-06, "loss": 1.4205, "step": 1160 }, { "epoch": 0.44985764860622773, "grad_norm": 0.16491298377513885, "learning_rate": 5.957162271619133e-06, "loss": 1.4151, "step": 1161 }, { "epoch": 0.4502451228944329, "grad_norm": 0.17381861805915833, "learning_rate": 5.951062856523734e-06, "loss": 1.4329, "step": 1162 }, { "epoch": 0.45063259718263815, "grad_norm": 0.17352046072483063, "learning_rate": 5.944961972655029e-06, "loss": 1.3987, "step": 1163 }, { "epoch": 0.45102007147084333, "grad_norm": 0.1563253402709961, "learning_rate": 5.938859629434913e-06, "loss": 1.3688, "step": 1164 }, { "epoch": 0.4514075457590485, "grad_norm": 0.16565488278865814, "learning_rate": 5.932755836287537e-06, "loss": 1.4267, "step": 1165 }, { "epoch": 0.4517950200472537, "grad_norm": 0.17581629753112793, "learning_rate": 5.926650602639289e-06, "loss": 1.4898, "step": 1166 }, { "epoch": 0.4521824943354589, "grad_norm": 0.18431949615478516, "learning_rate": 5.920543937918783e-06, "loss": 1.4031, "step": 1167 }, { "epoch": 0.4525699686236641, "grad_norm": 0.17554426193237305, "learning_rate": 5.914435851556843e-06, "loss": 1.416, "step": 1168 }, { "epoch": 0.4529574429118693, "grad_norm": 0.16644640266895294, "learning_rate": 5.908326352986489e-06, "loss": 1.4302, "step": 1169 }, { "epoch": 0.45334491720007447, "grad_norm": 0.18242782354354858, "learning_rate": 5.902215451642919e-06, "loss": 1.4445, "step": 1170 }, { "epoch": 0.45373239148827965, "grad_norm": 0.15325985848903656, "learning_rate": 5.896103156963499e-06, "loss": 1.3949, "step": 1171 }, { "epoch": 0.45411986577648483, "grad_norm": 0.17655497789382935, "learning_rate": 5.8899894783877536e-06, "loss": 1.3614, "step": 1172 }, { "epoch": 0.45450734006469007, "grad_norm": 0.17898263037204742, "learning_rate": 5.88387442535733e-06, "loss": 1.3839, "step": 1173 }, { "epoch": 0.45489481435289525, "grad_norm": 0.18744240701198578, "learning_rate": 5.877758007316012e-06, "loss": 1.4456, "step": 1174 }, { "epoch": 0.45528228864110043, "grad_norm": 0.18953365087509155, "learning_rate": 5.871640233709681e-06, "loss": 1.4759, "step": 1175 }, { "epoch": 0.4556697629293056, "grad_norm": 0.17352838814258575, "learning_rate": 5.865521113986322e-06, "loss": 1.4122, "step": 1176 }, { "epoch": 0.4560572372175108, "grad_norm": 0.17662395536899567, "learning_rate": 5.8594006575959905e-06, "loss": 1.4067, "step": 1177 }, { "epoch": 0.456444711505716, "grad_norm": 0.16589704155921936, "learning_rate": 5.853278873990811e-06, "loss": 1.4207, "step": 1178 }, { "epoch": 0.4568321857939212, "grad_norm": 0.16235943138599396, "learning_rate": 5.847155772624954e-06, "loss": 1.4281, "step": 1179 }, { "epoch": 0.4572196600821264, "grad_norm": 0.16977904736995697, "learning_rate": 5.841031362954629e-06, "loss": 1.3962, "step": 1180 }, { "epoch": 0.45760713437033157, "grad_norm": 0.17245438694953918, "learning_rate": 5.834905654438064e-06, "loss": 1.4069, "step": 1181 }, { "epoch": 0.45799460865853675, "grad_norm": 0.1699805110692978, "learning_rate": 5.82877865653549e-06, "loss": 1.4298, "step": 1182 }, { "epoch": 0.458382082946742, "grad_norm": 0.17535148561000824, "learning_rate": 5.822650378709136e-06, "loss": 1.3901, "step": 1183 }, { "epoch": 0.45876955723494717, "grad_norm": 0.1738366186618805, "learning_rate": 5.816520830423203e-06, "loss": 1.4523, "step": 1184 }, { "epoch": 0.45915703152315235, "grad_norm": 0.17489930987358093, "learning_rate": 5.810390021143852e-06, "loss": 1.3997, "step": 1185 }, { "epoch": 0.45954450581135753, "grad_norm": 0.16970095038414001, "learning_rate": 5.804257960339199e-06, "loss": 1.3808, "step": 1186 }, { "epoch": 0.4599319800995627, "grad_norm": 0.17519405484199524, "learning_rate": 5.798124657479286e-06, "loss": 1.4147, "step": 1187 }, { "epoch": 0.46031945438776795, "grad_norm": 0.16436678171157837, "learning_rate": 5.791990122036075e-06, "loss": 1.4438, "step": 1188 }, { "epoch": 0.4607069286759731, "grad_norm": 0.1484728753566742, "learning_rate": 5.785854363483432e-06, "loss": 1.4498, "step": 1189 }, { "epoch": 0.4610944029641783, "grad_norm": 0.1630781888961792, "learning_rate": 5.779717391297112e-06, "loss": 1.3997, "step": 1190 }, { "epoch": 0.4614818772523835, "grad_norm": 0.17815375328063965, "learning_rate": 5.773579214954743e-06, "loss": 1.4339, "step": 1191 }, { "epoch": 0.46186935154058867, "grad_norm": 0.16182856261730194, "learning_rate": 5.7674398439358145e-06, "loss": 1.4002, "step": 1192 }, { "epoch": 0.4622568258287939, "grad_norm": 0.16087403893470764, "learning_rate": 5.761299287721661e-06, "loss": 1.4075, "step": 1193 }, { "epoch": 0.4626443001169991, "grad_norm": 0.17332421243190765, "learning_rate": 5.755157555795445e-06, "loss": 1.4786, "step": 1194 }, { "epoch": 0.46303177440520427, "grad_norm": 0.17085309326648712, "learning_rate": 5.749014657642147e-06, "loss": 1.4277, "step": 1195 }, { "epoch": 0.46341924869340945, "grad_norm": 0.16695760190486908, "learning_rate": 5.742870602748547e-06, "loss": 1.3899, "step": 1196 }, { "epoch": 0.4638067229816146, "grad_norm": 0.16975972056388855, "learning_rate": 5.736725400603214e-06, "loss": 1.3782, "step": 1197 }, { "epoch": 0.46419419726981986, "grad_norm": 0.17631316184997559, "learning_rate": 5.730579060696486e-06, "loss": 1.4219, "step": 1198 }, { "epoch": 0.46458167155802504, "grad_norm": 0.1716679483652115, "learning_rate": 5.7244315925204586e-06, "loss": 1.4559, "step": 1199 }, { "epoch": 0.4649691458462302, "grad_norm": 0.174521803855896, "learning_rate": 5.71828300556897e-06, "loss": 1.4474, "step": 1200 }, { "epoch": 0.4653566201344354, "grad_norm": 0.17839747667312622, "learning_rate": 5.712133309337586e-06, "loss": 1.3764, "step": 1201 }, { "epoch": 0.4657440944226406, "grad_norm": 0.182560533285141, "learning_rate": 5.705982513323588e-06, "loss": 1.4579, "step": 1202 }, { "epoch": 0.4661315687108458, "grad_norm": 0.16891227662563324, "learning_rate": 5.6998306270259525e-06, "loss": 1.4088, "step": 1203 }, { "epoch": 0.466519042999051, "grad_norm": 0.161055326461792, "learning_rate": 5.693677659945343e-06, "loss": 1.4171, "step": 1204 }, { "epoch": 0.4669065172872562, "grad_norm": 0.16830380260944366, "learning_rate": 5.687523621584087e-06, "loss": 1.4625, "step": 1205 }, { "epoch": 0.46729399157546136, "grad_norm": 0.15619122982025146, "learning_rate": 5.681368521446171e-06, "loss": 1.4083, "step": 1206 }, { "epoch": 0.46768146586366655, "grad_norm": 0.15866610407829285, "learning_rate": 5.675212369037221e-06, "loss": 1.3593, "step": 1207 }, { "epoch": 0.4680689401518718, "grad_norm": 0.17369763553142548, "learning_rate": 5.6690551738644865e-06, "loss": 1.4126, "step": 1208 }, { "epoch": 0.46845641444007696, "grad_norm": 0.1869356334209442, "learning_rate": 5.662896945436827e-06, "loss": 1.4654, "step": 1209 }, { "epoch": 0.46884388872828214, "grad_norm": 0.18220089375972748, "learning_rate": 5.6567376932647e-06, "loss": 1.4289, "step": 1210 }, { "epoch": 0.4692313630164873, "grad_norm": 0.17128990590572357, "learning_rate": 5.650577426860141e-06, "loss": 1.3501, "step": 1211 }, { "epoch": 0.4696188373046925, "grad_norm": 0.1619592308998108, "learning_rate": 5.6444161557367534e-06, "loss": 1.4053, "step": 1212 }, { "epoch": 0.47000631159289774, "grad_norm": 0.17500106990337372, "learning_rate": 5.638253889409695e-06, "loss": 1.3849, "step": 1213 }, { "epoch": 0.4703937858811029, "grad_norm": 0.16830924153327942, "learning_rate": 5.6320906373956555e-06, "loss": 1.4495, "step": 1214 }, { "epoch": 0.4707812601693081, "grad_norm": 0.1802515983581543, "learning_rate": 5.6259264092128486e-06, "loss": 1.4406, "step": 1215 }, { "epoch": 0.4711687344575133, "grad_norm": 0.18993596732616425, "learning_rate": 5.619761214380998e-06, "loss": 1.3952, "step": 1216 }, { "epoch": 0.47155620874571846, "grad_norm": 0.156313955783844, "learning_rate": 5.613595062421317e-06, "loss": 1.3915, "step": 1217 }, { "epoch": 0.4719436830339237, "grad_norm": 0.1584126204252243, "learning_rate": 5.607427962856499e-06, "loss": 1.3962, "step": 1218 }, { "epoch": 0.4723311573221289, "grad_norm": 0.15167003870010376, "learning_rate": 5.601259925210699e-06, "loss": 1.3558, "step": 1219 }, { "epoch": 0.47271863161033406, "grad_norm": 0.17863698303699493, "learning_rate": 5.595090959009525e-06, "loss": 1.41, "step": 1220 }, { "epoch": 0.47310610589853924, "grad_norm": 0.1904880851507187, "learning_rate": 5.588921073780013e-06, "loss": 1.3915, "step": 1221 }, { "epoch": 0.4734935801867444, "grad_norm": 0.17093965411186218, "learning_rate": 5.58275027905062e-06, "loss": 1.4025, "step": 1222 }, { "epoch": 0.47388105447494966, "grad_norm": 0.1799425482749939, "learning_rate": 5.576578584351214e-06, "loss": 1.368, "step": 1223 }, { "epoch": 0.47426852876315484, "grad_norm": 0.16649112105369568, "learning_rate": 5.570405999213044e-06, "loss": 1.4017, "step": 1224 }, { "epoch": 0.47465600305136, "grad_norm": 0.1667657047510147, "learning_rate": 5.5642325331687385e-06, "loss": 1.3861, "step": 1225 }, { "epoch": 0.4750434773395652, "grad_norm": 0.19311518967151642, "learning_rate": 5.558058195752286e-06, "loss": 1.4142, "step": 1226 }, { "epoch": 0.4754309516277704, "grad_norm": 0.1683465987443924, "learning_rate": 5.551882996499019e-06, "loss": 1.4224, "step": 1227 }, { "epoch": 0.4758184259159756, "grad_norm": 0.17655274271965027, "learning_rate": 5.5457069449456055e-06, "loss": 1.3772, "step": 1228 }, { "epoch": 0.4762059002041808, "grad_norm": 0.17944836616516113, "learning_rate": 5.539530050630024e-06, "loss": 1.418, "step": 1229 }, { "epoch": 0.476593374492386, "grad_norm": 0.16521701216697693, "learning_rate": 5.5333523230915585e-06, "loss": 1.3823, "step": 1230 }, { "epoch": 0.47698084878059116, "grad_norm": 0.16083993017673492, "learning_rate": 5.527173771870779e-06, "loss": 1.3391, "step": 1231 }, { "epoch": 0.47736832306879634, "grad_norm": 0.17628128826618195, "learning_rate": 5.5209944065095255e-06, "loss": 1.4306, "step": 1232 }, { "epoch": 0.4777557973570016, "grad_norm": 0.1726638525724411, "learning_rate": 5.514814236550898e-06, "loss": 1.4437, "step": 1233 }, { "epoch": 0.47814327164520676, "grad_norm": 0.17284612357616425, "learning_rate": 5.5086332715392374e-06, "loss": 1.4243, "step": 1234 }, { "epoch": 0.47853074593341194, "grad_norm": 0.18134444952011108, "learning_rate": 5.502451521020114e-06, "loss": 1.4368, "step": 1235 }, { "epoch": 0.4789182202216171, "grad_norm": 0.1595321148633957, "learning_rate": 5.496268994540309e-06, "loss": 1.3877, "step": 1236 }, { "epoch": 0.4793056945098223, "grad_norm": 0.16382324695587158, "learning_rate": 5.490085701647805e-06, "loss": 1.3834, "step": 1237 }, { "epoch": 0.47969316879802754, "grad_norm": 0.16654498875141144, "learning_rate": 5.483901651891762e-06, "loss": 1.4344, "step": 1238 }, { "epoch": 0.4800806430862327, "grad_norm": 0.16116628050804138, "learning_rate": 5.477716854822519e-06, "loss": 1.3868, "step": 1239 }, { "epoch": 0.4804681173744379, "grad_norm": 0.17746926844120026, "learning_rate": 5.4715313199915595e-06, "loss": 1.3762, "step": 1240 }, { "epoch": 0.4808555916626431, "grad_norm": 0.17140384018421173, "learning_rate": 5.465345056951512e-06, "loss": 1.4025, "step": 1241 }, { "epoch": 0.48124306595084826, "grad_norm": 0.19219277799129486, "learning_rate": 5.459158075256128e-06, "loss": 1.3967, "step": 1242 }, { "epoch": 0.4816305402390535, "grad_norm": 0.17507079243659973, "learning_rate": 5.452970384460267e-06, "loss": 1.3878, "step": 1243 }, { "epoch": 0.4820180145272587, "grad_norm": 0.17386656999588013, "learning_rate": 5.446781994119886e-06, "loss": 1.4377, "step": 1244 }, { "epoch": 0.48240548881546386, "grad_norm": 0.16569776833057404, "learning_rate": 5.4405929137920225e-06, "loss": 1.3528, "step": 1245 }, { "epoch": 0.48279296310366904, "grad_norm": 0.15871652960777283, "learning_rate": 5.434403153034777e-06, "loss": 1.437, "step": 1246 }, { "epoch": 0.4831804373918742, "grad_norm": 0.1783515214920044, "learning_rate": 5.428212721407304e-06, "loss": 1.4114, "step": 1247 }, { "epoch": 0.48356791168007945, "grad_norm": 0.16589252650737762, "learning_rate": 5.4220216284697925e-06, "loss": 1.371, "step": 1248 }, { "epoch": 0.48395538596828463, "grad_norm": 0.17244261503219604, "learning_rate": 5.415829883783452e-06, "loss": 1.4418, "step": 1249 }, { "epoch": 0.4843428602564898, "grad_norm": 0.18761850893497467, "learning_rate": 5.409637496910499e-06, "loss": 1.4331, "step": 1250 }, { "epoch": 0.484730334544695, "grad_norm": 0.17848147451877594, "learning_rate": 5.403444477414146e-06, "loss": 1.3937, "step": 1251 }, { "epoch": 0.4851178088329002, "grad_norm": 0.16284756362438202, "learning_rate": 5.397250834858573e-06, "loss": 1.3674, "step": 1252 }, { "epoch": 0.4855052831211054, "grad_norm": 0.1680128127336502, "learning_rate": 5.391056578808931e-06, "loss": 1.3713, "step": 1253 }, { "epoch": 0.4858927574093106, "grad_norm": 0.17388911545276642, "learning_rate": 5.384861718831315e-06, "loss": 1.3894, "step": 1254 }, { "epoch": 0.4862802316975158, "grad_norm": 0.19191807508468628, "learning_rate": 5.378666264492752e-06, "loss": 1.3971, "step": 1255 }, { "epoch": 0.48666770598572096, "grad_norm": 0.18213531374931335, "learning_rate": 5.372470225361189e-06, "loss": 1.3739, "step": 1256 }, { "epoch": 0.48705518027392614, "grad_norm": 0.17884351313114166, "learning_rate": 5.366273611005475e-06, "loss": 1.3955, "step": 1257 }, { "epoch": 0.48744265456213137, "grad_norm": 0.18737460672855377, "learning_rate": 5.360076430995347e-06, "loss": 1.3532, "step": 1258 }, { "epoch": 0.48783012885033655, "grad_norm": 0.17311598360538483, "learning_rate": 5.353878694901416e-06, "loss": 1.4251, "step": 1259 }, { "epoch": 0.48821760313854173, "grad_norm": 0.1673484593629837, "learning_rate": 5.347680412295152e-06, "loss": 1.4092, "step": 1260 }, { "epoch": 0.4886050774267469, "grad_norm": 0.18345338106155396, "learning_rate": 5.341481592748868e-06, "loss": 1.4091, "step": 1261 }, { "epoch": 0.4889925517149521, "grad_norm": 0.1577897071838379, "learning_rate": 5.3352822458357075e-06, "loss": 1.3964, "step": 1262 }, { "epoch": 0.48938002600315733, "grad_norm": 0.16556425392627716, "learning_rate": 5.329082381129629e-06, "loss": 1.4155, "step": 1263 }, { "epoch": 0.4897675002913625, "grad_norm": 0.18216826021671295, "learning_rate": 5.322882008205388e-06, "loss": 1.3956, "step": 1264 }, { "epoch": 0.4901549745795677, "grad_norm": 0.16069266200065613, "learning_rate": 5.316681136638525e-06, "loss": 1.4446, "step": 1265 }, { "epoch": 0.4905424488677729, "grad_norm": 0.16232500970363617, "learning_rate": 5.310479776005355e-06, "loss": 1.4272, "step": 1266 }, { "epoch": 0.49092992315597805, "grad_norm": 0.17337587475776672, "learning_rate": 5.304277935882944e-06, "loss": 1.4079, "step": 1267 }, { "epoch": 0.4913173974441833, "grad_norm": 0.17269353568553925, "learning_rate": 5.2980756258491e-06, "loss": 1.4313, "step": 1268 }, { "epoch": 0.49170487173238847, "grad_norm": 0.1662793904542923, "learning_rate": 5.291872855482356e-06, "loss": 1.3543, "step": 1269 }, { "epoch": 0.49209234602059365, "grad_norm": 0.1733420044183731, "learning_rate": 5.285669634361957e-06, "loss": 1.3693, "step": 1270 }, { "epoch": 0.49247982030879883, "grad_norm": 0.17704029381275177, "learning_rate": 5.279465972067843e-06, "loss": 1.3502, "step": 1271 }, { "epoch": 0.492867294597004, "grad_norm": 0.1818942129611969, "learning_rate": 5.273261878180636e-06, "loss": 1.3701, "step": 1272 }, { "epoch": 0.49325476888520925, "grad_norm": 0.17481544613838196, "learning_rate": 5.267057362281625e-06, "loss": 1.4136, "step": 1273 }, { "epoch": 0.49364224317341443, "grad_norm": 0.17805305123329163, "learning_rate": 5.260852433952748e-06, "loss": 1.4107, "step": 1274 }, { "epoch": 0.4940297174616196, "grad_norm": 0.17063778638839722, "learning_rate": 5.254647102776585e-06, "loss": 1.3602, "step": 1275 }, { "epoch": 0.4944171917498248, "grad_norm": 0.16799448430538177, "learning_rate": 5.2484413783363335e-06, "loss": 1.408, "step": 1276 }, { "epoch": 0.49480466603802997, "grad_norm": 0.16338828206062317, "learning_rate": 5.242235270215801e-06, "loss": 1.4296, "step": 1277 }, { "epoch": 0.4951921403262352, "grad_norm": 0.16728238761425018, "learning_rate": 5.236028787999385e-06, "loss": 1.3761, "step": 1278 }, { "epoch": 0.4955796146144404, "grad_norm": 0.1836881935596466, "learning_rate": 5.229821941272063e-06, "loss": 1.396, "step": 1279 }, { "epoch": 0.49596708890264557, "grad_norm": 0.166386678814888, "learning_rate": 5.223614739619375e-06, "loss": 1.3579, "step": 1280 }, { "epoch": 0.49635456319085075, "grad_norm": 0.1810721904039383, "learning_rate": 5.217407192627408e-06, "loss": 1.4481, "step": 1281 }, { "epoch": 0.49674203747905593, "grad_norm": 0.16548117995262146, "learning_rate": 5.2111993098827855e-06, "loss": 1.3806, "step": 1282 }, { "epoch": 0.49712951176726117, "grad_norm": 0.18060392141342163, "learning_rate": 5.204991100972644e-06, "loss": 1.419, "step": 1283 }, { "epoch": 0.49751698605546635, "grad_norm": 0.163921520113945, "learning_rate": 5.19878257548463e-06, "loss": 1.4083, "step": 1284 }, { "epoch": 0.49790446034367153, "grad_norm": 0.1668134182691574, "learning_rate": 5.192573743006873e-06, "loss": 1.4753, "step": 1285 }, { "epoch": 0.4982919346318767, "grad_norm": 0.1955236792564392, "learning_rate": 5.186364613127981e-06, "loss": 1.3808, "step": 1286 }, { "epoch": 0.4986794089200819, "grad_norm": 0.1771843284368515, "learning_rate": 5.18015519543702e-06, "loss": 1.4139, "step": 1287 }, { "epoch": 0.4990668832082871, "grad_norm": 0.17899397015571594, "learning_rate": 5.173945499523499e-06, "loss": 1.4047, "step": 1288 }, { "epoch": 0.4994543574964923, "grad_norm": 0.17400653660297394, "learning_rate": 5.167735534977358e-06, "loss": 1.3381, "step": 1289 }, { "epoch": 0.4998418317846975, "grad_norm": 0.18282900750637054, "learning_rate": 5.161525311388953e-06, "loss": 1.4147, "step": 1290 }, { "epoch": 0.5002293060729027, "grad_norm": 0.1640361249446869, "learning_rate": 5.155314838349037e-06, "loss": 1.4047, "step": 1291 }, { "epoch": 0.5006167803611079, "grad_norm": 0.1723088026046753, "learning_rate": 5.149104125448752e-06, "loss": 1.421, "step": 1292 }, { "epoch": 0.5010042546493131, "grad_norm": 0.1674494445323944, "learning_rate": 5.142893182279608e-06, "loss": 1.4436, "step": 1293 }, { "epoch": 0.5013917289375183, "grad_norm": 0.1896222084760666, "learning_rate": 5.13668201843347e-06, "loss": 1.3967, "step": 1294 }, { "epoch": 0.5017792032257234, "grad_norm": 0.18183128535747528, "learning_rate": 5.130470643502544e-06, "loss": 1.4363, "step": 1295 }, { "epoch": 0.5021666775139286, "grad_norm": 0.14944671094417572, "learning_rate": 5.124259067079365e-06, "loss": 1.3917, "step": 1296 }, { "epoch": 0.5025541518021338, "grad_norm": 0.1771329790353775, "learning_rate": 5.1180472987567776e-06, "loss": 1.4357, "step": 1297 }, { "epoch": 0.502941626090339, "grad_norm": 0.17076587677001953, "learning_rate": 5.11183534812792e-06, "loss": 1.3735, "step": 1298 }, { "epoch": 0.5033291003785442, "grad_norm": 0.1554473340511322, "learning_rate": 5.1056232247862145e-06, "loss": 1.3924, "step": 1299 }, { "epoch": 0.5037165746667495, "grad_norm": 0.16912999749183655, "learning_rate": 5.099410938325351e-06, "loss": 1.4176, "step": 1300 }, { "epoch": 0.5041040489549546, "grad_norm": 0.17451034486293793, "learning_rate": 5.093198498339269e-06, "loss": 1.4149, "step": 1301 }, { "epoch": 0.5044915232431598, "grad_norm": 0.17700143158435822, "learning_rate": 5.086985914422145e-06, "loss": 1.3777, "step": 1302 }, { "epoch": 0.504878997531365, "grad_norm": 0.15840089321136475, "learning_rate": 5.080773196168384e-06, "loss": 1.3912, "step": 1303 }, { "epoch": 0.5052664718195702, "grad_norm": 0.17308010160923004, "learning_rate": 5.074560353172587e-06, "loss": 1.4429, "step": 1304 }, { "epoch": 0.5056539461077754, "grad_norm": 0.17095321416854858, "learning_rate": 5.068347395029558e-06, "loss": 1.423, "step": 1305 }, { "epoch": 0.5060414203959805, "grad_norm": 0.18255391716957092, "learning_rate": 5.062134331334273e-06, "loss": 1.3782, "step": 1306 }, { "epoch": 0.5064288946841857, "grad_norm": 0.1728234589099884, "learning_rate": 5.055921171681873e-06, "loss": 1.4571, "step": 1307 }, { "epoch": 0.5068163689723909, "grad_norm": 0.16988715529441833, "learning_rate": 5.04970792566765e-06, "loss": 1.3949, "step": 1308 }, { "epoch": 0.5072038432605961, "grad_norm": 0.18354693055152893, "learning_rate": 5.043494602887021e-06, "loss": 1.4149, "step": 1309 }, { "epoch": 0.5075913175488014, "grad_norm": 0.18179309368133545, "learning_rate": 5.037281212935533e-06, "loss": 1.4286, "step": 1310 }, { "epoch": 0.5079787918370066, "grad_norm": 0.1996416598558426, "learning_rate": 5.031067765408827e-06, "loss": 1.3909, "step": 1311 }, { "epoch": 0.5083662661252117, "grad_norm": 0.1782781034708023, "learning_rate": 5.024854269902637e-06, "loss": 1.4002, "step": 1312 }, { "epoch": 0.5087537404134169, "grad_norm": 0.17283526062965393, "learning_rate": 5.018640736012772e-06, "loss": 1.4198, "step": 1313 }, { "epoch": 0.5091412147016221, "grad_norm": 0.19283011555671692, "learning_rate": 5.012427173335098e-06, "loss": 1.4109, "step": 1314 }, { "epoch": 0.5095286889898273, "grad_norm": 0.17696219682693481, "learning_rate": 5.006213591465527e-06, "loss": 1.4136, "step": 1315 }, { "epoch": 0.5099161632780325, "grad_norm": 0.1825561672449112, "learning_rate": 5e-06, "loss": 1.3707, "step": 1316 }, { "epoch": 0.5103036375662376, "grad_norm": 0.18883338570594788, "learning_rate": 4.9937864085344754e-06, "loss": 1.4363, "step": 1317 }, { "epoch": 0.5106911118544428, "grad_norm": 0.1909293234348297, "learning_rate": 4.987572826664903e-06, "loss": 1.4296, "step": 1318 }, { "epoch": 0.511078586142648, "grad_norm": 0.1768261045217514, "learning_rate": 4.981359263987231e-06, "loss": 1.4111, "step": 1319 }, { "epoch": 0.5114660604308533, "grad_norm": 0.1749366670846939, "learning_rate": 4.975145730097364e-06, "loss": 1.4049, "step": 1320 }, { "epoch": 0.5118535347190585, "grad_norm": 0.20439854264259338, "learning_rate": 4.968932234591176e-06, "loss": 1.436, "step": 1321 }, { "epoch": 0.5122410090072637, "grad_norm": 0.17655953764915466, "learning_rate": 4.962718787064469e-06, "loss": 1.4021, "step": 1322 }, { "epoch": 0.5126284832954688, "grad_norm": 0.1773209571838379, "learning_rate": 4.9565053971129805e-06, "loss": 1.4471, "step": 1323 }, { "epoch": 0.513015957583674, "grad_norm": 0.17433862388134003, "learning_rate": 4.9502920743323525e-06, "loss": 1.3699, "step": 1324 }, { "epoch": 0.5134034318718792, "grad_norm": 0.1732746958732605, "learning_rate": 4.944078828318129e-06, "loss": 1.4182, "step": 1325 }, { "epoch": 0.5137909061600844, "grad_norm": 0.1719723790884018, "learning_rate": 4.937865668665729e-06, "loss": 1.4049, "step": 1326 }, { "epoch": 0.5141783804482896, "grad_norm": 0.17275363206863403, "learning_rate": 4.931652604970445e-06, "loss": 1.4204, "step": 1327 }, { "epoch": 0.5145658547364947, "grad_norm": 0.1791892945766449, "learning_rate": 4.925439646827414e-06, "loss": 1.4119, "step": 1328 }, { "epoch": 0.5149533290246999, "grad_norm": 0.19768878817558289, "learning_rate": 4.919226803831619e-06, "loss": 1.4055, "step": 1329 }, { "epoch": 0.5153408033129052, "grad_norm": 0.17722371220588684, "learning_rate": 4.913014085577855e-06, "loss": 1.3673, "step": 1330 }, { "epoch": 0.5157282776011104, "grad_norm": 0.17486241459846497, "learning_rate": 4.906801501660733e-06, "loss": 1.4394, "step": 1331 }, { "epoch": 0.5161157518893156, "grad_norm": 0.18968409299850464, "learning_rate": 4.900589061674649e-06, "loss": 1.4249, "step": 1332 }, { "epoch": 0.5165032261775208, "grad_norm": 0.1774653196334839, "learning_rate": 4.894376775213786e-06, "loss": 1.3744, "step": 1333 }, { "epoch": 0.5168907004657259, "grad_norm": 0.1767013967037201, "learning_rate": 4.88816465187208e-06, "loss": 1.3623, "step": 1334 }, { "epoch": 0.5172781747539311, "grad_norm": 0.18801890313625336, "learning_rate": 4.881952701243224e-06, "loss": 1.4222, "step": 1335 }, { "epoch": 0.5176656490421363, "grad_norm": 0.15637265145778656, "learning_rate": 4.875740932920635e-06, "loss": 1.3835, "step": 1336 }, { "epoch": 0.5180531233303415, "grad_norm": 0.1605391651391983, "learning_rate": 4.869529356497456e-06, "loss": 1.4511, "step": 1337 }, { "epoch": 0.5184405976185467, "grad_norm": 0.16668446362018585, "learning_rate": 4.863317981566531e-06, "loss": 1.3876, "step": 1338 }, { "epoch": 0.5188280719067518, "grad_norm": 0.18239177763462067, "learning_rate": 4.857106817720394e-06, "loss": 1.435, "step": 1339 }, { "epoch": 0.5192155461949571, "grad_norm": 0.1727922111749649, "learning_rate": 4.850895874551248e-06, "loss": 1.3866, "step": 1340 }, { "epoch": 0.5196030204831623, "grad_norm": 0.15481922030448914, "learning_rate": 4.844685161650964e-06, "loss": 1.3388, "step": 1341 }, { "epoch": 0.5199904947713675, "grad_norm": 0.1754877269268036, "learning_rate": 4.838474688611048e-06, "loss": 1.4303, "step": 1342 }, { "epoch": 0.5203779690595727, "grad_norm": 0.17923691868782043, "learning_rate": 4.8322644650226425e-06, "loss": 1.3624, "step": 1343 }, { "epoch": 0.5207654433477779, "grad_norm": 0.15487973392009735, "learning_rate": 4.8260545004765034e-06, "loss": 1.4037, "step": 1344 }, { "epoch": 0.521152917635983, "grad_norm": 0.16563992202281952, "learning_rate": 4.819844804562982e-06, "loss": 1.369, "step": 1345 }, { "epoch": 0.5215403919241882, "grad_norm": 0.1691073179244995, "learning_rate": 4.8136353868720215e-06, "loss": 1.3619, "step": 1346 }, { "epoch": 0.5219278662123934, "grad_norm": 0.1665688157081604, "learning_rate": 4.807426256993129e-06, "loss": 1.4184, "step": 1347 }, { "epoch": 0.5223153405005986, "grad_norm": 0.19875416159629822, "learning_rate": 4.801217424515373e-06, "loss": 1.3861, "step": 1348 }, { "epoch": 0.5227028147888038, "grad_norm": 0.1682368516921997, "learning_rate": 4.795008899027357e-06, "loss": 1.448, "step": 1349 }, { "epoch": 0.523090289077009, "grad_norm": 0.16035263240337372, "learning_rate": 4.788800690117217e-06, "loss": 1.4032, "step": 1350 }, { "epoch": 0.5234777633652142, "grad_norm": 0.16288919746875763, "learning_rate": 4.7825928073725924e-06, "loss": 1.3895, "step": 1351 }, { "epoch": 0.5238652376534194, "grad_norm": 0.17447850108146667, "learning_rate": 4.776385260380628e-06, "loss": 1.3653, "step": 1352 }, { "epoch": 0.5242527119416246, "grad_norm": 0.17730212211608887, "learning_rate": 4.770178058727939e-06, "loss": 1.349, "step": 1353 }, { "epoch": 0.5246401862298298, "grad_norm": 0.2002531886100769, "learning_rate": 4.763971212000617e-06, "loss": 1.4309, "step": 1354 }, { "epoch": 0.525027660518035, "grad_norm": 0.1717255860567093, "learning_rate": 4.757764729784201e-06, "loss": 1.4298, "step": 1355 }, { "epoch": 0.5254151348062401, "grad_norm": 0.17800889909267426, "learning_rate": 4.751558621663668e-06, "loss": 1.4331, "step": 1356 }, { "epoch": 0.5258026090944453, "grad_norm": 0.16502103209495544, "learning_rate": 4.745352897223415e-06, "loss": 1.4485, "step": 1357 }, { "epoch": 0.5261900833826505, "grad_norm": 0.17099234461784363, "learning_rate": 4.739147566047253e-06, "loss": 1.3772, "step": 1358 }, { "epoch": 0.5265775576708557, "grad_norm": 0.16848300397396088, "learning_rate": 4.732942637718376e-06, "loss": 1.4137, "step": 1359 }, { "epoch": 0.526965031959061, "grad_norm": 0.17936858534812927, "learning_rate": 4.726738121819365e-06, "loss": 1.4355, "step": 1360 }, { "epoch": 0.5273525062472662, "grad_norm": 0.16731949150562286, "learning_rate": 4.720534027932158e-06, "loss": 1.3815, "step": 1361 }, { "epoch": 0.5277399805354713, "grad_norm": 0.1983889490365982, "learning_rate": 4.714330365638044e-06, "loss": 1.3982, "step": 1362 }, { "epoch": 0.5281274548236765, "grad_norm": 0.16804300248622894, "learning_rate": 4.708127144517644e-06, "loss": 1.4151, "step": 1363 }, { "epoch": 0.5285149291118817, "grad_norm": 0.18182480335235596, "learning_rate": 4.701924374150901e-06, "loss": 1.4202, "step": 1364 }, { "epoch": 0.5289024034000869, "grad_norm": 0.17097985744476318, "learning_rate": 4.695722064117056e-06, "loss": 1.3803, "step": 1365 }, { "epoch": 0.529289877688292, "grad_norm": 0.186282217502594, "learning_rate": 4.689520223994646e-06, "loss": 1.4006, "step": 1366 }, { "epoch": 0.5296773519764972, "grad_norm": 0.1653113067150116, "learning_rate": 4.683318863361475e-06, "loss": 1.3661, "step": 1367 }, { "epoch": 0.5300648262647024, "grad_norm": 0.18289950489997864, "learning_rate": 4.6771179917946145e-06, "loss": 1.3239, "step": 1368 }, { "epoch": 0.5304523005529076, "grad_norm": 0.17741255462169647, "learning_rate": 4.6709176188703736e-06, "loss": 1.3874, "step": 1369 }, { "epoch": 0.5308397748411129, "grad_norm": 0.1585928052663803, "learning_rate": 4.664717754164293e-06, "loss": 1.4703, "step": 1370 }, { "epoch": 0.5312272491293181, "grad_norm": 0.1670512706041336, "learning_rate": 4.658518407251134e-06, "loss": 1.3552, "step": 1371 }, { "epoch": 0.5316147234175232, "grad_norm": 0.18530553579330444, "learning_rate": 4.6523195877048495e-06, "loss": 1.421, "step": 1372 }, { "epoch": 0.5320021977057284, "grad_norm": 0.1734737902879715, "learning_rate": 4.646121305098586e-06, "loss": 1.4378, "step": 1373 }, { "epoch": 0.5323896719939336, "grad_norm": 0.19610601663589478, "learning_rate": 4.639923569004654e-06, "loss": 1.3729, "step": 1374 }, { "epoch": 0.5327771462821388, "grad_norm": 0.20131148397922516, "learning_rate": 4.6337263889945274e-06, "loss": 1.3832, "step": 1375 }, { "epoch": 0.533164620570344, "grad_norm": 0.18909497559070587, "learning_rate": 4.627529774638812e-06, "loss": 1.398, "step": 1376 }, { "epoch": 0.5335520948585492, "grad_norm": 0.18387851119041443, "learning_rate": 4.621333735507251e-06, "loss": 1.3581, "step": 1377 }, { "epoch": 0.5339395691467543, "grad_norm": 0.17950032651424408, "learning_rate": 4.615138281168687e-06, "loss": 1.413, "step": 1378 }, { "epoch": 0.5343270434349595, "grad_norm": 0.1718689650297165, "learning_rate": 4.608943421191072e-06, "loss": 1.373, "step": 1379 }, { "epoch": 0.5347145177231648, "grad_norm": 0.17184041440486908, "learning_rate": 4.602749165141429e-06, "loss": 1.3899, "step": 1380 }, { "epoch": 0.53510199201137, "grad_norm": 0.187111034989357, "learning_rate": 4.5965555225858575e-06, "loss": 1.3967, "step": 1381 }, { "epoch": 0.5354894662995752, "grad_norm": 0.17013849318027496, "learning_rate": 4.590362503089501e-06, "loss": 1.368, "step": 1382 }, { "epoch": 0.5358769405877803, "grad_norm": 0.18679924309253693, "learning_rate": 4.58417011621655e-06, "loss": 1.3877, "step": 1383 }, { "epoch": 0.5362644148759855, "grad_norm": 0.18026600778102875, "learning_rate": 4.577978371530208e-06, "loss": 1.4039, "step": 1384 }, { "epoch": 0.5366518891641907, "grad_norm": 0.16442783176898956, "learning_rate": 4.5717872785926976e-06, "loss": 1.4635, "step": 1385 }, { "epoch": 0.5370393634523959, "grad_norm": 0.17293189465999603, "learning_rate": 4.5655968469652234e-06, "loss": 1.4053, "step": 1386 }, { "epoch": 0.5374268377406011, "grad_norm": 0.1842036098241806, "learning_rate": 4.559407086207979e-06, "loss": 1.3927, "step": 1387 }, { "epoch": 0.5378143120288063, "grad_norm": 0.1791335493326187, "learning_rate": 4.5532180058801145e-06, "loss": 1.387, "step": 1388 }, { "epoch": 0.5382017863170114, "grad_norm": 0.17194968461990356, "learning_rate": 4.547029615539735e-06, "loss": 1.4515, "step": 1389 }, { "epoch": 0.5385892606052167, "grad_norm": 0.18875034153461456, "learning_rate": 4.540841924743873e-06, "loss": 1.4489, "step": 1390 }, { "epoch": 0.5389767348934219, "grad_norm": 0.1824525147676468, "learning_rate": 4.534654943048489e-06, "loss": 1.3998, "step": 1391 }, { "epoch": 0.5393642091816271, "grad_norm": 0.16469988226890564, "learning_rate": 4.5284686800084405e-06, "loss": 1.4513, "step": 1392 }, { "epoch": 0.5397516834698323, "grad_norm": 0.18099071085453033, "learning_rate": 4.522283145177483e-06, "loss": 1.4056, "step": 1393 }, { "epoch": 0.5401391577580374, "grad_norm": 0.18242214620113373, "learning_rate": 4.516098348108238e-06, "loss": 1.3993, "step": 1394 }, { "epoch": 0.5405266320462426, "grad_norm": 0.17531774938106537, "learning_rate": 4.509914298352197e-06, "loss": 1.4048, "step": 1395 }, { "epoch": 0.5409141063344478, "grad_norm": 0.1774042248725891, "learning_rate": 4.5037310054596936e-06, "loss": 1.357, "step": 1396 }, { "epoch": 0.541301580622653, "grad_norm": 0.17292766273021698, "learning_rate": 4.497548478979887e-06, "loss": 1.3717, "step": 1397 }, { "epoch": 0.5416890549108582, "grad_norm": 0.17667751014232635, "learning_rate": 4.491366728460765e-06, "loss": 1.3458, "step": 1398 }, { "epoch": 0.5420765291990634, "grad_norm": 0.1781769096851349, "learning_rate": 4.485185763449104e-06, "loss": 1.4563, "step": 1399 }, { "epoch": 0.5424640034872686, "grad_norm": 0.16901659965515137, "learning_rate": 4.479005593490477e-06, "loss": 1.4335, "step": 1400 }, { "epoch": 0.5428514777754738, "grad_norm": 0.18356795608997345, "learning_rate": 4.472826228129223e-06, "loss": 1.4152, "step": 1401 }, { "epoch": 0.543238952063679, "grad_norm": 0.19738247990608215, "learning_rate": 4.466647676908444e-06, "loss": 1.395, "step": 1402 }, { "epoch": 0.5436264263518842, "grad_norm": 0.19750946760177612, "learning_rate": 4.460469949369977e-06, "loss": 1.3697, "step": 1403 }, { "epoch": 0.5440139006400894, "grad_norm": 0.1791369765996933, "learning_rate": 4.454293055054397e-06, "loss": 1.3758, "step": 1404 }, { "epoch": 0.5444013749282945, "grad_norm": 0.1628568023443222, "learning_rate": 4.448117003500982e-06, "loss": 1.4158, "step": 1405 }, { "epoch": 0.5447888492164997, "grad_norm": 0.18445752561092377, "learning_rate": 4.441941804247717e-06, "loss": 1.3717, "step": 1406 }, { "epoch": 0.5451763235047049, "grad_norm": 0.16347937285900116, "learning_rate": 4.435767466831263e-06, "loss": 1.373, "step": 1407 }, { "epoch": 0.5455637977929101, "grad_norm": 0.18343698978424072, "learning_rate": 4.429594000786958e-06, "loss": 1.3752, "step": 1408 }, { "epoch": 0.5459512720811153, "grad_norm": 0.18185225129127502, "learning_rate": 4.423421415648787e-06, "loss": 1.3874, "step": 1409 }, { "epoch": 0.5463387463693206, "grad_norm": 0.16304226219654083, "learning_rate": 4.4172497209493805e-06, "loss": 1.4226, "step": 1410 }, { "epoch": 0.5467262206575257, "grad_norm": 0.17191314697265625, "learning_rate": 4.411078926219988e-06, "loss": 1.3841, "step": 1411 }, { "epoch": 0.5471136949457309, "grad_norm": 0.17984727025032043, "learning_rate": 4.404909040990477e-06, "loss": 1.4174, "step": 1412 }, { "epoch": 0.5475011692339361, "grad_norm": 0.17929215729236603, "learning_rate": 4.3987400747893e-06, "loss": 1.4002, "step": 1413 }, { "epoch": 0.5478886435221413, "grad_norm": 0.16416257619857788, "learning_rate": 4.392572037143503e-06, "loss": 1.4118, "step": 1414 }, { "epoch": 0.5482761178103465, "grad_norm": 0.18430471420288086, "learning_rate": 4.386404937578683e-06, "loss": 1.3766, "step": 1415 }, { "epoch": 0.5486635920985516, "grad_norm": 0.1768052577972412, "learning_rate": 4.380238785619003e-06, "loss": 1.3759, "step": 1416 }, { "epoch": 0.5490510663867568, "grad_norm": 0.17785727977752686, "learning_rate": 4.374073590787151e-06, "loss": 1.4436, "step": 1417 }, { "epoch": 0.549438540674962, "grad_norm": 0.17862512171268463, "learning_rate": 4.367909362604346e-06, "loss": 1.3653, "step": 1418 }, { "epoch": 0.5498260149631672, "grad_norm": 0.17543375492095947, "learning_rate": 4.361746110590306e-06, "loss": 1.3733, "step": 1419 }, { "epoch": 0.5502134892513725, "grad_norm": 0.19497253000736237, "learning_rate": 4.355583844263247e-06, "loss": 1.3775, "step": 1420 }, { "epoch": 0.5506009635395777, "grad_norm": 0.16905707120895386, "learning_rate": 4.34942257313986e-06, "loss": 1.4008, "step": 1421 }, { "epoch": 0.5509884378277828, "grad_norm": 0.16476447880268097, "learning_rate": 4.3432623067353015e-06, "loss": 1.4024, "step": 1422 }, { "epoch": 0.551375912115988, "grad_norm": 0.18739654123783112, "learning_rate": 4.337103054563175e-06, "loss": 1.3538, "step": 1423 }, { "epoch": 0.5517633864041932, "grad_norm": 0.17131520807743073, "learning_rate": 4.330944826135515e-06, "loss": 1.36, "step": 1424 }, { "epoch": 0.5521508606923984, "grad_norm": 0.1829390525817871, "learning_rate": 4.324787630962782e-06, "loss": 1.4099, "step": 1425 }, { "epoch": 0.5525383349806036, "grad_norm": 0.18478021025657654, "learning_rate": 4.3186314785538305e-06, "loss": 1.3778, "step": 1426 }, { "epoch": 0.5529258092688087, "grad_norm": 0.1889704167842865, "learning_rate": 4.312476378415916e-06, "loss": 1.3831, "step": 1427 }, { "epoch": 0.5533132835570139, "grad_norm": 0.2160082459449768, "learning_rate": 4.30632234005466e-06, "loss": 1.3898, "step": 1428 }, { "epoch": 0.5537007578452191, "grad_norm": 0.17976605892181396, "learning_rate": 4.30016937297405e-06, "loss": 1.3818, "step": 1429 }, { "epoch": 0.5540882321334244, "grad_norm": 0.1727171540260315, "learning_rate": 4.294017486676413e-06, "loss": 1.3686, "step": 1430 }, { "epoch": 0.5544757064216296, "grad_norm": 0.15987946093082428, "learning_rate": 4.287866690662416e-06, "loss": 1.4273, "step": 1431 }, { "epoch": 0.5548631807098348, "grad_norm": 0.1992597132921219, "learning_rate": 4.281716994431032e-06, "loss": 1.3964, "step": 1432 }, { "epoch": 0.5552506549980399, "grad_norm": 0.1900787502527237, "learning_rate": 4.275568407479545e-06, "loss": 1.3812, "step": 1433 }, { "epoch": 0.5556381292862451, "grad_norm": 0.170169398188591, "learning_rate": 4.269420939303516e-06, "loss": 1.4018, "step": 1434 }, { "epoch": 0.5560256035744503, "grad_norm": 0.18049611151218414, "learning_rate": 4.263274599396788e-06, "loss": 1.4091, "step": 1435 }, { "epoch": 0.5564130778626555, "grad_norm": 0.19879399240016937, "learning_rate": 4.257129397251453e-06, "loss": 1.41, "step": 1436 }, { "epoch": 0.5568005521508607, "grad_norm": 0.1969165951013565, "learning_rate": 4.2509853423578545e-06, "loss": 1.3504, "step": 1437 }, { "epoch": 0.5571880264390658, "grad_norm": 0.19273993372917175, "learning_rate": 4.244842444204556e-06, "loss": 1.444, "step": 1438 }, { "epoch": 0.557575500727271, "grad_norm": 0.18410234153270721, "learning_rate": 4.238700712278341e-06, "loss": 1.4193, "step": 1439 }, { "epoch": 0.5579629750154763, "grad_norm": 0.18014274537563324, "learning_rate": 4.2325601560641854e-06, "loss": 1.4333, "step": 1440 }, { "epoch": 0.5583504493036815, "grad_norm": 0.18753263354301453, "learning_rate": 4.226420785045259e-06, "loss": 1.3607, "step": 1441 }, { "epoch": 0.5587379235918867, "grad_norm": 0.17138415575027466, "learning_rate": 4.220282608702889e-06, "loss": 1.4589, "step": 1442 }, { "epoch": 0.5591253978800919, "grad_norm": 0.18438640236854553, "learning_rate": 4.21414563651657e-06, "loss": 1.3936, "step": 1443 }, { "epoch": 0.559512872168297, "grad_norm": 0.18252649903297424, "learning_rate": 4.2080098779639255e-06, "loss": 1.3783, "step": 1444 }, { "epoch": 0.5599003464565022, "grad_norm": 0.17984099686145782, "learning_rate": 4.201875342520715e-06, "loss": 1.4245, "step": 1445 }, { "epoch": 0.5602878207447074, "grad_norm": 0.1926432102918625, "learning_rate": 4.195742039660801e-06, "loss": 1.3722, "step": 1446 }, { "epoch": 0.5606752950329126, "grad_norm": 0.18637371063232422, "learning_rate": 4.189609978856149e-06, "loss": 1.4442, "step": 1447 }, { "epoch": 0.5610627693211178, "grad_norm": 0.20782019197940826, "learning_rate": 4.1834791695768e-06, "loss": 1.3749, "step": 1448 }, { "epoch": 0.5614502436093229, "grad_norm": 0.1946801245212555, "learning_rate": 4.1773496212908645e-06, "loss": 1.4101, "step": 1449 }, { "epoch": 0.5618377178975282, "grad_norm": 0.1888292133808136, "learning_rate": 4.171221343464512e-06, "loss": 1.4048, "step": 1450 }, { "epoch": 0.5622251921857334, "grad_norm": 0.18372724950313568, "learning_rate": 4.165094345561939e-06, "loss": 1.4633, "step": 1451 }, { "epoch": 0.5626126664739386, "grad_norm": 0.18598143756389618, "learning_rate": 4.158968637045374e-06, "loss": 1.3871, "step": 1452 }, { "epoch": 0.5630001407621438, "grad_norm": 0.19194017350673676, "learning_rate": 4.1528442273750474e-06, "loss": 1.3723, "step": 1453 }, { "epoch": 0.563387615050349, "grad_norm": 0.1870420277118683, "learning_rate": 4.146721126009192e-06, "loss": 1.3649, "step": 1454 }, { "epoch": 0.5637750893385541, "grad_norm": 0.1765395700931549, "learning_rate": 4.14059934240401e-06, "loss": 1.4215, "step": 1455 }, { "epoch": 0.5641625636267593, "grad_norm": 0.19205699861049652, "learning_rate": 4.13447888601368e-06, "loss": 1.3906, "step": 1456 }, { "epoch": 0.5645500379149645, "grad_norm": 0.18698300421237946, "learning_rate": 4.12835976629032e-06, "loss": 1.3975, "step": 1457 }, { "epoch": 0.5649375122031697, "grad_norm": 0.1704002171754837, "learning_rate": 4.122241992683992e-06, "loss": 1.4158, "step": 1458 }, { "epoch": 0.5653249864913749, "grad_norm": 0.19322945177555084, "learning_rate": 4.116125574642671e-06, "loss": 1.4122, "step": 1459 }, { "epoch": 0.5657124607795802, "grad_norm": 0.1720888465642929, "learning_rate": 4.11001052161225e-06, "loss": 1.3926, "step": 1460 }, { "epoch": 0.5660999350677853, "grad_norm": 0.17180366814136505, "learning_rate": 4.1038968430365e-06, "loss": 1.4567, "step": 1461 }, { "epoch": 0.5664874093559905, "grad_norm": 0.20670254528522491, "learning_rate": 4.097784548357083e-06, "loss": 1.4442, "step": 1462 }, { "epoch": 0.5668748836441957, "grad_norm": 0.1928243339061737, "learning_rate": 4.091673647013512e-06, "loss": 1.4163, "step": 1463 }, { "epoch": 0.5672623579324009, "grad_norm": 0.17436975240707397, "learning_rate": 4.085564148443158e-06, "loss": 1.4069, "step": 1464 }, { "epoch": 0.567649832220606, "grad_norm": 0.16226370632648468, "learning_rate": 4.079456062081217e-06, "loss": 1.4032, "step": 1465 }, { "epoch": 0.5680373065088112, "grad_norm": 0.19417916238307953, "learning_rate": 4.0733493973607115e-06, "loss": 1.3593, "step": 1466 }, { "epoch": 0.5684247807970164, "grad_norm": 0.18427981436252594, "learning_rate": 4.067244163712463e-06, "loss": 1.4306, "step": 1467 }, { "epoch": 0.5688122550852216, "grad_norm": 0.1832556128501892, "learning_rate": 4.061140370565088e-06, "loss": 1.4273, "step": 1468 }, { "epoch": 0.5691997293734268, "grad_norm": 0.17957410216331482, "learning_rate": 4.055038027344971e-06, "loss": 1.4215, "step": 1469 }, { "epoch": 0.5695872036616321, "grad_norm": 0.17172379791736603, "learning_rate": 4.048937143476267e-06, "loss": 1.4126, "step": 1470 }, { "epoch": 0.5699746779498372, "grad_norm": 0.17163854837417603, "learning_rate": 4.042837728380868e-06, "loss": 1.3903, "step": 1471 }, { "epoch": 0.5703621522380424, "grad_norm": 0.18666456639766693, "learning_rate": 4.036739791478403e-06, "loss": 1.4134, "step": 1472 }, { "epoch": 0.5707496265262476, "grad_norm": 0.16964206099510193, "learning_rate": 4.0306433421862175e-06, "loss": 1.3662, "step": 1473 }, { "epoch": 0.5711371008144528, "grad_norm": 0.1998378485441208, "learning_rate": 4.02454838991936e-06, "loss": 1.3978, "step": 1474 }, { "epoch": 0.571524575102658, "grad_norm": 0.19043032824993134, "learning_rate": 4.018454944090564e-06, "loss": 1.3727, "step": 1475 }, { "epoch": 0.5719120493908632, "grad_norm": 0.18171018362045288, "learning_rate": 4.012363014110237e-06, "loss": 1.4006, "step": 1476 }, { "epoch": 0.5722995236790683, "grad_norm": 0.17465321719646454, "learning_rate": 4.006272609386452e-06, "loss": 1.4362, "step": 1477 }, { "epoch": 0.5726869979672735, "grad_norm": 0.16947154700756073, "learning_rate": 4.0001837393249174e-06, "loss": 1.3992, "step": 1478 }, { "epoch": 0.5730744722554787, "grad_norm": 0.17664450407028198, "learning_rate": 3.994096413328979e-06, "loss": 1.4233, "step": 1479 }, { "epoch": 0.573461946543684, "grad_norm": 0.1969662606716156, "learning_rate": 3.988010640799591e-06, "loss": 1.3563, "step": 1480 }, { "epoch": 0.5738494208318892, "grad_norm": 0.1823781132698059, "learning_rate": 3.981926431135317e-06, "loss": 1.4115, "step": 1481 }, { "epoch": 0.5742368951200943, "grad_norm": 0.1838909089565277, "learning_rate": 3.975843793732297e-06, "loss": 1.4035, "step": 1482 }, { "epoch": 0.5746243694082995, "grad_norm": 0.18593402206897736, "learning_rate": 3.969762737984253e-06, "loss": 1.3964, "step": 1483 }, { "epoch": 0.5750118436965047, "grad_norm": 0.17695170640945435, "learning_rate": 3.9636832732824555e-06, "loss": 1.3696, "step": 1484 }, { "epoch": 0.5753993179847099, "grad_norm": 0.18205270171165466, "learning_rate": 3.957605409015726e-06, "loss": 1.4013, "step": 1485 }, { "epoch": 0.5757867922729151, "grad_norm": 0.16509775817394257, "learning_rate": 3.9515291545704046e-06, "loss": 1.4022, "step": 1486 }, { "epoch": 0.5761742665611203, "grad_norm": 0.17473699152469635, "learning_rate": 3.945454519330357e-06, "loss": 1.3974, "step": 1487 }, { "epoch": 0.5765617408493254, "grad_norm": 0.19026006758213043, "learning_rate": 3.9393815126769375e-06, "loss": 1.3542, "step": 1488 }, { "epoch": 0.5769492151375306, "grad_norm": 0.19034555554389954, "learning_rate": 3.933310143988993e-06, "loss": 1.3457, "step": 1489 }, { "epoch": 0.5773366894257359, "grad_norm": 0.1662503033876419, "learning_rate": 3.927240422642836e-06, "loss": 1.4153, "step": 1490 }, { "epoch": 0.5777241637139411, "grad_norm": 0.19178231060504913, "learning_rate": 3.921172358012239e-06, "loss": 1.437, "step": 1491 }, { "epoch": 0.5781116380021463, "grad_norm": 0.17944012582302094, "learning_rate": 3.91510595946841e-06, "loss": 1.374, "step": 1492 }, { "epoch": 0.5784991122903514, "grad_norm": 0.17100298404693604, "learning_rate": 3.909041236379992e-06, "loss": 1.3905, "step": 1493 }, { "epoch": 0.5788865865785566, "grad_norm": 0.1894504874944687, "learning_rate": 3.902978198113032e-06, "loss": 1.4212, "step": 1494 }, { "epoch": 0.5792740608667618, "grad_norm": 0.17078186571598053, "learning_rate": 3.896916854030983e-06, "loss": 1.3805, "step": 1495 }, { "epoch": 0.579661535154967, "grad_norm": 0.19247306883335114, "learning_rate": 3.890857213494673e-06, "loss": 1.4069, "step": 1496 }, { "epoch": 0.5800490094431722, "grad_norm": 0.19485192000865936, "learning_rate": 3.88479928586231e-06, "loss": 1.3789, "step": 1497 }, { "epoch": 0.5804364837313774, "grad_norm": 0.18943673372268677, "learning_rate": 3.8787430804894455e-06, "loss": 1.4304, "step": 1498 }, { "epoch": 0.5808239580195826, "grad_norm": 0.1740529090166092, "learning_rate": 3.87268860672898e-06, "loss": 1.4042, "step": 1499 }, { "epoch": 0.5812114323077878, "grad_norm": 0.1814330369234085, "learning_rate": 3.866635873931133e-06, "loss": 1.4795, "step": 1500 }, { "epoch": 0.581598906595993, "grad_norm": 0.1918506920337677, "learning_rate": 3.860584891443441e-06, "loss": 1.4159, "step": 1501 }, { "epoch": 0.5819863808841982, "grad_norm": 0.16079893708229065, "learning_rate": 3.854535668610734e-06, "loss": 1.4153, "step": 1502 }, { "epoch": 0.5823738551724034, "grad_norm": 0.19250813126564026, "learning_rate": 3.848488214775124e-06, "loss": 1.4346, "step": 1503 }, { "epoch": 0.5827613294606085, "grad_norm": 0.161249577999115, "learning_rate": 3.842442539275996e-06, "loss": 1.3889, "step": 1504 }, { "epoch": 0.5831488037488137, "grad_norm": 0.1872885376214981, "learning_rate": 3.83639865144998e-06, "loss": 1.404, "step": 1505 }, { "epoch": 0.5835362780370189, "grad_norm": 0.1852969080209732, "learning_rate": 3.8303565606309554e-06, "loss": 1.4285, "step": 1506 }, { "epoch": 0.5839237523252241, "grad_norm": 0.18908612430095673, "learning_rate": 3.8243162761500164e-06, "loss": 1.3736, "step": 1507 }, { "epoch": 0.5843112266134293, "grad_norm": 0.17448759078979492, "learning_rate": 3.818277807335477e-06, "loss": 1.4321, "step": 1508 }, { "epoch": 0.5846987009016346, "grad_norm": 0.18088315427303314, "learning_rate": 3.8122411635128374e-06, "loss": 1.3522, "step": 1509 }, { "epoch": 0.5850861751898397, "grad_norm": 0.17164316773414612, "learning_rate": 3.8062063540047913e-06, "loss": 1.4133, "step": 1510 }, { "epoch": 0.5854736494780449, "grad_norm": 0.18077291548252106, "learning_rate": 3.800173388131186e-06, "loss": 1.4306, "step": 1511 }, { "epoch": 0.5858611237662501, "grad_norm": 0.1729746162891388, "learning_rate": 3.794142275209034e-06, "loss": 1.4094, "step": 1512 }, { "epoch": 0.5862485980544553, "grad_norm": 0.18132896721363068, "learning_rate": 3.7881130245524756e-06, "loss": 1.3663, "step": 1513 }, { "epoch": 0.5866360723426605, "grad_norm": 0.21102333068847656, "learning_rate": 3.7820856454727855e-06, "loss": 1.428, "step": 1514 }, { "epoch": 0.5870235466308656, "grad_norm": 0.188574880361557, "learning_rate": 3.7760601472783377e-06, "loss": 1.4101, "step": 1515 }, { "epoch": 0.5874110209190708, "grad_norm": 0.17160926759243011, "learning_rate": 3.7700365392746106e-06, "loss": 1.4295, "step": 1516 }, { "epoch": 0.587798495207276, "grad_norm": 0.21089845895767212, "learning_rate": 3.7640148307641555e-06, "loss": 1.3907, "step": 1517 }, { "epoch": 0.5881859694954812, "grad_norm": 0.17737920582294464, "learning_rate": 3.757995031046598e-06, "loss": 1.4169, "step": 1518 }, { "epoch": 0.5885734437836865, "grad_norm": 0.17137059569358826, "learning_rate": 3.7519771494186073e-06, "loss": 1.3726, "step": 1519 }, { "epoch": 0.5889609180718917, "grad_norm": 0.17878267168998718, "learning_rate": 3.7459611951739007e-06, "loss": 1.3631, "step": 1520 }, { "epoch": 0.5893483923600968, "grad_norm": 0.17521874606609344, "learning_rate": 3.7399471776032072e-06, "loss": 1.4183, "step": 1521 }, { "epoch": 0.589735866648302, "grad_norm": 0.1703302562236786, "learning_rate": 3.7339351059942754e-06, "loss": 1.4045, "step": 1522 }, { "epoch": 0.5901233409365072, "grad_norm": 0.19698522984981537, "learning_rate": 3.7279249896318415e-06, "loss": 1.398, "step": 1523 }, { "epoch": 0.5905108152247124, "grad_norm": 0.19199053943157196, "learning_rate": 3.721916837797627e-06, "loss": 1.3949, "step": 1524 }, { "epoch": 0.5908982895129176, "grad_norm": 0.17835143208503723, "learning_rate": 3.7159106597703147e-06, "loss": 1.3565, "step": 1525 }, { "epoch": 0.5912857638011227, "grad_norm": 0.2098294496536255, "learning_rate": 3.7099064648255454e-06, "loss": 1.3908, "step": 1526 }, { "epoch": 0.5916732380893279, "grad_norm": 0.1715565323829651, "learning_rate": 3.7039042622358913e-06, "loss": 1.4443, "step": 1527 }, { "epoch": 0.5920607123775331, "grad_norm": 0.18078064918518066, "learning_rate": 3.6979040612708485e-06, "loss": 1.3451, "step": 1528 }, { "epoch": 0.5924481866657384, "grad_norm": 0.19142375886440277, "learning_rate": 3.691905871196827e-06, "loss": 1.4079, "step": 1529 }, { "epoch": 0.5928356609539436, "grad_norm": 0.1939711719751358, "learning_rate": 3.685909701277124e-06, "loss": 1.459, "step": 1530 }, { "epoch": 0.5932231352421488, "grad_norm": 0.18301540613174438, "learning_rate": 3.679915560771925e-06, "loss": 1.3831, "step": 1531 }, { "epoch": 0.5936106095303539, "grad_norm": 0.20196379721164703, "learning_rate": 3.6739234589382722e-06, "loss": 1.3697, "step": 1532 }, { "epoch": 0.5939980838185591, "grad_norm": 0.18526053428649902, "learning_rate": 3.6679334050300697e-06, "loss": 1.3629, "step": 1533 }, { "epoch": 0.5943855581067643, "grad_norm": 0.18368200957775116, "learning_rate": 3.661945408298047e-06, "loss": 1.4051, "step": 1534 }, { "epoch": 0.5947730323949695, "grad_norm": 0.1735275685787201, "learning_rate": 3.655959477989769e-06, "loss": 1.4067, "step": 1535 }, { "epoch": 0.5951605066831747, "grad_norm": 0.1983805149793625, "learning_rate": 3.649975623349599e-06, "loss": 1.3483, "step": 1536 }, { "epoch": 0.5955479809713798, "grad_norm": 0.2016724795103073, "learning_rate": 3.6439938536187015e-06, "loss": 1.3784, "step": 1537 }, { "epoch": 0.595935455259585, "grad_norm": 0.19161836802959442, "learning_rate": 3.6380141780350165e-06, "loss": 1.4042, "step": 1538 }, { "epoch": 0.5963229295477903, "grad_norm": 0.16175870597362518, "learning_rate": 3.632036605833255e-06, "loss": 1.4641, "step": 1539 }, { "epoch": 0.5967104038359955, "grad_norm": 0.17678959667682648, "learning_rate": 3.6260611462448736e-06, "loss": 1.3795, "step": 1540 }, { "epoch": 0.5970978781242007, "grad_norm": 0.1786733865737915, "learning_rate": 3.6200878084980733e-06, "loss": 1.4243, "step": 1541 }, { "epoch": 0.5974853524124059, "grad_norm": 0.1763695478439331, "learning_rate": 3.61411660181777e-06, "loss": 1.4356, "step": 1542 }, { "epoch": 0.597872826700611, "grad_norm": 0.19328220188617706, "learning_rate": 3.6081475354255968e-06, "loss": 1.4107, "step": 1543 }, { "epoch": 0.5982603009888162, "grad_norm": 0.1797202080488205, "learning_rate": 3.602180618539873e-06, "loss": 1.397, "step": 1544 }, { "epoch": 0.5986477752770214, "grad_norm": 0.18035988509655, "learning_rate": 3.596215860375609e-06, "loss": 1.418, "step": 1545 }, { "epoch": 0.5990352495652266, "grad_norm": 0.18358130753040314, "learning_rate": 3.5902532701444694e-06, "loss": 1.4609, "step": 1546 }, { "epoch": 0.5994227238534318, "grad_norm": 0.18819552659988403, "learning_rate": 3.584292857054781e-06, "loss": 1.4467, "step": 1547 }, { "epoch": 0.5998101981416369, "grad_norm": 0.1919112205505371, "learning_rate": 3.5783346303114986e-06, "loss": 1.3818, "step": 1548 }, { "epoch": 0.6001976724298422, "grad_norm": 0.18641002476215363, "learning_rate": 3.572378599116212e-06, "loss": 1.4023, "step": 1549 }, { "epoch": 0.6005851467180474, "grad_norm": 0.17233923077583313, "learning_rate": 3.566424772667109e-06, "loss": 1.3851, "step": 1550 }, { "epoch": 0.6009726210062526, "grad_norm": 0.20364555716514587, "learning_rate": 3.560473160158982e-06, "loss": 1.4058, "step": 1551 }, { "epoch": 0.6013600952944578, "grad_norm": 0.17164023220539093, "learning_rate": 3.554523770783196e-06, "loss": 1.3858, "step": 1552 }, { "epoch": 0.601747569582663, "grad_norm": 0.18738055229187012, "learning_rate": 3.5485766137276894e-06, "loss": 1.4231, "step": 1553 }, { "epoch": 0.6021350438708681, "grad_norm": 0.1837063729763031, "learning_rate": 3.5426316981769483e-06, "loss": 1.3869, "step": 1554 }, { "epoch": 0.6025225181590733, "grad_norm": 0.17001575231552124, "learning_rate": 3.536689033312e-06, "loss": 1.3898, "step": 1555 }, { "epoch": 0.6029099924472785, "grad_norm": 0.19592979550361633, "learning_rate": 3.5307486283103966e-06, "loss": 1.3382, "step": 1556 }, { "epoch": 0.6032974667354837, "grad_norm": 0.18079674243927002, "learning_rate": 3.5248104923461956e-06, "loss": 1.4479, "step": 1557 }, { "epoch": 0.6036849410236889, "grad_norm": 0.17908865213394165, "learning_rate": 3.518874634589957e-06, "loss": 1.3569, "step": 1558 }, { "epoch": 0.6040724153118942, "grad_norm": 0.1746082305908203, "learning_rate": 3.5129410642087154e-06, "loss": 1.3851, "step": 1559 }, { "epoch": 0.6044598896000993, "grad_norm": 0.17718544602394104, "learning_rate": 3.50700979036598e-06, "loss": 1.3702, "step": 1560 }, { "epoch": 0.6048473638883045, "grad_norm": 0.20418326556682587, "learning_rate": 3.5010808222217057e-06, "loss": 1.3923, "step": 1561 }, { "epoch": 0.6052348381765097, "grad_norm": 0.18704842031002045, "learning_rate": 3.495154168932295e-06, "loss": 1.3918, "step": 1562 }, { "epoch": 0.6056223124647149, "grad_norm": 0.18926483392715454, "learning_rate": 3.4892298396505665e-06, "loss": 1.3937, "step": 1563 }, { "epoch": 0.60600978675292, "grad_norm": 0.1748504638671875, "learning_rate": 3.4833078435257584e-06, "loss": 1.4167, "step": 1564 }, { "epoch": 0.6063972610411252, "grad_norm": 0.19541841745376587, "learning_rate": 3.477388189703498e-06, "loss": 1.3582, "step": 1565 }, { "epoch": 0.6067847353293304, "grad_norm": 0.19785359501838684, "learning_rate": 3.471470887325803e-06, "loss": 1.4099, "step": 1566 }, { "epoch": 0.6071722096175356, "grad_norm": 0.18598459661006927, "learning_rate": 3.4655559455310508e-06, "loss": 1.3986, "step": 1567 }, { "epoch": 0.6075596839057408, "grad_norm": 0.19053520262241364, "learning_rate": 3.4596433734539813e-06, "loss": 1.3889, "step": 1568 }, { "epoch": 0.6079471581939461, "grad_norm": 0.18987713754177094, "learning_rate": 3.453733180225669e-06, "loss": 1.379, "step": 1569 }, { "epoch": 0.6083346324821512, "grad_norm": 0.18568649888038635, "learning_rate": 3.4478253749735206e-06, "loss": 1.3743, "step": 1570 }, { "epoch": 0.6087221067703564, "grad_norm": 0.179777592420578, "learning_rate": 3.4419199668212465e-06, "loss": 1.4058, "step": 1571 }, { "epoch": 0.6091095810585616, "grad_norm": 0.18556778132915497, "learning_rate": 3.4360169648888653e-06, "loss": 1.4063, "step": 1572 }, { "epoch": 0.6094970553467668, "grad_norm": 0.17254801094532013, "learning_rate": 3.4301163782926684e-06, "loss": 1.4285, "step": 1573 }, { "epoch": 0.609884529634972, "grad_norm": 0.1743870973587036, "learning_rate": 3.4242182161452282e-06, "loss": 1.3728, "step": 1574 }, { "epoch": 0.6102720039231772, "grad_norm": 0.1759854257106781, "learning_rate": 3.4183224875553634e-06, "loss": 1.3744, "step": 1575 }, { "epoch": 0.6106594782113823, "grad_norm": 0.19748550653457642, "learning_rate": 3.412429201628142e-06, "loss": 1.366, "step": 1576 }, { "epoch": 0.6110469524995875, "grad_norm": 0.1975429654121399, "learning_rate": 3.4065383674648524e-06, "loss": 1.4587, "step": 1577 }, { "epoch": 0.6114344267877927, "grad_norm": 0.19601546227931976, "learning_rate": 3.4006499941630055e-06, "loss": 1.3752, "step": 1578 }, { "epoch": 0.611821901075998, "grad_norm": 0.17840638756752014, "learning_rate": 3.3947640908163022e-06, "loss": 1.3856, "step": 1579 }, { "epoch": 0.6122093753642032, "grad_norm": 0.1882389932870865, "learning_rate": 3.388880666514637e-06, "loss": 1.365, "step": 1580 }, { "epoch": 0.6125968496524083, "grad_norm": 0.18404093384742737, "learning_rate": 3.382999730344072e-06, "loss": 1.3756, "step": 1581 }, { "epoch": 0.6129843239406135, "grad_norm": 0.18196435272693634, "learning_rate": 3.377121291386823e-06, "loss": 1.3711, "step": 1582 }, { "epoch": 0.6133717982288187, "grad_norm": 0.18113671243190765, "learning_rate": 3.371245358721259e-06, "loss": 1.3527, "step": 1583 }, { "epoch": 0.6137592725170239, "grad_norm": 0.18147501349449158, "learning_rate": 3.365371941421868e-06, "loss": 1.4186, "step": 1584 }, { "epoch": 0.6141467468052291, "grad_norm": 0.17941223084926605, "learning_rate": 3.359501048559262e-06, "loss": 1.3669, "step": 1585 }, { "epoch": 0.6145342210934343, "grad_norm": 0.17170292139053345, "learning_rate": 3.3536326892001482e-06, "loss": 1.4042, "step": 1586 }, { "epoch": 0.6149216953816394, "grad_norm": 0.18644371628761292, "learning_rate": 3.3477668724073255e-06, "loss": 1.4067, "step": 1587 }, { "epoch": 0.6153091696698446, "grad_norm": 0.1891889125108719, "learning_rate": 3.3419036072396614e-06, "loss": 1.3464, "step": 1588 }, { "epoch": 0.6156966439580499, "grad_norm": 0.16817675530910492, "learning_rate": 3.3360429027520898e-06, "loss": 1.3967, "step": 1589 }, { "epoch": 0.6160841182462551, "grad_norm": 0.17223194241523743, "learning_rate": 3.330184767995581e-06, "loss": 1.4131, "step": 1590 }, { "epoch": 0.6164715925344603, "grad_norm": 0.1718931496143341, "learning_rate": 3.3243292120171467e-06, "loss": 1.3821, "step": 1591 }, { "epoch": 0.6168590668226654, "grad_norm": 0.17999643087387085, "learning_rate": 3.318476243859806e-06, "loss": 1.4119, "step": 1592 }, { "epoch": 0.6172465411108706, "grad_norm": 0.18057338893413544, "learning_rate": 3.3126258725625913e-06, "loss": 1.4196, "step": 1593 }, { "epoch": 0.6176340153990758, "grad_norm": 0.176690012216568, "learning_rate": 3.3067781071605154e-06, "loss": 1.3514, "step": 1594 }, { "epoch": 0.618021489687281, "grad_norm": 0.19055034220218658, "learning_rate": 3.3009329566845747e-06, "loss": 1.3616, "step": 1595 }, { "epoch": 0.6184089639754862, "grad_norm": 0.19646205008029938, "learning_rate": 3.29509043016172e-06, "loss": 1.4149, "step": 1596 }, { "epoch": 0.6187964382636914, "grad_norm": 0.18597544729709625, "learning_rate": 3.289250536614857e-06, "loss": 1.4268, "step": 1597 }, { "epoch": 0.6191839125518965, "grad_norm": 0.18442484736442566, "learning_rate": 3.2834132850628177e-06, "loss": 1.4406, "step": 1598 }, { "epoch": 0.6195713868401018, "grad_norm": 0.17104828357696533, "learning_rate": 3.2775786845203615e-06, "loss": 1.3888, "step": 1599 }, { "epoch": 0.619958861128307, "grad_norm": 0.17790862917900085, "learning_rate": 3.2717467439981464e-06, "loss": 1.3195, "step": 1600 }, { "epoch": 0.6203463354165122, "grad_norm": 0.1798555850982666, "learning_rate": 3.26591747250273e-06, "loss": 1.3559, "step": 1601 }, { "epoch": 0.6207338097047174, "grad_norm": 0.200727179646492, "learning_rate": 3.2600908790365393e-06, "loss": 1.4157, "step": 1602 }, { "epoch": 0.6211212839929225, "grad_norm": 0.19643937051296234, "learning_rate": 3.2542669725978748e-06, "loss": 1.3532, "step": 1603 }, { "epoch": 0.6215087582811277, "grad_norm": 0.1758096069097519, "learning_rate": 3.2484457621808787e-06, "loss": 1.4059, "step": 1604 }, { "epoch": 0.6218962325693329, "grad_norm": 0.18621772527694702, "learning_rate": 3.242627256775537e-06, "loss": 1.3716, "step": 1605 }, { "epoch": 0.6222837068575381, "grad_norm": 0.19962482154369354, "learning_rate": 3.2368114653676517e-06, "loss": 1.4327, "step": 1606 }, { "epoch": 0.6226711811457433, "grad_norm": 0.1789565533399582, "learning_rate": 3.230998396938837e-06, "loss": 1.388, "step": 1607 }, { "epoch": 0.6230586554339484, "grad_norm": 0.17618854343891144, "learning_rate": 3.225188060466504e-06, "loss": 1.3431, "step": 1608 }, { "epoch": 0.6234461297221537, "grad_norm": 0.16482652723789215, "learning_rate": 3.2193804649238393e-06, "loss": 1.3731, "step": 1609 }, { "epoch": 0.6238336040103589, "grad_norm": 0.1955755054950714, "learning_rate": 3.213575619279801e-06, "loss": 1.3709, "step": 1610 }, { "epoch": 0.6242210782985641, "grad_norm": 0.20295721292495728, "learning_rate": 3.2077735324990965e-06, "loss": 1.3626, "step": 1611 }, { "epoch": 0.6246085525867693, "grad_norm": 0.18716463446617126, "learning_rate": 3.201974213542178e-06, "loss": 1.3987, "step": 1612 }, { "epoch": 0.6249960268749745, "grad_norm": 0.18286408483982086, "learning_rate": 3.196177671365216e-06, "loss": 1.3569, "step": 1613 }, { "epoch": 0.6253835011631796, "grad_norm": 0.20329616963863373, "learning_rate": 3.190383914920101e-06, "loss": 1.408, "step": 1614 }, { "epoch": 0.6257709754513848, "grad_norm": 0.16826100647449493, "learning_rate": 3.184592953154413e-06, "loss": 1.3689, "step": 1615 }, { "epoch": 0.62615844973959, "grad_norm": 0.17178748548030853, "learning_rate": 3.1788047950114244e-06, "loss": 1.3893, "step": 1616 }, { "epoch": 0.6265459240277952, "grad_norm": 0.18963943421840668, "learning_rate": 3.173019449430068e-06, "loss": 1.4024, "step": 1617 }, { "epoch": 0.6269333983160004, "grad_norm": 0.16885246336460114, "learning_rate": 3.1672369253449444e-06, "loss": 1.389, "step": 1618 }, { "epoch": 0.6273208726042057, "grad_norm": 0.1849628984928131, "learning_rate": 3.1614572316862853e-06, "loss": 1.4043, "step": 1619 }, { "epoch": 0.6277083468924108, "grad_norm": 0.18568649888038635, "learning_rate": 3.1556803773799616e-06, "loss": 1.3504, "step": 1620 }, { "epoch": 0.628095821180616, "grad_norm": 0.16634215414524078, "learning_rate": 3.149906371347451e-06, "loss": 1.3405, "step": 1621 }, { "epoch": 0.6284832954688212, "grad_norm": 0.17673878371715546, "learning_rate": 3.1441352225058385e-06, "loss": 1.4394, "step": 1622 }, { "epoch": 0.6288707697570264, "grad_norm": 0.16759416460990906, "learning_rate": 3.1383669397677917e-06, "loss": 1.3899, "step": 1623 }, { "epoch": 0.6292582440452316, "grad_norm": 0.1947753131389618, "learning_rate": 3.1326015320415583e-06, "loss": 1.352, "step": 1624 }, { "epoch": 0.6296457183334367, "grad_norm": 0.18655642867088318, "learning_rate": 3.1268390082309362e-06, "loss": 1.3488, "step": 1625 }, { "epoch": 0.6300331926216419, "grad_norm": 0.20496539771556854, "learning_rate": 3.121079377235281e-06, "loss": 1.3748, "step": 1626 }, { "epoch": 0.6304206669098471, "grad_norm": 0.1922077238559723, "learning_rate": 3.1153226479494703e-06, "loss": 1.3849, "step": 1627 }, { "epoch": 0.6308081411980523, "grad_norm": 0.18349799513816833, "learning_rate": 3.1095688292639094e-06, "loss": 1.3603, "step": 1628 }, { "epoch": 0.6311956154862576, "grad_norm": 0.18320289254188538, "learning_rate": 3.103817930064501e-06, "loss": 1.4284, "step": 1629 }, { "epoch": 0.6315830897744628, "grad_norm": 0.18834088742733002, "learning_rate": 3.0980699592326457e-06, "loss": 1.3268, "step": 1630 }, { "epoch": 0.6319705640626679, "grad_norm": 0.16582991182804108, "learning_rate": 3.0923249256452148e-06, "loss": 1.417, "step": 1631 }, { "epoch": 0.6323580383508731, "grad_norm": 0.19342875480651855, "learning_rate": 3.0865828381745515e-06, "loss": 1.3918, "step": 1632 }, { "epoch": 0.6327455126390783, "grad_norm": 0.18631957471370697, "learning_rate": 3.0808437056884432e-06, "loss": 1.3823, "step": 1633 }, { "epoch": 0.6331329869272835, "grad_norm": 0.18114720284938812, "learning_rate": 3.0751075370501127e-06, "loss": 1.4442, "step": 1634 }, { "epoch": 0.6335204612154887, "grad_norm": 0.17622534930706024, "learning_rate": 3.069374341118212e-06, "loss": 1.4206, "step": 1635 }, { "epoch": 0.6339079355036938, "grad_norm": 0.20148497819900513, "learning_rate": 3.0636441267467955e-06, "loss": 1.3947, "step": 1636 }, { "epoch": 0.634295409791899, "grad_norm": 0.18822365999221802, "learning_rate": 3.0579169027853195e-06, "loss": 1.406, "step": 1637 }, { "epoch": 0.6346828840801042, "grad_norm": 0.18503446877002716, "learning_rate": 3.052192678078615e-06, "loss": 1.3656, "step": 1638 }, { "epoch": 0.6350703583683095, "grad_norm": 0.18369054794311523, "learning_rate": 3.0464714614668876e-06, "loss": 1.3722, "step": 1639 }, { "epoch": 0.6354578326565147, "grad_norm": 0.22537167370319366, "learning_rate": 3.040753261785692e-06, "loss": 1.3776, "step": 1640 }, { "epoch": 0.6358453069447199, "grad_norm": 0.17909935116767883, "learning_rate": 3.0350380878659298e-06, "loss": 1.4219, "step": 1641 }, { "epoch": 0.636232781232925, "grad_norm": 0.18593275547027588, "learning_rate": 3.029325948533821e-06, "loss": 1.3738, "step": 1642 }, { "epoch": 0.6366202555211302, "grad_norm": 0.1795920431613922, "learning_rate": 3.02361685261091e-06, "loss": 1.454, "step": 1643 }, { "epoch": 0.6370077298093354, "grad_norm": 0.18419206142425537, "learning_rate": 3.01791080891403e-06, "loss": 1.422, "step": 1644 }, { "epoch": 0.6373952040975406, "grad_norm": 0.1981612592935562, "learning_rate": 3.0122078262553113e-06, "loss": 1.4083, "step": 1645 }, { "epoch": 0.6377826783857458, "grad_norm": 0.19791795313358307, "learning_rate": 3.0065079134421454e-06, "loss": 1.4203, "step": 1646 }, { "epoch": 0.6381701526739509, "grad_norm": 0.1776246577501297, "learning_rate": 3.0008110792771943e-06, "loss": 1.4112, "step": 1647 }, { "epoch": 0.6385576269621561, "grad_norm": 0.16729874908924103, "learning_rate": 2.9951173325583566e-06, "loss": 1.3963, "step": 1648 }, { "epoch": 0.6389451012503614, "grad_norm": 0.19643080234527588, "learning_rate": 2.989426682078769e-06, "loss": 1.3634, "step": 1649 }, { "epoch": 0.6393325755385666, "grad_norm": 0.17632822692394257, "learning_rate": 2.983739136626781e-06, "loss": 1.3928, "step": 1650 }, { "epoch": 0.6397200498267718, "grad_norm": 0.1883070319890976, "learning_rate": 2.978054704985954e-06, "loss": 1.3807, "step": 1651 }, { "epoch": 0.640107524114977, "grad_norm": 0.20489656925201416, "learning_rate": 2.972373395935031e-06, "loss": 1.4695, "step": 1652 }, { "epoch": 0.6404949984031821, "grad_norm": 0.1938476711511612, "learning_rate": 2.9666952182479423e-06, "loss": 1.3648, "step": 1653 }, { "epoch": 0.6408824726913873, "grad_norm": 0.18345028162002563, "learning_rate": 2.961020180693775e-06, "loss": 1.4217, "step": 1654 }, { "epoch": 0.6412699469795925, "grad_norm": 0.2075314223766327, "learning_rate": 2.955348292036773e-06, "loss": 1.4118, "step": 1655 }, { "epoch": 0.6416574212677977, "grad_norm": 0.19160676002502441, "learning_rate": 2.9496795610363087e-06, "loss": 1.3723, "step": 1656 }, { "epoch": 0.6420448955560029, "grad_norm": 0.1897740662097931, "learning_rate": 2.9440139964468896e-06, "loss": 1.3796, "step": 1657 }, { "epoch": 0.642432369844208, "grad_norm": 0.2113800346851349, "learning_rate": 2.9383516070181206e-06, "loss": 1.4596, "step": 1658 }, { "epoch": 0.6428198441324133, "grad_norm": 0.1884756237268448, "learning_rate": 2.9326924014947127e-06, "loss": 1.3496, "step": 1659 }, { "epoch": 0.6432073184206185, "grad_norm": 0.17893587052822113, "learning_rate": 2.927036388616457e-06, "loss": 1.3379, "step": 1660 }, { "epoch": 0.6435947927088237, "grad_norm": 0.1898370236158371, "learning_rate": 2.92138357711821e-06, "loss": 1.3999, "step": 1661 }, { "epoch": 0.6439822669970289, "grad_norm": 0.1796153038740158, "learning_rate": 2.915733975729892e-06, "loss": 1.4001, "step": 1662 }, { "epoch": 0.644369741285234, "grad_norm": 0.1835816204547882, "learning_rate": 2.910087593176455e-06, "loss": 1.3992, "step": 1663 }, { "epoch": 0.6447572155734392, "grad_norm": 0.20048892498016357, "learning_rate": 2.9044444381778923e-06, "loss": 1.3766, "step": 1664 }, { "epoch": 0.6451446898616444, "grad_norm": 0.17982755601406097, "learning_rate": 2.8988045194492024e-06, "loss": 1.4126, "step": 1665 }, { "epoch": 0.6455321641498496, "grad_norm": 0.18963520228862762, "learning_rate": 2.8931678457003944e-06, "loss": 1.3844, "step": 1666 }, { "epoch": 0.6459196384380548, "grad_norm": 0.19607919454574585, "learning_rate": 2.8875344256364556e-06, "loss": 1.3728, "step": 1667 }, { "epoch": 0.64630711272626, "grad_norm": 0.1786578744649887, "learning_rate": 2.8819042679573618e-06, "loss": 1.3909, "step": 1668 }, { "epoch": 0.6466945870144652, "grad_norm": 0.185596764087677, "learning_rate": 2.876277381358037e-06, "loss": 1.3814, "step": 1669 }, { "epoch": 0.6470820613026704, "grad_norm": 0.19560420513153076, "learning_rate": 2.870653774528365e-06, "loss": 1.3713, "step": 1670 }, { "epoch": 0.6474695355908756, "grad_norm": 0.17011965811252594, "learning_rate": 2.8650334561531545e-06, "loss": 1.4146, "step": 1671 }, { "epoch": 0.6478570098790808, "grad_norm": 0.1841110736131668, "learning_rate": 2.8594164349121454e-06, "loss": 1.4194, "step": 1672 }, { "epoch": 0.648244484167286, "grad_norm": 0.1807611733675003, "learning_rate": 2.853802719479977e-06, "loss": 1.3921, "step": 1673 }, { "epoch": 0.6486319584554912, "grad_norm": 0.19285838305950165, "learning_rate": 2.8481923185261904e-06, "loss": 1.375, "step": 1674 }, { "epoch": 0.6490194327436963, "grad_norm": 0.1951180249452591, "learning_rate": 2.842585240715201e-06, "loss": 1.4319, "step": 1675 }, { "epoch": 0.6494069070319015, "grad_norm": 0.18675553798675537, "learning_rate": 2.8369814947062994e-06, "loss": 1.3997, "step": 1676 }, { "epoch": 0.6497943813201067, "grad_norm": 0.1905914545059204, "learning_rate": 2.8313810891536243e-06, "loss": 1.41, "step": 1677 }, { "epoch": 0.6501818556083119, "grad_norm": 0.18613454699516296, "learning_rate": 2.82578403270616e-06, "loss": 1.4136, "step": 1678 }, { "epoch": 0.6505693298965172, "grad_norm": 0.18236248195171356, "learning_rate": 2.820190334007715e-06, "loss": 1.3729, "step": 1679 }, { "epoch": 0.6509568041847223, "grad_norm": 0.1780315637588501, "learning_rate": 2.814600001696919e-06, "loss": 1.3834, "step": 1680 }, { "epoch": 0.6513442784729275, "grad_norm": 0.18147656321525574, "learning_rate": 2.809013044407191e-06, "loss": 1.4036, "step": 1681 }, { "epoch": 0.6517317527611327, "grad_norm": 0.1748635619878769, "learning_rate": 2.803429470766752e-06, "loss": 1.4292, "step": 1682 }, { "epoch": 0.6521192270493379, "grad_norm": 0.1874506175518036, "learning_rate": 2.797849289398584e-06, "loss": 1.3837, "step": 1683 }, { "epoch": 0.6525067013375431, "grad_norm": 0.1763669103384018, "learning_rate": 2.792272508920443e-06, "loss": 1.391, "step": 1684 }, { "epoch": 0.6528941756257483, "grad_norm": 0.17939282953739166, "learning_rate": 2.7866991379448196e-06, "loss": 1.3841, "step": 1685 }, { "epoch": 0.6532816499139534, "grad_norm": 0.183648943901062, "learning_rate": 2.7811291850789492e-06, "loss": 1.3826, "step": 1686 }, { "epoch": 0.6536691242021586, "grad_norm": 0.18090073764324188, "learning_rate": 2.7755626589247864e-06, "loss": 1.3986, "step": 1687 }, { "epoch": 0.6540565984903638, "grad_norm": 0.1746295839548111, "learning_rate": 2.7699995680789887e-06, "loss": 1.376, "step": 1688 }, { "epoch": 0.6544440727785691, "grad_norm": 0.17358845472335815, "learning_rate": 2.764439921132915e-06, "loss": 1.478, "step": 1689 }, { "epoch": 0.6548315470667743, "grad_norm": 0.185023695230484, "learning_rate": 2.7588837266725996e-06, "loss": 1.3618, "step": 1690 }, { "epoch": 0.6552190213549794, "grad_norm": 0.194476917386055, "learning_rate": 2.7533309932787522e-06, "loss": 1.3668, "step": 1691 }, { "epoch": 0.6556064956431846, "grad_norm": 0.1810838133096695, "learning_rate": 2.7477817295267273e-06, "loss": 1.4097, "step": 1692 }, { "epoch": 0.6559939699313898, "grad_norm": 0.17249874770641327, "learning_rate": 2.742235943986531e-06, "loss": 1.4044, "step": 1693 }, { "epoch": 0.656381444219595, "grad_norm": 0.1784128099679947, "learning_rate": 2.736693645222791e-06, "loss": 1.4503, "step": 1694 }, { "epoch": 0.6567689185078002, "grad_norm": 0.2088146060705185, "learning_rate": 2.731154841794755e-06, "loss": 1.4005, "step": 1695 }, { "epoch": 0.6571563927960054, "grad_norm": 0.17535199224948883, "learning_rate": 2.7256195422562687e-06, "loss": 1.3487, "step": 1696 }, { "epoch": 0.6575438670842105, "grad_norm": 0.18511943519115448, "learning_rate": 2.7200877551557704e-06, "loss": 1.4096, "step": 1697 }, { "epoch": 0.6579313413724157, "grad_norm": 0.17721228301525116, "learning_rate": 2.7145594890362692e-06, "loss": 1.3918, "step": 1698 }, { "epoch": 0.658318815660621, "grad_norm": 0.17119456827640533, "learning_rate": 2.709034752435342e-06, "loss": 1.3963, "step": 1699 }, { "epoch": 0.6587062899488262, "grad_norm": 0.18290339410305023, "learning_rate": 2.70351355388511e-06, "loss": 1.4552, "step": 1700 }, { "epoch": 0.6590937642370314, "grad_norm": 0.20181624591350555, "learning_rate": 2.6979959019122347e-06, "loss": 1.3935, "step": 1701 }, { "epoch": 0.6594812385252365, "grad_norm": 0.1785903424024582, "learning_rate": 2.6924818050378954e-06, "loss": 1.3507, "step": 1702 }, { "epoch": 0.6598687128134417, "grad_norm": 0.1859111487865448, "learning_rate": 2.686971271777788e-06, "loss": 1.4111, "step": 1703 }, { "epoch": 0.6602561871016469, "grad_norm": 0.1948252171278, "learning_rate": 2.6814643106420967e-06, "loss": 1.3635, "step": 1704 }, { "epoch": 0.6606436613898521, "grad_norm": 0.18082992732524872, "learning_rate": 2.675960930135497e-06, "loss": 1.3754, "step": 1705 }, { "epoch": 0.6610311356780573, "grad_norm": 0.18512636423110962, "learning_rate": 2.670461138757129e-06, "loss": 1.3335, "step": 1706 }, { "epoch": 0.6614186099662624, "grad_norm": 0.19817864894866943, "learning_rate": 2.664964945000593e-06, "loss": 1.3838, "step": 1707 }, { "epoch": 0.6618060842544676, "grad_norm": 0.19560958445072174, "learning_rate": 2.6594723573539307e-06, "loss": 1.412, "step": 1708 }, { "epoch": 0.6621935585426729, "grad_norm": 0.18229524791240692, "learning_rate": 2.653983384299618e-06, "loss": 1.3476, "step": 1709 }, { "epoch": 0.6625810328308781, "grad_norm": 0.19071362912654877, "learning_rate": 2.648498034314545e-06, "loss": 1.3673, "step": 1710 }, { "epoch": 0.6629685071190833, "grad_norm": 0.18117819726467133, "learning_rate": 2.6430163158700116e-06, "loss": 1.3906, "step": 1711 }, { "epoch": 0.6633559814072885, "grad_norm": 0.1869923621416092, "learning_rate": 2.637538237431704e-06, "loss": 1.3928, "step": 1712 }, { "epoch": 0.6637434556954936, "grad_norm": 0.19701239466667175, "learning_rate": 2.632063807459687e-06, "loss": 1.4018, "step": 1713 }, { "epoch": 0.6641309299836988, "grad_norm": 0.17407658696174622, "learning_rate": 2.6265930344083977e-06, "loss": 1.4061, "step": 1714 }, { "epoch": 0.664518404271904, "grad_norm": 0.18824611604213715, "learning_rate": 2.6211259267266166e-06, "loss": 1.3868, "step": 1715 }, { "epoch": 0.6649058785601092, "grad_norm": 0.1742568463087082, "learning_rate": 2.615662492857471e-06, "loss": 1.3693, "step": 1716 }, { "epoch": 0.6652933528483144, "grad_norm": 0.1804109364748001, "learning_rate": 2.6102027412384077e-06, "loss": 1.3936, "step": 1717 }, { "epoch": 0.6656808271365195, "grad_norm": 0.1740296483039856, "learning_rate": 2.604746680301196e-06, "loss": 1.4807, "step": 1718 }, { "epoch": 0.6660683014247248, "grad_norm": 0.1844797134399414, "learning_rate": 2.599294318471895e-06, "loss": 1.3941, "step": 1719 }, { "epoch": 0.66645577571293, "grad_norm": 0.17477969825267792, "learning_rate": 2.5938456641708598e-06, "loss": 1.4263, "step": 1720 }, { "epoch": 0.6668432500011352, "grad_norm": 0.17553521692752838, "learning_rate": 2.588400725812713e-06, "loss": 1.3803, "step": 1721 }, { "epoch": 0.6672307242893404, "grad_norm": 0.18812841176986694, "learning_rate": 2.5829595118063456e-06, "loss": 1.3933, "step": 1722 }, { "epoch": 0.6676181985775456, "grad_norm": 0.19037482142448425, "learning_rate": 2.5775220305548874e-06, "loss": 1.4227, "step": 1723 }, { "epoch": 0.6680056728657507, "grad_norm": 0.18452249467372894, "learning_rate": 2.5720882904557156e-06, "loss": 1.3935, "step": 1724 }, { "epoch": 0.6683931471539559, "grad_norm": 0.16965578496456146, "learning_rate": 2.5666582999004163e-06, "loss": 1.3702, "step": 1725 }, { "epoch": 0.6687806214421611, "grad_norm": 0.175582155585289, "learning_rate": 2.561232067274798e-06, "loss": 1.4004, "step": 1726 }, { "epoch": 0.6691680957303663, "grad_norm": 0.18319489061832428, "learning_rate": 2.555809600958855e-06, "loss": 1.4071, "step": 1727 }, { "epoch": 0.6695555700185715, "grad_norm": 0.18731693923473358, "learning_rate": 2.5503909093267744e-06, "loss": 1.3928, "step": 1728 }, { "epoch": 0.6699430443067768, "grad_norm": 0.20159748196601868, "learning_rate": 2.544976000746904e-06, "loss": 1.3879, "step": 1729 }, { "epoch": 0.6703305185949819, "grad_norm": 0.1867716759443283, "learning_rate": 2.539564883581758e-06, "loss": 1.3996, "step": 1730 }, { "epoch": 0.6707179928831871, "grad_norm": 0.19294343888759613, "learning_rate": 2.534157566187988e-06, "loss": 1.4053, "step": 1731 }, { "epoch": 0.6711054671713923, "grad_norm": 0.18299168348312378, "learning_rate": 2.528754056916386e-06, "loss": 1.3793, "step": 1732 }, { "epoch": 0.6714929414595975, "grad_norm": 0.185139000415802, "learning_rate": 2.523354364111854e-06, "loss": 1.426, "step": 1733 }, { "epoch": 0.6718804157478027, "grad_norm": 0.21335925161838531, "learning_rate": 2.517958496113407e-06, "loss": 1.3933, "step": 1734 }, { "epoch": 0.6722678900360078, "grad_norm": 0.1808246225118637, "learning_rate": 2.512566461254147e-06, "loss": 1.3837, "step": 1735 }, { "epoch": 0.672655364324213, "grad_norm": 0.18069064617156982, "learning_rate": 2.5071782678612635e-06, "loss": 1.371, "step": 1736 }, { "epoch": 0.6730428386124182, "grad_norm": 0.18254321813583374, "learning_rate": 2.501793924256005e-06, "loss": 1.3724, "step": 1737 }, { "epoch": 0.6734303129006234, "grad_norm": 0.18520301580429077, "learning_rate": 2.4964134387536838e-06, "loss": 1.3816, "step": 1738 }, { "epoch": 0.6738177871888287, "grad_norm": 0.1921907514333725, "learning_rate": 2.491036819663647e-06, "loss": 1.3504, "step": 1739 }, { "epoch": 0.6742052614770339, "grad_norm": 0.1956491470336914, "learning_rate": 2.4856640752892702e-06, "loss": 1.3807, "step": 1740 }, { "epoch": 0.674592735765239, "grad_norm": 0.17396017909049988, "learning_rate": 2.480295213927952e-06, "loss": 1.4143, "step": 1741 }, { "epoch": 0.6749802100534442, "grad_norm": 0.17944970726966858, "learning_rate": 2.4749302438710865e-06, "loss": 1.3775, "step": 1742 }, { "epoch": 0.6753676843416494, "grad_norm": 0.18989521265029907, "learning_rate": 2.469569173404065e-06, "loss": 1.409, "step": 1743 }, { "epoch": 0.6757551586298546, "grad_norm": 0.18355347216129303, "learning_rate": 2.4642120108062494e-06, "loss": 1.3762, "step": 1744 }, { "epoch": 0.6761426329180598, "grad_norm": 0.191228449344635, "learning_rate": 2.4588587643509747e-06, "loss": 1.3715, "step": 1745 }, { "epoch": 0.6765301072062649, "grad_norm": 0.18514062464237213, "learning_rate": 2.4535094423055193e-06, "loss": 1.3843, "step": 1746 }, { "epoch": 0.6769175814944701, "grad_norm": 0.1899992972612381, "learning_rate": 2.4481640529311096e-06, "loss": 1.3902, "step": 1747 }, { "epoch": 0.6773050557826754, "grad_norm": 0.18289445340633392, "learning_rate": 2.4428226044828896e-06, "loss": 1.3465, "step": 1748 }, { "epoch": 0.6776925300708806, "grad_norm": 0.20967911183834076, "learning_rate": 2.4374851052099257e-06, "loss": 1.4127, "step": 1749 }, { "epoch": 0.6780800043590858, "grad_norm": 0.1993008553981781, "learning_rate": 2.4321515633551778e-06, "loss": 1.4291, "step": 1750 }, { "epoch": 0.678467478647291, "grad_norm": 0.1867089420557022, "learning_rate": 2.426821987155502e-06, "loss": 1.4318, "step": 1751 }, { "epoch": 0.6788549529354961, "grad_norm": 0.1918366551399231, "learning_rate": 2.4214963848416217e-06, "loss": 1.3702, "step": 1752 }, { "epoch": 0.6792424272237013, "grad_norm": 0.20817933976650238, "learning_rate": 2.416174764638131e-06, "loss": 1.3172, "step": 1753 }, { "epoch": 0.6796299015119065, "grad_norm": 0.18451210856437683, "learning_rate": 2.4108571347634674e-06, "loss": 1.4054, "step": 1754 }, { "epoch": 0.6800173758001117, "grad_norm": 0.19262884557247162, "learning_rate": 2.405543503429912e-06, "loss": 1.3436, "step": 1755 }, { "epoch": 0.6804048500883169, "grad_norm": 0.20268331468105316, "learning_rate": 2.4002338788435654e-06, "loss": 1.38, "step": 1756 }, { "epoch": 0.680792324376522, "grad_norm": 0.1805228292942047, "learning_rate": 2.394928269204345e-06, "loss": 1.41, "step": 1757 }, { "epoch": 0.6811797986647273, "grad_norm": 0.17801587283611298, "learning_rate": 2.3896266827059634e-06, "loss": 1.3595, "step": 1758 }, { "epoch": 0.6815672729529325, "grad_norm": 0.19603806734085083, "learning_rate": 2.3843291275359254e-06, "loss": 1.3994, "step": 1759 }, { "epoch": 0.6819547472411377, "grad_norm": 0.1952507495880127, "learning_rate": 2.379035611875503e-06, "loss": 1.4028, "step": 1760 }, { "epoch": 0.6823422215293429, "grad_norm": 0.1809936761856079, "learning_rate": 2.373746143899738e-06, "loss": 1.3688, "step": 1761 }, { "epoch": 0.682729695817548, "grad_norm": 0.18847240507602692, "learning_rate": 2.368460731777412e-06, "loss": 1.4139, "step": 1762 }, { "epoch": 0.6831171701057532, "grad_norm": 0.20092028379440308, "learning_rate": 2.3631793836710533e-06, "loss": 1.4136, "step": 1763 }, { "epoch": 0.6835046443939584, "grad_norm": 0.1812155693769455, "learning_rate": 2.3579021077369047e-06, "loss": 1.3681, "step": 1764 }, { "epoch": 0.6838921186821636, "grad_norm": 0.17776624858379364, "learning_rate": 2.352628912124923e-06, "loss": 1.384, "step": 1765 }, { "epoch": 0.6842795929703688, "grad_norm": 0.20200081169605255, "learning_rate": 2.347359804978768e-06, "loss": 1.3753, "step": 1766 }, { "epoch": 0.684667067258574, "grad_norm": 0.21137669682502747, "learning_rate": 2.3420947944357776e-06, "loss": 1.3836, "step": 1767 }, { "epoch": 0.6850545415467792, "grad_norm": 0.17844970524311066, "learning_rate": 2.336833888626972e-06, "loss": 1.3746, "step": 1768 }, { "epoch": 0.6854420158349844, "grad_norm": 0.184633269906044, "learning_rate": 2.331577095677024e-06, "loss": 1.3654, "step": 1769 }, { "epoch": 0.6858294901231896, "grad_norm": 0.18500974774360657, "learning_rate": 2.3263244237042616e-06, "loss": 1.3888, "step": 1770 }, { "epoch": 0.6862169644113948, "grad_norm": 0.18229837715625763, "learning_rate": 2.3210758808206414e-06, "loss": 1.3898, "step": 1771 }, { "epoch": 0.6866044386996, "grad_norm": 0.1910308599472046, "learning_rate": 2.315831475131751e-06, "loss": 1.3861, "step": 1772 }, { "epoch": 0.6869919129878052, "grad_norm": 0.202302947640419, "learning_rate": 2.3105912147367828e-06, "loss": 1.3798, "step": 1773 }, { "epoch": 0.6873793872760103, "grad_norm": 0.19056417047977448, "learning_rate": 2.305355107728531e-06, "loss": 1.3941, "step": 1774 }, { "epoch": 0.6877668615642155, "grad_norm": 0.18601156771183014, "learning_rate": 2.300123162193371e-06, "loss": 1.3911, "step": 1775 }, { "epoch": 0.6881543358524207, "grad_norm": 0.16975858807563782, "learning_rate": 2.2948953862112596e-06, "loss": 1.4394, "step": 1776 }, { "epoch": 0.6885418101406259, "grad_norm": 0.1895504593849182, "learning_rate": 2.289671787855704e-06, "loss": 1.3828, "step": 1777 }, { "epoch": 0.6889292844288312, "grad_norm": 0.18297523260116577, "learning_rate": 2.284452375193769e-06, "loss": 1.365, "step": 1778 }, { "epoch": 0.6893167587170363, "grad_norm": 0.18668992817401886, "learning_rate": 2.2792371562860475e-06, "loss": 1.434, "step": 1779 }, { "epoch": 0.6897042330052415, "grad_norm": 0.18666315078735352, "learning_rate": 2.2740261391866634e-06, "loss": 1.4069, "step": 1780 }, { "epoch": 0.6900917072934467, "grad_norm": 0.1692688763141632, "learning_rate": 2.2688193319432437e-06, "loss": 1.3793, "step": 1781 }, { "epoch": 0.6904791815816519, "grad_norm": 0.17253677546977997, "learning_rate": 2.2636167425969217e-06, "loss": 1.4205, "step": 1782 }, { "epoch": 0.6908666558698571, "grad_norm": 0.19875924289226532, "learning_rate": 2.2584183791823087e-06, "loss": 1.3458, "step": 1783 }, { "epoch": 0.6912541301580623, "grad_norm": 0.21354001760482788, "learning_rate": 2.253224249727498e-06, "loss": 1.4006, "step": 1784 }, { "epoch": 0.6916416044462674, "grad_norm": 0.17588774859905243, "learning_rate": 2.2480343622540364e-06, "loss": 1.4652, "step": 1785 }, { "epoch": 0.6920290787344726, "grad_norm": 0.18580208718776703, "learning_rate": 2.2428487247769267e-06, "loss": 1.3159, "step": 1786 }, { "epoch": 0.6924165530226778, "grad_norm": 0.18233737349510193, "learning_rate": 2.2376673453046005e-06, "loss": 1.3387, "step": 1787 }, { "epoch": 0.6928040273108831, "grad_norm": 0.1995122730731964, "learning_rate": 2.232490231838923e-06, "loss": 1.3929, "step": 1788 }, { "epoch": 0.6931915015990883, "grad_norm": 0.18206916749477386, "learning_rate": 2.2273173923751613e-06, "loss": 1.4172, "step": 1789 }, { "epoch": 0.6935789758872934, "grad_norm": 0.1846248209476471, "learning_rate": 2.2221488349019903e-06, "loss": 1.3924, "step": 1790 }, { "epoch": 0.6939664501754986, "grad_norm": 0.2047913521528244, "learning_rate": 2.216984567401465e-06, "loss": 1.388, "step": 1791 }, { "epoch": 0.6943539244637038, "grad_norm": 0.19773279130458832, "learning_rate": 2.211824597849021e-06, "loss": 1.4122, "step": 1792 }, { "epoch": 0.694741398751909, "grad_norm": 0.18524467945098877, "learning_rate": 2.2066689342134546e-06, "loss": 1.4074, "step": 1793 }, { "epoch": 0.6951288730401142, "grad_norm": 0.18059851229190826, "learning_rate": 2.2015175844569085e-06, "loss": 1.4167, "step": 1794 }, { "epoch": 0.6955163473283194, "grad_norm": 0.1840369999408722, "learning_rate": 2.19637055653487e-06, "loss": 1.3959, "step": 1795 }, { "epoch": 0.6959038216165245, "grad_norm": 0.18438908457756042, "learning_rate": 2.1912278583961454e-06, "loss": 1.3797, "step": 1796 }, { "epoch": 0.6962912959047297, "grad_norm": 0.18370600044727325, "learning_rate": 2.1860894979828594e-06, "loss": 1.3935, "step": 1797 }, { "epoch": 0.696678770192935, "grad_norm": 0.1891878992319107, "learning_rate": 2.180955483230433e-06, "loss": 1.3628, "step": 1798 }, { "epoch": 0.6970662444811402, "grad_norm": 0.18217825889587402, "learning_rate": 2.175825822067582e-06, "loss": 1.3262, "step": 1799 }, { "epoch": 0.6974537187693454, "grad_norm": 0.19523343443870544, "learning_rate": 2.1707005224162915e-06, "loss": 1.4046, "step": 1800 }, { "epoch": 0.6978411930575505, "grad_norm": 0.18181322515010834, "learning_rate": 2.1655795921918184e-06, "loss": 1.3525, "step": 1801 }, { "epoch": 0.6982286673457557, "grad_norm": 0.16629661619663239, "learning_rate": 2.160463039302664e-06, "loss": 1.3791, "step": 1802 }, { "epoch": 0.6986161416339609, "grad_norm": 0.18349102139472961, "learning_rate": 2.1553508716505784e-06, "loss": 1.3971, "step": 1803 }, { "epoch": 0.6990036159221661, "grad_norm": 0.17394539713859558, "learning_rate": 2.1502430971305288e-06, "loss": 1.381, "step": 1804 }, { "epoch": 0.6993910902103713, "grad_norm": 0.21239542961120605, "learning_rate": 2.1451397236307086e-06, "loss": 1.4307, "step": 1805 }, { "epoch": 0.6997785644985764, "grad_norm": 0.1874159574508667, "learning_rate": 2.140040759032505e-06, "loss": 1.3872, "step": 1806 }, { "epoch": 0.7001660387867816, "grad_norm": 0.17757219076156616, "learning_rate": 2.1349462112105047e-06, "loss": 1.3961, "step": 1807 }, { "epoch": 0.7005535130749869, "grad_norm": 0.18915829062461853, "learning_rate": 2.129856088032465e-06, "loss": 1.4194, "step": 1808 }, { "epoch": 0.7009409873631921, "grad_norm": 0.17844818532466888, "learning_rate": 2.124770397359319e-06, "loss": 1.3703, "step": 1809 }, { "epoch": 0.7013284616513973, "grad_norm": 0.18495051562786102, "learning_rate": 2.119689147045146e-06, "loss": 1.3975, "step": 1810 }, { "epoch": 0.7017159359396025, "grad_norm": 0.2033843696117401, "learning_rate": 2.1146123449371746e-06, "loss": 1.3815, "step": 1811 }, { "epoch": 0.7021034102278076, "grad_norm": 0.1911546140909195, "learning_rate": 2.1095399988757574e-06, "loss": 1.3887, "step": 1812 }, { "epoch": 0.7024908845160128, "grad_norm": 0.17965953052043915, "learning_rate": 2.1044721166943726e-06, "loss": 1.3911, "step": 1813 }, { "epoch": 0.702878358804218, "grad_norm": 0.18181200325489044, "learning_rate": 2.099408706219597e-06, "loss": 1.422, "step": 1814 }, { "epoch": 0.7032658330924232, "grad_norm": 0.1816393882036209, "learning_rate": 2.094349775271109e-06, "loss": 1.3089, "step": 1815 }, { "epoch": 0.7036533073806284, "grad_norm": 0.18407592177391052, "learning_rate": 2.0892953316616616e-06, "loss": 1.339, "step": 1816 }, { "epoch": 0.7040407816688335, "grad_norm": 0.17739444971084595, "learning_rate": 2.0842453831970854e-06, "loss": 1.3768, "step": 1817 }, { "epoch": 0.7044282559570388, "grad_norm": 0.1875084489583969, "learning_rate": 2.0791999376762627e-06, "loss": 1.3387, "step": 1818 }, { "epoch": 0.704815730245244, "grad_norm": 0.17605862021446228, "learning_rate": 2.074159002891123e-06, "loss": 1.4089, "step": 1819 }, { "epoch": 0.7052032045334492, "grad_norm": 0.19622202217578888, "learning_rate": 2.0691225866266335e-06, "loss": 1.3831, "step": 1820 }, { "epoch": 0.7055906788216544, "grad_norm": 0.17978356778621674, "learning_rate": 2.064090696660777e-06, "loss": 1.3784, "step": 1821 }, { "epoch": 0.7059781531098596, "grad_norm": 0.1699388474225998, "learning_rate": 2.059063340764554e-06, "loss": 1.4352, "step": 1822 }, { "epoch": 0.7063656273980647, "grad_norm": 0.19241872429847717, "learning_rate": 2.054040526701953e-06, "loss": 1.3718, "step": 1823 }, { "epoch": 0.7067531016862699, "grad_norm": 0.2001776099205017, "learning_rate": 2.0490222622299587e-06, "loss": 1.3929, "step": 1824 }, { "epoch": 0.7071405759744751, "grad_norm": 0.18134605884552002, "learning_rate": 2.0440085550985206e-06, "loss": 1.3763, "step": 1825 }, { "epoch": 0.7075280502626803, "grad_norm": 0.1762961596250534, "learning_rate": 2.0389994130505575e-06, "loss": 1.3957, "step": 1826 }, { "epoch": 0.7079155245508855, "grad_norm": 0.19062283635139465, "learning_rate": 2.033994843821931e-06, "loss": 1.3872, "step": 1827 }, { "epoch": 0.7083029988390908, "grad_norm": 0.19448524713516235, "learning_rate": 2.0289948551414486e-06, "loss": 1.4014, "step": 1828 }, { "epoch": 0.7086904731272959, "grad_norm": 0.1862853616476059, "learning_rate": 2.0239994547308344e-06, "loss": 1.3924, "step": 1829 }, { "epoch": 0.7090779474155011, "grad_norm": 0.1755864918231964, "learning_rate": 2.0190086503047374e-06, "loss": 1.402, "step": 1830 }, { "epoch": 0.7094654217037063, "grad_norm": 0.18028290569782257, "learning_rate": 2.0140224495706983e-06, "loss": 1.3893, "step": 1831 }, { "epoch": 0.7098528959919115, "grad_norm": 0.19942204654216766, "learning_rate": 2.009040860229157e-06, "loss": 1.3512, "step": 1832 }, { "epoch": 0.7102403702801167, "grad_norm": 0.17812877893447876, "learning_rate": 2.004063889973424e-06, "loss": 1.4058, "step": 1833 }, { "epoch": 0.7106278445683218, "grad_norm": 0.17900818586349487, "learning_rate": 1.999091546489684e-06, "loss": 1.3501, "step": 1834 }, { "epoch": 0.711015318856527, "grad_norm": 0.18721729516983032, "learning_rate": 1.9941238374569676e-06, "loss": 1.3634, "step": 1835 }, { "epoch": 0.7114027931447322, "grad_norm": 0.20497877895832062, "learning_rate": 1.989160770547159e-06, "loss": 1.3599, "step": 1836 }, { "epoch": 0.7117902674329374, "grad_norm": 0.1915593147277832, "learning_rate": 1.9842023534249615e-06, "loss": 1.3623, "step": 1837 }, { "epoch": 0.7121777417211427, "grad_norm": 0.1648300290107727, "learning_rate": 1.9792485937479077e-06, "loss": 1.415, "step": 1838 }, { "epoch": 0.7125652160093479, "grad_norm": 0.18885764479637146, "learning_rate": 1.9742994991663305e-06, "loss": 1.4021, "step": 1839 }, { "epoch": 0.712952690297553, "grad_norm": 0.17439673840999603, "learning_rate": 1.9693550773233634e-06, "loss": 1.4312, "step": 1840 }, { "epoch": 0.7133401645857582, "grad_norm": 0.1849389374256134, "learning_rate": 1.964415335854918e-06, "loss": 1.3389, "step": 1841 }, { "epoch": 0.7137276388739634, "grad_norm": 0.19328391551971436, "learning_rate": 1.959480282389684e-06, "loss": 1.4118, "step": 1842 }, { "epoch": 0.7141151131621686, "grad_norm": 0.169728085398674, "learning_rate": 1.9545499245491064e-06, "loss": 1.3786, "step": 1843 }, { "epoch": 0.7145025874503738, "grad_norm": 0.19239671528339386, "learning_rate": 1.949624269947378e-06, "loss": 1.4499, "step": 1844 }, { "epoch": 0.7148900617385789, "grad_norm": 0.18359020352363586, "learning_rate": 1.9447033261914356e-06, "loss": 1.399, "step": 1845 }, { "epoch": 0.7152775360267841, "grad_norm": 0.18383187055587769, "learning_rate": 1.9397871008809293e-06, "loss": 1.4409, "step": 1846 }, { "epoch": 0.7156650103149893, "grad_norm": 0.19532708823680878, "learning_rate": 1.9348756016082342e-06, "loss": 1.3575, "step": 1847 }, { "epoch": 0.7160524846031946, "grad_norm": 0.17538326978683472, "learning_rate": 1.9299688359584164e-06, "loss": 1.3894, "step": 1848 }, { "epoch": 0.7164399588913998, "grad_norm": 0.1870325803756714, "learning_rate": 1.9250668115092396e-06, "loss": 1.3812, "step": 1849 }, { "epoch": 0.716827433179605, "grad_norm": 0.18225093185901642, "learning_rate": 1.9201695358311396e-06, "loss": 1.3838, "step": 1850 }, { "epoch": 0.7172149074678101, "grad_norm": 0.19466179609298706, "learning_rate": 1.9152770164872233e-06, "loss": 1.4191, "step": 1851 }, { "epoch": 0.7176023817560153, "grad_norm": 0.17917987704277039, "learning_rate": 1.9103892610332467e-06, "loss": 1.3665, "step": 1852 }, { "epoch": 0.7179898560442205, "grad_norm": 0.17455139756202698, "learning_rate": 1.9055062770176158e-06, "loss": 1.3467, "step": 1853 }, { "epoch": 0.7183773303324257, "grad_norm": 0.16951416432857513, "learning_rate": 1.9006280719813596e-06, "loss": 1.4074, "step": 1854 }, { "epoch": 0.7187648046206309, "grad_norm": 0.19863559305667877, "learning_rate": 1.8957546534581346e-06, "loss": 1.3899, "step": 1855 }, { "epoch": 0.719152278908836, "grad_norm": 0.17250625789165497, "learning_rate": 1.8908860289741981e-06, "loss": 1.3903, "step": 1856 }, { "epoch": 0.7195397531970412, "grad_norm": 0.20413000881671906, "learning_rate": 1.8860222060484106e-06, "loss": 1.4047, "step": 1857 }, { "epoch": 0.7199272274852465, "grad_norm": 0.1826530396938324, "learning_rate": 1.8811631921922108e-06, "loss": 1.4006, "step": 1858 }, { "epoch": 0.7203147017734517, "grad_norm": 0.1723063588142395, "learning_rate": 1.8763089949096163e-06, "loss": 1.3766, "step": 1859 }, { "epoch": 0.7207021760616569, "grad_norm": 0.18734580278396606, "learning_rate": 1.8714596216972008e-06, "loss": 1.3738, "step": 1860 }, { "epoch": 0.721089650349862, "grad_norm": 0.18402554094791412, "learning_rate": 1.8666150800440935e-06, "loss": 1.3653, "step": 1861 }, { "epoch": 0.7214771246380672, "grad_norm": 0.1904211789369583, "learning_rate": 1.8617753774319564e-06, "loss": 1.4178, "step": 1862 }, { "epoch": 0.7218645989262724, "grad_norm": 0.19647128880023956, "learning_rate": 1.8569405213349838e-06, "loss": 1.3859, "step": 1863 }, { "epoch": 0.7222520732144776, "grad_norm": 0.19581244885921478, "learning_rate": 1.8521105192198795e-06, "loss": 1.3831, "step": 1864 }, { "epoch": 0.7226395475026828, "grad_norm": 0.18563339114189148, "learning_rate": 1.8472853785458579e-06, "loss": 1.4156, "step": 1865 }, { "epoch": 0.723027021790888, "grad_norm": 0.1957298070192337, "learning_rate": 1.8424651067646177e-06, "loss": 1.4013, "step": 1866 }, { "epoch": 0.7234144960790931, "grad_norm": 0.21004600822925568, "learning_rate": 1.8376497113203468e-06, "loss": 1.3745, "step": 1867 }, { "epoch": 0.7238019703672984, "grad_norm": 0.17564362287521362, "learning_rate": 1.8328391996496942e-06, "loss": 1.3999, "step": 1868 }, { "epoch": 0.7241894446555036, "grad_norm": 0.19748182594776154, "learning_rate": 1.8280335791817733e-06, "loss": 1.4348, "step": 1869 }, { "epoch": 0.7245769189437088, "grad_norm": 0.1921619176864624, "learning_rate": 1.8232328573381403e-06, "loss": 1.4684, "step": 1870 }, { "epoch": 0.724964393231914, "grad_norm": 0.1865427941083908, "learning_rate": 1.8184370415327846e-06, "loss": 1.4049, "step": 1871 }, { "epoch": 0.7253518675201192, "grad_norm": 0.18795761466026306, "learning_rate": 1.813646139172125e-06, "loss": 1.3734, "step": 1872 }, { "epoch": 0.7257393418083243, "grad_norm": 0.19113704562187195, "learning_rate": 1.808860157654984e-06, "loss": 1.3816, "step": 1873 }, { "epoch": 0.7261268160965295, "grad_norm": 0.19781114161014557, "learning_rate": 1.8040791043725935e-06, "loss": 1.4141, "step": 1874 }, { "epoch": 0.7265142903847347, "grad_norm": 0.19061891734600067, "learning_rate": 1.799302986708567e-06, "loss": 1.4283, "step": 1875 }, { "epoch": 0.7269017646729399, "grad_norm": 0.1983279287815094, "learning_rate": 1.794531812038901e-06, "loss": 1.3972, "step": 1876 }, { "epoch": 0.7272892389611451, "grad_norm": 0.19054526090621948, "learning_rate": 1.7897655877319531e-06, "loss": 1.381, "step": 1877 }, { "epoch": 0.7276767132493503, "grad_norm": 0.17853932082653046, "learning_rate": 1.7850043211484425e-06, "loss": 1.4177, "step": 1878 }, { "epoch": 0.7280641875375555, "grad_norm": 0.175220787525177, "learning_rate": 1.780248019641424e-06, "loss": 1.3845, "step": 1879 }, { "epoch": 0.7284516618257607, "grad_norm": 0.1800004094839096, "learning_rate": 1.7754966905562925e-06, "loss": 1.4107, "step": 1880 }, { "epoch": 0.7288391361139659, "grad_norm": 0.19163134694099426, "learning_rate": 1.770750341230757e-06, "loss": 1.427, "step": 1881 }, { "epoch": 0.7292266104021711, "grad_norm": 0.18915410339832306, "learning_rate": 1.766008978994842e-06, "loss": 1.3488, "step": 1882 }, { "epoch": 0.7296140846903763, "grad_norm": 0.18491138517856598, "learning_rate": 1.7612726111708633e-06, "loss": 1.3422, "step": 1883 }, { "epoch": 0.7300015589785814, "grad_norm": 0.1946297138929367, "learning_rate": 1.756541245073432e-06, "loss": 1.3785, "step": 1884 }, { "epoch": 0.7303890332667866, "grad_norm": 0.20290718972682953, "learning_rate": 1.7518148880094261e-06, "loss": 1.4053, "step": 1885 }, { "epoch": 0.7307765075549918, "grad_norm": 0.19586481153964996, "learning_rate": 1.7470935472779953e-06, "loss": 1.3557, "step": 1886 }, { "epoch": 0.731163981843197, "grad_norm": 0.19488950073719025, "learning_rate": 1.7423772301705366e-06, "loss": 1.3772, "step": 1887 }, { "epoch": 0.7315514561314023, "grad_norm": 0.19923752546310425, "learning_rate": 1.737665943970694e-06, "loss": 1.4144, "step": 1888 }, { "epoch": 0.7319389304196074, "grad_norm": 0.18074248731136322, "learning_rate": 1.732959695954336e-06, "loss": 1.373, "step": 1889 }, { "epoch": 0.7323264047078126, "grad_norm": 0.19697529077529907, "learning_rate": 1.7282584933895574e-06, "loss": 1.3847, "step": 1890 }, { "epoch": 0.7327138789960178, "grad_norm": 0.18085691332817078, "learning_rate": 1.7235623435366534e-06, "loss": 1.4665, "step": 1891 }, { "epoch": 0.733101353284223, "grad_norm": 0.18226982653141022, "learning_rate": 1.7188712536481233e-06, "loss": 1.3947, "step": 1892 }, { "epoch": 0.7334888275724282, "grad_norm": 0.18961602449417114, "learning_rate": 1.7141852309686457e-06, "loss": 1.3962, "step": 1893 }, { "epoch": 0.7338763018606334, "grad_norm": 0.19194835424423218, "learning_rate": 1.7095042827350806e-06, "loss": 1.4313, "step": 1894 }, { "epoch": 0.7342637761488385, "grad_norm": 0.18980608880519867, "learning_rate": 1.7048284161764428e-06, "loss": 1.357, "step": 1895 }, { "epoch": 0.7346512504370437, "grad_norm": 0.19339795410633087, "learning_rate": 1.7001576385139062e-06, "loss": 1.4532, "step": 1896 }, { "epoch": 0.7350387247252489, "grad_norm": 0.176658496260643, "learning_rate": 1.6954919569607852e-06, "loss": 1.3625, "step": 1897 }, { "epoch": 0.7354261990134542, "grad_norm": 0.20904052257537842, "learning_rate": 1.6908313787225183e-06, "loss": 1.4044, "step": 1898 }, { "epoch": 0.7358136733016594, "grad_norm": 0.19488981366157532, "learning_rate": 1.6861759109966697e-06, "loss": 1.3782, "step": 1899 }, { "epoch": 0.7362011475898645, "grad_norm": 0.18184497952461243, "learning_rate": 1.6815255609729047e-06, "loss": 1.3679, "step": 1900 }, { "epoch": 0.7365886218780697, "grad_norm": 0.17679250240325928, "learning_rate": 1.6768803358329922e-06, "loss": 1.4266, "step": 1901 }, { "epoch": 0.7369760961662749, "grad_norm": 0.18726807832717896, "learning_rate": 1.6722402427507788e-06, "loss": 1.4169, "step": 1902 }, { "epoch": 0.7373635704544801, "grad_norm": 0.1981021612882614, "learning_rate": 1.6676052888921928e-06, "loss": 1.3866, "step": 1903 }, { "epoch": 0.7377510447426853, "grad_norm": 0.1833338439464569, "learning_rate": 1.6629754814152182e-06, "loss": 1.3953, "step": 1904 }, { "epoch": 0.7381385190308904, "grad_norm": 0.1743917018175125, "learning_rate": 1.6583508274698995e-06, "loss": 1.3835, "step": 1905 }, { "epoch": 0.7385259933190956, "grad_norm": 0.1795787513256073, "learning_rate": 1.653731334198314e-06, "loss": 1.3828, "step": 1906 }, { "epoch": 0.7389134676073008, "grad_norm": 0.18003463745117188, "learning_rate": 1.6491170087345771e-06, "loss": 1.3865, "step": 1907 }, { "epoch": 0.7393009418955061, "grad_norm": 0.19006937742233276, "learning_rate": 1.6445078582048158e-06, "loss": 1.365, "step": 1908 }, { "epoch": 0.7396884161837113, "grad_norm": 0.18147625029087067, "learning_rate": 1.6399038897271712e-06, "loss": 1.359, "step": 1909 }, { "epoch": 0.7400758904719165, "grad_norm": 0.1925603598356247, "learning_rate": 1.6353051104117767e-06, "loss": 1.3756, "step": 1910 }, { "epoch": 0.7404633647601216, "grad_norm": 0.1855103075504303, "learning_rate": 1.6307115273607576e-06, "loss": 1.3962, "step": 1911 }, { "epoch": 0.7408508390483268, "grad_norm": 0.19282642006874084, "learning_rate": 1.6261231476682065e-06, "loss": 1.3926, "step": 1912 }, { "epoch": 0.741238313336532, "grad_norm": 0.18378175795078278, "learning_rate": 1.6215399784201891e-06, "loss": 1.3658, "step": 1913 }, { "epoch": 0.7416257876247372, "grad_norm": 0.19125710427761078, "learning_rate": 1.6169620266947161e-06, "loss": 1.3979, "step": 1914 }, { "epoch": 0.7420132619129424, "grad_norm": 0.22099870443344116, "learning_rate": 1.6123892995617474e-06, "loss": 1.3693, "step": 1915 }, { "epoch": 0.7424007362011475, "grad_norm": 0.2031087875366211, "learning_rate": 1.6078218040831678e-06, "loss": 1.4155, "step": 1916 }, { "epoch": 0.7427882104893527, "grad_norm": 0.18318665027618408, "learning_rate": 1.6032595473127892e-06, "loss": 1.4092, "step": 1917 }, { "epoch": 0.743175684777558, "grad_norm": 0.1857396513223648, "learning_rate": 1.5987025362963276e-06, "loss": 1.3886, "step": 1918 }, { "epoch": 0.7435631590657632, "grad_norm": 0.2030310034751892, "learning_rate": 1.5941507780714026e-06, "loss": 1.3685, "step": 1919 }, { "epoch": 0.7439506333539684, "grad_norm": 0.2054600715637207, "learning_rate": 1.5896042796675155e-06, "loss": 1.3693, "step": 1920 }, { "epoch": 0.7443381076421736, "grad_norm": 0.18526341021060944, "learning_rate": 1.585063048106052e-06, "loss": 1.3875, "step": 1921 }, { "epoch": 0.7447255819303787, "grad_norm": 0.17968983948230743, "learning_rate": 1.580527090400258e-06, "loss": 1.3606, "step": 1922 }, { "epoch": 0.7451130562185839, "grad_norm": 0.1995328813791275, "learning_rate": 1.5759964135552352e-06, "loss": 1.4309, "step": 1923 }, { "epoch": 0.7455005305067891, "grad_norm": 0.18367250263690948, "learning_rate": 1.5714710245679348e-06, "loss": 1.4239, "step": 1924 }, { "epoch": 0.7458880047949943, "grad_norm": 0.19297939538955688, "learning_rate": 1.5669509304271357e-06, "loss": 1.3843, "step": 1925 }, { "epoch": 0.7462754790831995, "grad_norm": 0.1960078477859497, "learning_rate": 1.5624361381134451e-06, "loss": 1.4546, "step": 1926 }, { "epoch": 0.7466629533714046, "grad_norm": 0.1967233270406723, "learning_rate": 1.557926654599276e-06, "loss": 1.3609, "step": 1927 }, { "epoch": 0.7470504276596099, "grad_norm": 0.18759670853614807, "learning_rate": 1.5534224868488507e-06, "loss": 1.4042, "step": 1928 }, { "epoch": 0.7474379019478151, "grad_norm": 0.1748625934123993, "learning_rate": 1.548923641818173e-06, "loss": 1.3661, "step": 1929 }, { "epoch": 0.7478253762360203, "grad_norm": 0.1868477463722229, "learning_rate": 1.5444301264550354e-06, "loss": 1.4234, "step": 1930 }, { "epoch": 0.7482128505242255, "grad_norm": 0.1994430422782898, "learning_rate": 1.5399419476989924e-06, "loss": 1.3558, "step": 1931 }, { "epoch": 0.7486003248124307, "grad_norm": 0.1858588010072708, "learning_rate": 1.5354591124813628e-06, "loss": 1.4206, "step": 1932 }, { "epoch": 0.7489877991006358, "grad_norm": 0.1771814525127411, "learning_rate": 1.5309816277252076e-06, "loss": 1.4115, "step": 1933 }, { "epoch": 0.749375273388841, "grad_norm": 0.18425697088241577, "learning_rate": 1.5265095003453295e-06, "loss": 1.4223, "step": 1934 }, { "epoch": 0.7497627476770462, "grad_norm": 0.17833995819091797, "learning_rate": 1.5220427372482538e-06, "loss": 1.3633, "step": 1935 }, { "epoch": 0.7501502219652514, "grad_norm": 0.20062021911144257, "learning_rate": 1.5175813453322252e-06, "loss": 1.3649, "step": 1936 }, { "epoch": 0.7505376962534566, "grad_norm": 0.1832384169101715, "learning_rate": 1.5131253314871886e-06, "loss": 1.3923, "step": 1937 }, { "epoch": 0.7509251705416619, "grad_norm": 0.18388360738754272, "learning_rate": 1.5086747025947895e-06, "loss": 1.3759, "step": 1938 }, { "epoch": 0.751312644829867, "grad_norm": 0.19308070838451385, "learning_rate": 1.5042294655283508e-06, "loss": 1.395, "step": 1939 }, { "epoch": 0.7517001191180722, "grad_norm": 0.19551512598991394, "learning_rate": 1.499789627152874e-06, "loss": 1.3553, "step": 1940 }, { "epoch": 0.7520875934062774, "grad_norm": 0.19648531079292297, "learning_rate": 1.4953551943250171e-06, "loss": 1.3656, "step": 1941 }, { "epoch": 0.7524750676944826, "grad_norm": 0.17298629879951477, "learning_rate": 1.490926173893098e-06, "loss": 1.3621, "step": 1942 }, { "epoch": 0.7528625419826878, "grad_norm": 0.18471895158290863, "learning_rate": 1.4865025726970678e-06, "loss": 1.4105, "step": 1943 }, { "epoch": 0.7532500162708929, "grad_norm": 0.17480257153511047, "learning_rate": 1.482084397568515e-06, "loss": 1.3477, "step": 1944 }, { "epoch": 0.7536374905590981, "grad_norm": 0.17901387810707092, "learning_rate": 1.477671655330643e-06, "loss": 1.3431, "step": 1945 }, { "epoch": 0.7540249648473033, "grad_norm": 0.18149064481258392, "learning_rate": 1.47326435279827e-06, "loss": 1.4361, "step": 1946 }, { "epoch": 0.7544124391355085, "grad_norm": 0.18966259062290192, "learning_rate": 1.4688624967778076e-06, "loss": 1.3682, "step": 1947 }, { "epoch": 0.7547999134237138, "grad_norm": 0.18058331310749054, "learning_rate": 1.4644660940672628e-06, "loss": 1.3238, "step": 1948 }, { "epoch": 0.755187387711919, "grad_norm": 0.2254488468170166, "learning_rate": 1.4600751514562155e-06, "loss": 1.3555, "step": 1949 }, { "epoch": 0.7555748620001241, "grad_norm": 0.18011733889579773, "learning_rate": 1.4556896757258139e-06, "loss": 1.3284, "step": 1950 }, { "epoch": 0.7559623362883293, "grad_norm": 0.19025824964046478, "learning_rate": 1.451309673648767e-06, "loss": 1.3718, "step": 1951 }, { "epoch": 0.7563498105765345, "grad_norm": 0.19601310789585114, "learning_rate": 1.4469351519893254e-06, "loss": 1.3399, "step": 1952 }, { "epoch": 0.7567372848647397, "grad_norm": 0.19138245284557343, "learning_rate": 1.4425661175032824e-06, "loss": 1.4178, "step": 1953 }, { "epoch": 0.7571247591529449, "grad_norm": 0.1972004771232605, "learning_rate": 1.4382025769379493e-06, "loss": 1.3699, "step": 1954 }, { "epoch": 0.75751223344115, "grad_norm": 0.1957542896270752, "learning_rate": 1.4338445370321619e-06, "loss": 1.327, "step": 1955 }, { "epoch": 0.7578997077293552, "grad_norm": 0.18611615896224976, "learning_rate": 1.4294920045162514e-06, "loss": 1.3313, "step": 1956 }, { "epoch": 0.7582871820175604, "grad_norm": 0.19279521703720093, "learning_rate": 1.4251449861120532e-06, "loss": 1.3383, "step": 1957 }, { "epoch": 0.7586746563057657, "grad_norm": 0.20895415544509888, "learning_rate": 1.4208034885328786e-06, "loss": 1.3732, "step": 1958 }, { "epoch": 0.7590621305939709, "grad_norm": 0.17796844244003296, "learning_rate": 1.4164675184835193e-06, "loss": 1.3866, "step": 1959 }, { "epoch": 0.759449604882176, "grad_norm": 0.1918809413909912, "learning_rate": 1.4121370826602254e-06, "loss": 1.3717, "step": 1960 }, { "epoch": 0.7598370791703812, "grad_norm": 0.1925715059041977, "learning_rate": 1.4078121877507056e-06, "loss": 1.327, "step": 1961 }, { "epoch": 0.7602245534585864, "grad_norm": 0.1721653938293457, "learning_rate": 1.4034928404341047e-06, "loss": 1.4136, "step": 1962 }, { "epoch": 0.7606120277467916, "grad_norm": 0.18496376276016235, "learning_rate": 1.3991790473810085e-06, "loss": 1.403, "step": 1963 }, { "epoch": 0.7609995020349968, "grad_norm": 0.18798154592514038, "learning_rate": 1.3948708152534163e-06, "loss": 1.3772, "step": 1964 }, { "epoch": 0.761386976323202, "grad_norm": 0.19420833885669708, "learning_rate": 1.3905681507047464e-06, "loss": 1.3968, "step": 1965 }, { "epoch": 0.7617744506114071, "grad_norm": 0.18298999965190887, "learning_rate": 1.386271060379814e-06, "loss": 1.4097, "step": 1966 }, { "epoch": 0.7621619248996123, "grad_norm": 0.1709355115890503, "learning_rate": 1.3819795509148303e-06, "loss": 1.3928, "step": 1967 }, { "epoch": 0.7625493991878176, "grad_norm": 0.17892108857631683, "learning_rate": 1.377693628937382e-06, "loss": 1.3536, "step": 1968 }, { "epoch": 0.7629368734760228, "grad_norm": 0.2113237977027893, "learning_rate": 1.3734133010664336e-06, "loss": 1.3631, "step": 1969 }, { "epoch": 0.763324347764228, "grad_norm": 0.19684268534183502, "learning_rate": 1.3691385739123026e-06, "loss": 1.4418, "step": 1970 }, { "epoch": 0.7637118220524332, "grad_norm": 0.19796247780323029, "learning_rate": 1.364869454076665e-06, "loss": 1.4053, "step": 1971 }, { "epoch": 0.7640992963406383, "grad_norm": 0.19157081842422485, "learning_rate": 1.3606059481525296e-06, "loss": 1.3872, "step": 1972 }, { "epoch": 0.7644867706288435, "grad_norm": 0.18911059200763702, "learning_rate": 1.3563480627242425e-06, "loss": 1.3359, "step": 1973 }, { "epoch": 0.7648742449170487, "grad_norm": 0.18464094400405884, "learning_rate": 1.3520958043674632e-06, "loss": 1.3814, "step": 1974 }, { "epoch": 0.7652617192052539, "grad_norm": 0.19011256098747253, "learning_rate": 1.3478491796491671e-06, "loss": 1.3858, "step": 1975 }, { "epoch": 0.7656491934934591, "grad_norm": 0.18932078778743744, "learning_rate": 1.3436081951276247e-06, "loss": 1.3375, "step": 1976 }, { "epoch": 0.7660366677816642, "grad_norm": 0.17562806606292725, "learning_rate": 1.3393728573523961e-06, "loss": 1.3574, "step": 1977 }, { "epoch": 0.7664241420698695, "grad_norm": 0.17574985325336456, "learning_rate": 1.335143172864326e-06, "loss": 1.375, "step": 1978 }, { "epoch": 0.7668116163580747, "grad_norm": 0.18676798045635223, "learning_rate": 1.3309191481955213e-06, "loss": 1.3681, "step": 1979 }, { "epoch": 0.7671990906462799, "grad_norm": 0.17724540829658508, "learning_rate": 1.3267007898693552e-06, "loss": 1.4174, "step": 1980 }, { "epoch": 0.7675865649344851, "grad_norm": 0.18915417790412903, "learning_rate": 1.3224881044004434e-06, "loss": 1.3398, "step": 1981 }, { "epoch": 0.7679740392226903, "grad_norm": 0.18637488782405853, "learning_rate": 1.318281098294647e-06, "loss": 1.3861, "step": 1982 }, { "epoch": 0.7683615135108954, "grad_norm": 0.1831217259168625, "learning_rate": 1.3140797780490506e-06, "loss": 1.373, "step": 1983 }, { "epoch": 0.7687489877991006, "grad_norm": 0.1765807569026947, "learning_rate": 1.3098841501519626e-06, "loss": 1.3946, "step": 1984 }, { "epoch": 0.7691364620873058, "grad_norm": 0.1784719079732895, "learning_rate": 1.3056942210828955e-06, "loss": 1.3419, "step": 1985 }, { "epoch": 0.769523936375511, "grad_norm": 0.17902636528015137, "learning_rate": 1.3015099973125667e-06, "loss": 1.3898, "step": 1986 }, { "epoch": 0.7699114106637163, "grad_norm": 0.1853409707546234, "learning_rate": 1.2973314853028768e-06, "loss": 1.3986, "step": 1987 }, { "epoch": 0.7702988849519214, "grad_norm": 0.19079044461250305, "learning_rate": 1.2931586915069106e-06, "loss": 1.3906, "step": 1988 }, { "epoch": 0.7706863592401266, "grad_norm": 0.1972050815820694, "learning_rate": 1.2889916223689165e-06, "loss": 1.3775, "step": 1989 }, { "epoch": 0.7710738335283318, "grad_norm": 0.18993988633155823, "learning_rate": 1.2848302843243083e-06, "loss": 1.3377, "step": 1990 }, { "epoch": 0.771461307816537, "grad_norm": 0.18999475240707397, "learning_rate": 1.2806746837996426e-06, "loss": 1.3768, "step": 1991 }, { "epoch": 0.7718487821047422, "grad_norm": 0.18518897891044617, "learning_rate": 1.2765248272126213e-06, "loss": 1.3702, "step": 1992 }, { "epoch": 0.7722362563929474, "grad_norm": 0.19447235763072968, "learning_rate": 1.2723807209720697e-06, "loss": 1.3942, "step": 1993 }, { "epoch": 0.7726237306811525, "grad_norm": 0.20154866576194763, "learning_rate": 1.268242371477939e-06, "loss": 1.3784, "step": 1994 }, { "epoch": 0.7730112049693577, "grad_norm": 0.18007682263851166, "learning_rate": 1.264109785121283e-06, "loss": 1.3698, "step": 1995 }, { "epoch": 0.7733986792575629, "grad_norm": 0.18510396778583527, "learning_rate": 1.2599829682842618e-06, "loss": 1.3793, "step": 1996 }, { "epoch": 0.7737861535457682, "grad_norm": 0.19009068608283997, "learning_rate": 1.2558619273401195e-06, "loss": 1.384, "step": 1997 }, { "epoch": 0.7741736278339734, "grad_norm": 0.18796385824680328, "learning_rate": 1.2517466686531866e-06, "loss": 1.3562, "step": 1998 }, { "epoch": 0.7745611021221785, "grad_norm": 0.19502688944339752, "learning_rate": 1.2476371985788566e-06, "loss": 1.4065, "step": 1999 }, { "epoch": 0.7749485764103837, "grad_norm": 0.18601472675800323, "learning_rate": 1.2435335234635892e-06, "loss": 1.3954, "step": 2000 }, { "epoch": 0.7753360506985889, "grad_norm": 0.18935821950435638, "learning_rate": 1.2394356496448906e-06, "loss": 1.3772, "step": 2001 }, { "epoch": 0.7757235249867941, "grad_norm": 0.18989016115665436, "learning_rate": 1.235343583451311e-06, "loss": 1.3818, "step": 2002 }, { "epoch": 0.7761109992749993, "grad_norm": 0.1821887493133545, "learning_rate": 1.2312573312024312e-06, "loss": 1.3984, "step": 2003 }, { "epoch": 0.7764984735632044, "grad_norm": 0.19072575867176056, "learning_rate": 1.227176899208849e-06, "loss": 1.3636, "step": 2004 }, { "epoch": 0.7768859478514096, "grad_norm": 0.22156307101249695, "learning_rate": 1.2231022937721804e-06, "loss": 1.3776, "step": 2005 }, { "epoch": 0.7772734221396148, "grad_norm": 0.18571196496486664, "learning_rate": 1.2190335211850363e-06, "loss": 1.4038, "step": 2006 }, { "epoch": 0.7776608964278201, "grad_norm": 0.2030322402715683, "learning_rate": 1.2149705877310264e-06, "loss": 1.4208, "step": 2007 }, { "epoch": 0.7780483707160253, "grad_norm": 0.17586307227611542, "learning_rate": 1.2109134996847366e-06, "loss": 1.3757, "step": 2008 }, { "epoch": 0.7784358450042305, "grad_norm": 0.19454358518123627, "learning_rate": 1.20686226331173e-06, "loss": 1.3438, "step": 2009 }, { "epoch": 0.7788233192924356, "grad_norm": 0.1932450383901596, "learning_rate": 1.2028168848685296e-06, "loss": 1.3099, "step": 2010 }, { "epoch": 0.7792107935806408, "grad_norm": 0.17931924760341644, "learning_rate": 1.1987773706026141e-06, "loss": 1.3667, "step": 2011 }, { "epoch": 0.779598267868846, "grad_norm": 0.17818355560302734, "learning_rate": 1.194743726752403e-06, "loss": 1.3669, "step": 2012 }, { "epoch": 0.7799857421570512, "grad_norm": 0.19869740307331085, "learning_rate": 1.1907159595472539e-06, "loss": 1.3918, "step": 2013 }, { "epoch": 0.7803732164452564, "grad_norm": 0.18980935215950012, "learning_rate": 1.1866940752074445e-06, "loss": 1.3837, "step": 2014 }, { "epoch": 0.7807606907334615, "grad_norm": 0.20783552527427673, "learning_rate": 1.1826780799441712e-06, "loss": 1.3759, "step": 2015 }, { "epoch": 0.7811481650216667, "grad_norm": 0.19700807332992554, "learning_rate": 1.1786679799595308e-06, "loss": 1.4282, "step": 2016 }, { "epoch": 0.781535639309872, "grad_norm": 0.18039366602897644, "learning_rate": 1.1746637814465223e-06, "loss": 1.3732, "step": 2017 }, { "epoch": 0.7819231135980772, "grad_norm": 0.19724373519420624, "learning_rate": 1.170665490589023e-06, "loss": 1.3455, "step": 2018 }, { "epoch": 0.7823105878862824, "grad_norm": 0.18228240311145782, "learning_rate": 1.1666731135617954e-06, "loss": 1.339, "step": 2019 }, { "epoch": 0.7826980621744876, "grad_norm": 0.17747077345848083, "learning_rate": 1.1626866565304594e-06, "loss": 1.4235, "step": 2020 }, { "epoch": 0.7830855364626927, "grad_norm": 0.18731571733951569, "learning_rate": 1.158706125651502e-06, "loss": 1.4255, "step": 2021 }, { "epoch": 0.7834730107508979, "grad_norm": 0.19773979485034943, "learning_rate": 1.1547315270722498e-06, "loss": 1.3841, "step": 2022 }, { "epoch": 0.7838604850391031, "grad_norm": 0.17859306931495667, "learning_rate": 1.1507628669308746e-06, "loss": 1.3872, "step": 2023 }, { "epoch": 0.7842479593273083, "grad_norm": 0.1993684619665146, "learning_rate": 1.1468001513563709e-06, "loss": 1.345, "step": 2024 }, { "epoch": 0.7846354336155135, "grad_norm": 0.17996801435947418, "learning_rate": 1.142843386468559e-06, "loss": 1.3889, "step": 2025 }, { "epoch": 0.7850229079037186, "grad_norm": 0.1930953860282898, "learning_rate": 1.1388925783780636e-06, "loss": 1.4111, "step": 2026 }, { "epoch": 0.7854103821919239, "grad_norm": 0.18003973364830017, "learning_rate": 1.134947733186315e-06, "loss": 1.3694, "step": 2027 }, { "epoch": 0.7857978564801291, "grad_norm": 0.18449963629245758, "learning_rate": 1.1310088569855315e-06, "loss": 1.3574, "step": 2028 }, { "epoch": 0.7861853307683343, "grad_norm": 0.1906992495059967, "learning_rate": 1.1270759558587124e-06, "loss": 1.3576, "step": 2029 }, { "epoch": 0.7865728050565395, "grad_norm": 0.173511803150177, "learning_rate": 1.1231490358796331e-06, "loss": 1.3931, "step": 2030 }, { "epoch": 0.7869602793447447, "grad_norm": 0.18085969984531403, "learning_rate": 1.1192281031128282e-06, "loss": 1.422, "step": 2031 }, { "epoch": 0.7873477536329498, "grad_norm": 0.187160924077034, "learning_rate": 1.1153131636135893e-06, "loss": 1.3597, "step": 2032 }, { "epoch": 0.787735227921155, "grad_norm": 0.1739964783191681, "learning_rate": 1.1114042234279492e-06, "loss": 1.452, "step": 2033 }, { "epoch": 0.7881227022093602, "grad_norm": 0.1982647031545639, "learning_rate": 1.1075012885926795e-06, "loss": 1.3486, "step": 2034 }, { "epoch": 0.7885101764975654, "grad_norm": 0.1798030287027359, "learning_rate": 1.1036043651352713e-06, "loss": 1.3825, "step": 2035 }, { "epoch": 0.7888976507857706, "grad_norm": 0.21166622638702393, "learning_rate": 1.09971345907394e-06, "loss": 1.3974, "step": 2036 }, { "epoch": 0.7892851250739759, "grad_norm": 0.18986453115940094, "learning_rate": 1.0958285764176002e-06, "loss": 1.4199, "step": 2037 }, { "epoch": 0.789672599362181, "grad_norm": 0.18075929582118988, "learning_rate": 1.091949723165871e-06, "loss": 1.3619, "step": 2038 }, { "epoch": 0.7900600736503862, "grad_norm": 0.19842058420181274, "learning_rate": 1.088076905309054e-06, "loss": 1.4164, "step": 2039 }, { "epoch": 0.7904475479385914, "grad_norm": 0.172822043299675, "learning_rate": 1.0842101288281359e-06, "loss": 1.3785, "step": 2040 }, { "epoch": 0.7908350222267966, "grad_norm": 0.20026449859142303, "learning_rate": 1.0803493996947678e-06, "loss": 1.3916, "step": 2041 }, { "epoch": 0.7912224965150018, "grad_norm": 0.19477605819702148, "learning_rate": 1.0764947238712664e-06, "loss": 1.3607, "step": 2042 }, { "epoch": 0.7916099708032069, "grad_norm": 0.1808597892522812, "learning_rate": 1.072646107310596e-06, "loss": 1.3931, "step": 2043 }, { "epoch": 0.7919974450914121, "grad_norm": 0.1839544177055359, "learning_rate": 1.068803555956367e-06, "loss": 1.3479, "step": 2044 }, { "epoch": 0.7923849193796173, "grad_norm": 0.2100498229265213, "learning_rate": 1.0649670757428183e-06, "loss": 1.3947, "step": 2045 }, { "epoch": 0.7927723936678225, "grad_norm": 0.1891070306301117, "learning_rate": 1.061136672594819e-06, "loss": 1.4297, "step": 2046 }, { "epoch": 0.7931598679560278, "grad_norm": 0.1771649420261383, "learning_rate": 1.0573123524278467e-06, "loss": 1.3623, "step": 2047 }, { "epoch": 0.793547342244233, "grad_norm": 0.1864607334136963, "learning_rate": 1.0534941211479905e-06, "loss": 1.3528, "step": 2048 }, { "epoch": 0.7939348165324381, "grad_norm": 0.17566578090190887, "learning_rate": 1.0496819846519307e-06, "loss": 1.4102, "step": 2049 }, { "epoch": 0.7943222908206433, "grad_norm": 0.1812853217124939, "learning_rate": 1.0458759488269405e-06, "loss": 1.3832, "step": 2050 }, { "epoch": 0.7947097651088485, "grad_norm": 0.1895604133605957, "learning_rate": 1.042076019550866e-06, "loss": 1.3942, "step": 2051 }, { "epoch": 0.7950972393970537, "grad_norm": 0.18721751868724823, "learning_rate": 1.0382822026921291e-06, "loss": 1.3771, "step": 2052 }, { "epoch": 0.7954847136852589, "grad_norm": 0.17143382132053375, "learning_rate": 1.0344945041097043e-06, "loss": 1.4093, "step": 2053 }, { "epoch": 0.795872187973464, "grad_norm": 0.1985645741224289, "learning_rate": 1.0307129296531248e-06, "loss": 1.3463, "step": 2054 }, { "epoch": 0.7962596622616692, "grad_norm": 0.1783856600522995, "learning_rate": 1.026937485162462e-06, "loss": 1.3534, "step": 2055 }, { "epoch": 0.7966471365498744, "grad_norm": 0.17570409178733826, "learning_rate": 1.0231681764683188e-06, "loss": 1.367, "step": 2056 }, { "epoch": 0.7970346108380797, "grad_norm": 0.2041919231414795, "learning_rate": 1.019405009391828e-06, "loss": 1.3457, "step": 2057 }, { "epoch": 0.7974220851262849, "grad_norm": 0.17341573536396027, "learning_rate": 1.0156479897446309e-06, "loss": 1.4028, "step": 2058 }, { "epoch": 0.79780955941449, "grad_norm": 0.19921515882015228, "learning_rate": 1.011897123328881e-06, "loss": 1.3612, "step": 2059 }, { "epoch": 0.7981970337026952, "grad_norm": 0.18647922575473785, "learning_rate": 1.0081524159372246e-06, "loss": 1.3474, "step": 2060 }, { "epoch": 0.7985845079909004, "grad_norm": 0.19360101222991943, "learning_rate": 1.0044138733528008e-06, "loss": 1.4233, "step": 2061 }, { "epoch": 0.7989719822791056, "grad_norm": 0.18398457765579224, "learning_rate": 1.0006815013492217e-06, "loss": 1.3881, "step": 2062 }, { "epoch": 0.7993594565673108, "grad_norm": 0.2141476571559906, "learning_rate": 9.969553056905774e-07, "loss": 1.324, "step": 2063 }, { "epoch": 0.799746930855516, "grad_norm": 0.20207369327545166, "learning_rate": 9.93235292131412e-07, "loss": 1.3717, "step": 2064 }, { "epoch": 0.8001344051437211, "grad_norm": 0.1810447871685028, "learning_rate": 9.895214664167296e-07, "loss": 1.4237, "step": 2065 }, { "epoch": 0.8005218794319263, "grad_norm": 0.18186503648757935, "learning_rate": 9.858138342819713e-07, "loss": 1.4194, "step": 2066 }, { "epoch": 0.8009093537201316, "grad_norm": 0.18787148594856262, "learning_rate": 9.821124014530191e-07, "loss": 1.3446, "step": 2067 }, { "epoch": 0.8012968280083368, "grad_norm": 0.16564732789993286, "learning_rate": 9.784171736461762e-07, "loss": 1.3483, "step": 2068 }, { "epoch": 0.801684302296542, "grad_norm": 0.18620049953460693, "learning_rate": 9.747281565681677e-07, "loss": 1.4002, "step": 2069 }, { "epoch": 0.8020717765847472, "grad_norm": 0.175953671336174, "learning_rate": 9.710453559161214e-07, "loss": 1.4135, "step": 2070 }, { "epoch": 0.8024592508729523, "grad_norm": 0.199793741106987, "learning_rate": 9.67368777377572e-07, "loss": 1.402, "step": 2071 }, { "epoch": 0.8028467251611575, "grad_norm": 0.20133745670318604, "learning_rate": 9.636984266304377e-07, "loss": 1.445, "step": 2072 }, { "epoch": 0.8032341994493627, "grad_norm": 0.1827705204486847, "learning_rate": 9.600343093430248e-07, "loss": 1.359, "step": 2073 }, { "epoch": 0.8036216737375679, "grad_norm": 0.1898559182882309, "learning_rate": 9.563764311740087e-07, "loss": 1.4087, "step": 2074 }, { "epoch": 0.8040091480257731, "grad_norm": 0.18602031469345093, "learning_rate": 9.52724797772433e-07, "loss": 1.4262, "step": 2075 }, { "epoch": 0.8043966223139782, "grad_norm": 0.20011256635189056, "learning_rate": 9.490794147776927e-07, "loss": 1.375, "step": 2076 }, { "epoch": 0.8047840966021835, "grad_norm": 0.1897335797548294, "learning_rate": 9.454402878195363e-07, "loss": 1.4026, "step": 2077 }, { "epoch": 0.8051715708903887, "grad_norm": 0.19902761280536652, "learning_rate": 9.418074225180435e-07, "loss": 1.4166, "step": 2078 }, { "epoch": 0.8055590451785939, "grad_norm": 0.1905522644519806, "learning_rate": 9.381808244836305e-07, "loss": 1.3883, "step": 2079 }, { "epoch": 0.8059465194667991, "grad_norm": 0.17259415984153748, "learning_rate": 9.345604993170299e-07, "loss": 1.347, "step": 2080 }, { "epoch": 0.8063339937550043, "grad_norm": 0.17366176843643188, "learning_rate": 9.309464526092876e-07, "loss": 1.3774, "step": 2081 }, { "epoch": 0.8067214680432094, "grad_norm": 0.21939122676849365, "learning_rate": 9.273386899417574e-07, "loss": 1.4525, "step": 2082 }, { "epoch": 0.8071089423314146, "grad_norm": 0.1839696168899536, "learning_rate": 9.23737216886082e-07, "loss": 1.4125, "step": 2083 }, { "epoch": 0.8074964166196198, "grad_norm": 0.18872377276420593, "learning_rate": 9.201420390041965e-07, "loss": 1.3612, "step": 2084 }, { "epoch": 0.807883890907825, "grad_norm": 0.17988112568855286, "learning_rate": 9.165531618483086e-07, "loss": 1.3706, "step": 2085 }, { "epoch": 0.8082713651960302, "grad_norm": 0.18984603881835938, "learning_rate": 9.129705909609027e-07, "loss": 1.4356, "step": 2086 }, { "epoch": 0.8086588394842354, "grad_norm": 0.17066648602485657, "learning_rate": 9.09394331874715e-07, "loss": 1.4205, "step": 2087 }, { "epoch": 0.8090463137724406, "grad_norm": 0.18167667090892792, "learning_rate": 9.058243901127434e-07, "loss": 1.3665, "step": 2088 }, { "epoch": 0.8094337880606458, "grad_norm": 0.2041814774274826, "learning_rate": 9.022607711882214e-07, "loss": 1.3778, "step": 2089 }, { "epoch": 0.809821262348851, "grad_norm": 0.18972869217395782, "learning_rate": 8.987034806046241e-07, "loss": 1.388, "step": 2090 }, { "epoch": 0.8102087366370562, "grad_norm": 0.20026427507400513, "learning_rate": 8.951525238556491e-07, "loss": 1.4052, "step": 2091 }, { "epoch": 0.8105962109252614, "grad_norm": 0.1770617514848709, "learning_rate": 8.916079064252164e-07, "loss": 1.4375, "step": 2092 }, { "epoch": 0.8109836852134665, "grad_norm": 0.19908426702022552, "learning_rate": 8.880696337874506e-07, "loss": 1.4175, "step": 2093 }, { "epoch": 0.8113711595016717, "grad_norm": 0.21062150597572327, "learning_rate": 8.845377114066844e-07, "loss": 1.3839, "step": 2094 }, { "epoch": 0.8117586337898769, "grad_norm": 0.18702737987041473, "learning_rate": 8.810121447374359e-07, "loss": 1.3963, "step": 2095 }, { "epoch": 0.8121461080780821, "grad_norm": 0.19425974786281586, "learning_rate": 8.774929392244158e-07, "loss": 1.3653, "step": 2096 }, { "epoch": 0.8125335823662874, "grad_norm": 0.19211648404598236, "learning_rate": 8.739801003025028e-07, "loss": 1.4115, "step": 2097 }, { "epoch": 0.8129210566544925, "grad_norm": 0.1723887324333191, "learning_rate": 8.704736333967506e-07, "loss": 1.3691, "step": 2098 }, { "epoch": 0.8133085309426977, "grad_norm": 0.1775594800710678, "learning_rate": 8.66973543922367e-07, "loss": 1.3735, "step": 2099 }, { "epoch": 0.8136960052309029, "grad_norm": 0.18804466724395752, "learning_rate": 8.634798372847148e-07, "loss": 1.4236, "step": 2100 }, { "epoch": 0.8140834795191081, "grad_norm": 0.1898200958967209, "learning_rate": 8.599925188792952e-07, "loss": 1.3889, "step": 2101 }, { "epoch": 0.8144709538073133, "grad_norm": 0.2067742943763733, "learning_rate": 8.56511594091749e-07, "loss": 1.4412, "step": 2102 }, { "epoch": 0.8148584280955185, "grad_norm": 0.19237643480300903, "learning_rate": 8.530370682978372e-07, "loss": 1.3497, "step": 2103 }, { "epoch": 0.8152459023837236, "grad_norm": 0.18965399265289307, "learning_rate": 8.495689468634444e-07, "loss": 1.3724, "step": 2104 }, { "epoch": 0.8156333766719288, "grad_norm": 0.21087372303009033, "learning_rate": 8.461072351445587e-07, "loss": 1.4148, "step": 2105 }, { "epoch": 0.816020850960134, "grad_norm": 0.19939257204532623, "learning_rate": 8.426519384872733e-07, "loss": 1.3578, "step": 2106 }, { "epoch": 0.8164083252483393, "grad_norm": 0.20584091544151306, "learning_rate": 8.392030622277758e-07, "loss": 1.3948, "step": 2107 }, { "epoch": 0.8167957995365445, "grad_norm": 0.18284158408641815, "learning_rate": 8.357606116923328e-07, "loss": 1.4138, "step": 2108 }, { "epoch": 0.8171832738247496, "grad_norm": 0.20052944123744965, "learning_rate": 8.323245921972928e-07, "loss": 1.3694, "step": 2109 }, { "epoch": 0.8175707481129548, "grad_norm": 0.1735670268535614, "learning_rate": 8.288950090490683e-07, "loss": 1.4173, "step": 2110 }, { "epoch": 0.81795822240116, "grad_norm": 0.1978902667760849, "learning_rate": 8.254718675441359e-07, "loss": 1.3698, "step": 2111 }, { "epoch": 0.8183456966893652, "grad_norm": 0.191811203956604, "learning_rate": 8.220551729690196e-07, "loss": 1.4516, "step": 2112 }, { "epoch": 0.8187331709775704, "grad_norm": 0.18897974491119385, "learning_rate": 8.18644930600292e-07, "loss": 1.3706, "step": 2113 }, { "epoch": 0.8191206452657755, "grad_norm": 0.18350236117839813, "learning_rate": 8.152411457045567e-07, "loss": 1.3544, "step": 2114 }, { "epoch": 0.8195081195539807, "grad_norm": 0.18616795539855957, "learning_rate": 8.118438235384479e-07, "loss": 1.3808, "step": 2115 }, { "epoch": 0.8198955938421859, "grad_norm": 0.1764839142560959, "learning_rate": 8.084529693486171e-07, "loss": 1.4106, "step": 2116 }, { "epoch": 0.8202830681303912, "grad_norm": 0.1984432339668274, "learning_rate": 8.050685883717286e-07, "loss": 1.3848, "step": 2117 }, { "epoch": 0.8206705424185964, "grad_norm": 0.1876644343137741, "learning_rate": 8.016906858344475e-07, "loss": 1.3533, "step": 2118 }, { "epoch": 0.8210580167068016, "grad_norm": 0.18554827570915222, "learning_rate": 7.983192669534379e-07, "loss": 1.3846, "step": 2119 }, { "epoch": 0.8214454909950067, "grad_norm": 0.1875038594007492, "learning_rate": 7.949543369353452e-07, "loss": 1.4064, "step": 2120 }, { "epoch": 0.8218329652832119, "grad_norm": 0.18036127090454102, "learning_rate": 7.915959009767998e-07, "loss": 1.3371, "step": 2121 }, { "epoch": 0.8222204395714171, "grad_norm": 0.18170301616191864, "learning_rate": 7.882439642643975e-07, "loss": 1.4244, "step": 2122 }, { "epoch": 0.8226079138596223, "grad_norm": 0.2010061889886856, "learning_rate": 7.848985319747022e-07, "loss": 1.4423, "step": 2123 }, { "epoch": 0.8229953881478275, "grad_norm": 0.19050715863704681, "learning_rate": 7.815596092742278e-07, "loss": 1.3554, "step": 2124 }, { "epoch": 0.8233828624360326, "grad_norm": 0.1828613132238388, "learning_rate": 7.782272013194397e-07, "loss": 1.3663, "step": 2125 }, { "epoch": 0.8237703367242378, "grad_norm": 0.18448325991630554, "learning_rate": 7.749013132567374e-07, "loss": 1.3755, "step": 2126 }, { "epoch": 0.8241578110124431, "grad_norm": 0.1927332729101181, "learning_rate": 7.715819502224564e-07, "loss": 1.414, "step": 2127 }, { "epoch": 0.8245452853006483, "grad_norm": 0.16939938068389893, "learning_rate": 7.682691173428503e-07, "loss": 1.3673, "step": 2128 }, { "epoch": 0.8249327595888535, "grad_norm": 0.21315915882587433, "learning_rate": 7.649628197340931e-07, "loss": 1.3686, "step": 2129 }, { "epoch": 0.8253202338770587, "grad_norm": 0.1892424076795578, "learning_rate": 7.616630625022609e-07, "loss": 1.3537, "step": 2130 }, { "epoch": 0.8257077081652638, "grad_norm": 0.17962083220481873, "learning_rate": 7.58369850743334e-07, "loss": 1.3944, "step": 2131 }, { "epoch": 0.826095182453469, "grad_norm": 0.18284156918525696, "learning_rate": 7.550831895431799e-07, "loss": 1.3092, "step": 2132 }, { "epoch": 0.8264826567416742, "grad_norm": 0.1986715942621231, "learning_rate": 7.518030839775536e-07, "loss": 1.3829, "step": 2133 }, { "epoch": 0.8268701310298794, "grad_norm": 0.18789222836494446, "learning_rate": 7.485295391120823e-07, "loss": 1.313, "step": 2134 }, { "epoch": 0.8272576053180846, "grad_norm": 0.18721547722816467, "learning_rate": 7.452625600022629e-07, "loss": 1.3803, "step": 2135 }, { "epoch": 0.8276450796062897, "grad_norm": 0.20958904922008514, "learning_rate": 7.420021516934539e-07, "loss": 1.3736, "step": 2136 }, { "epoch": 0.828032553894495, "grad_norm": 0.18238689005374908, "learning_rate": 7.387483192208627e-07, "loss": 1.4007, "step": 2137 }, { "epoch": 0.8284200281827002, "grad_norm": 0.18637992441654205, "learning_rate": 7.355010676095459e-07, "loss": 1.4034, "step": 2138 }, { "epoch": 0.8288075024709054, "grad_norm": 0.18678662180900574, "learning_rate": 7.322604018743923e-07, "loss": 1.3421, "step": 2139 }, { "epoch": 0.8291949767591106, "grad_norm": 0.19561077654361725, "learning_rate": 7.290263270201231e-07, "loss": 1.3741, "step": 2140 }, { "epoch": 0.8295824510473158, "grad_norm": 0.1857004314661026, "learning_rate": 7.257988480412786e-07, "loss": 1.4452, "step": 2141 }, { "epoch": 0.8299699253355209, "grad_norm": 0.20182818174362183, "learning_rate": 7.225779699222157e-07, "loss": 1.3742, "step": 2142 }, { "epoch": 0.8303573996237261, "grad_norm": 0.18142816424369812, "learning_rate": 7.193636976370933e-07, "loss": 1.4215, "step": 2143 }, { "epoch": 0.8307448739119313, "grad_norm": 0.17777538299560547, "learning_rate": 7.161560361498732e-07, "loss": 1.3813, "step": 2144 }, { "epoch": 0.8311323482001365, "grad_norm": 0.18798433244228363, "learning_rate": 7.129549904143024e-07, "loss": 1.3947, "step": 2145 }, { "epoch": 0.8315198224883417, "grad_norm": 0.19445791840553284, "learning_rate": 7.097605653739165e-07, "loss": 1.4068, "step": 2146 }, { "epoch": 0.831907296776547, "grad_norm": 0.18334689736366272, "learning_rate": 7.065727659620214e-07, "loss": 1.3401, "step": 2147 }, { "epoch": 0.8322947710647521, "grad_norm": 0.20408478379249573, "learning_rate": 7.033915971016952e-07, "loss": 1.4171, "step": 2148 }, { "epoch": 0.8326822453529573, "grad_norm": 0.1972094029188156, "learning_rate": 7.00217063705772e-07, "loss": 1.3666, "step": 2149 }, { "epoch": 0.8330697196411625, "grad_norm": 0.18835145235061646, "learning_rate": 6.970491706768423e-07, "loss": 1.3528, "step": 2150 }, { "epoch": 0.8334571939293677, "grad_norm": 0.18333540856838226, "learning_rate": 6.938879229072382e-07, "loss": 1.3525, "step": 2151 }, { "epoch": 0.8338446682175729, "grad_norm": 0.18176987767219543, "learning_rate": 6.907333252790316e-07, "loss": 1.3411, "step": 2152 }, { "epoch": 0.834232142505778, "grad_norm": 0.1868908703327179, "learning_rate": 6.875853826640222e-07, "loss": 1.3947, "step": 2153 }, { "epoch": 0.8346196167939832, "grad_norm": 0.19085721671581268, "learning_rate": 6.84444099923735e-07, "loss": 1.3921, "step": 2154 }, { "epoch": 0.8350070910821884, "grad_norm": 0.18766993284225464, "learning_rate": 6.813094819094052e-07, "loss": 1.4161, "step": 2155 }, { "epoch": 0.8353945653703936, "grad_norm": 0.2007542848587036, "learning_rate": 6.781815334619812e-07, "loss": 1.3563, "step": 2156 }, { "epoch": 0.8357820396585989, "grad_norm": 0.19007812440395355, "learning_rate": 6.750602594121053e-07, "loss": 1.3955, "step": 2157 }, { "epoch": 0.836169513946804, "grad_norm": 0.19417883455753326, "learning_rate": 6.719456645801175e-07, "loss": 1.3927, "step": 2158 }, { "epoch": 0.8365569882350092, "grad_norm": 0.18135857582092285, "learning_rate": 6.688377537760393e-07, "loss": 1.3999, "step": 2159 }, { "epoch": 0.8369444625232144, "grad_norm": 0.18324914574623108, "learning_rate": 6.657365317995696e-07, "loss": 1.3743, "step": 2160 }, { "epoch": 0.8373319368114196, "grad_norm": 0.18201661109924316, "learning_rate": 6.626420034400804e-07, "loss": 1.3606, "step": 2161 }, { "epoch": 0.8377194110996248, "grad_norm": 0.22554682195186615, "learning_rate": 6.595541734766031e-07, "loss": 1.3714, "step": 2162 }, { "epoch": 0.83810688538783, "grad_norm": 0.172357439994812, "learning_rate": 6.564730466778274e-07, "loss": 1.4051, "step": 2163 }, { "epoch": 0.8384943596760351, "grad_norm": 0.19340857863426208, "learning_rate": 6.533986278020876e-07, "loss": 1.3499, "step": 2164 }, { "epoch": 0.8388818339642403, "grad_norm": 0.20012931525707245, "learning_rate": 6.503309215973624e-07, "loss": 1.4055, "step": 2165 }, { "epoch": 0.8392693082524455, "grad_norm": 0.18399831652641296, "learning_rate": 6.472699328012605e-07, "loss": 1.4027, "step": 2166 }, { "epoch": 0.8396567825406508, "grad_norm": 0.20474746823310852, "learning_rate": 6.442156661410193e-07, "loss": 1.3678, "step": 2167 }, { "epoch": 0.840044256828856, "grad_norm": 0.186457559466362, "learning_rate": 6.411681263334913e-07, "loss": 1.3929, "step": 2168 }, { "epoch": 0.8404317311170612, "grad_norm": 0.17734526097774506, "learning_rate": 6.381273180851455e-07, "loss": 1.3853, "step": 2169 }, { "epoch": 0.8408192054052663, "grad_norm": 0.19623078405857086, "learning_rate": 6.350932460920494e-07, "loss": 1.3719, "step": 2170 }, { "epoch": 0.8412066796934715, "grad_norm": 0.1726803183555603, "learning_rate": 6.320659150398728e-07, "loss": 1.4109, "step": 2171 }, { "epoch": 0.8415941539816767, "grad_norm": 0.1902024894952774, "learning_rate": 6.290453296038702e-07, "loss": 1.3496, "step": 2172 }, { "epoch": 0.8419816282698819, "grad_norm": 0.1820477992296219, "learning_rate": 6.260314944488822e-07, "loss": 1.3412, "step": 2173 }, { "epoch": 0.8423691025580871, "grad_norm": 0.17347818613052368, "learning_rate": 6.230244142293218e-07, "loss": 1.355, "step": 2174 }, { "epoch": 0.8427565768462922, "grad_norm": 0.18612945079803467, "learning_rate": 6.200240935891733e-07, "loss": 1.355, "step": 2175 }, { "epoch": 0.8431440511344974, "grad_norm": 0.18403567373752594, "learning_rate": 6.170305371619773e-07, "loss": 1.4044, "step": 2176 }, { "epoch": 0.8435315254227027, "grad_norm": 0.19498930871486664, "learning_rate": 6.140437495708335e-07, "loss": 1.3947, "step": 2177 }, { "epoch": 0.8439189997109079, "grad_norm": 0.17546892166137695, "learning_rate": 6.110637354283832e-07, "loss": 1.3925, "step": 2178 }, { "epoch": 0.8443064739991131, "grad_norm": 0.17703303694725037, "learning_rate": 6.080904993368114e-07, "loss": 1.3769, "step": 2179 }, { "epoch": 0.8446939482873183, "grad_norm": 0.1767992377281189, "learning_rate": 6.051240458878316e-07, "loss": 1.4326, "step": 2180 }, { "epoch": 0.8450814225755234, "grad_norm": 0.2026299387216568, "learning_rate": 6.021643796626852e-07, "loss": 1.4592, "step": 2181 }, { "epoch": 0.8454688968637286, "grad_norm": 0.18758268654346466, "learning_rate": 5.992115052321295e-07, "loss": 1.3923, "step": 2182 }, { "epoch": 0.8458563711519338, "grad_norm": 0.17891953885555267, "learning_rate": 5.962654271564367e-07, "loss": 1.4057, "step": 2183 }, { "epoch": 0.846243845440139, "grad_norm": 0.17969344556331635, "learning_rate": 5.933261499853777e-07, "loss": 1.3928, "step": 2184 }, { "epoch": 0.8466313197283442, "grad_norm": 0.18647554516792297, "learning_rate": 5.903936782582253e-07, "loss": 1.3417, "step": 2185 }, { "epoch": 0.8470187940165493, "grad_norm": 0.20736242830753326, "learning_rate": 5.874680165037399e-07, "loss": 1.3682, "step": 2186 }, { "epoch": 0.8474062683047546, "grad_norm": 0.17584945261478424, "learning_rate": 5.845491692401645e-07, "loss": 1.3625, "step": 2187 }, { "epoch": 0.8477937425929598, "grad_norm": 0.2027437388896942, "learning_rate": 5.816371409752203e-07, "loss": 1.4608, "step": 2188 }, { "epoch": 0.848181216881165, "grad_norm": 0.20891185104846954, "learning_rate": 5.787319362060944e-07, "loss": 1.4032, "step": 2189 }, { "epoch": 0.8485686911693702, "grad_norm": 0.19032149016857147, "learning_rate": 5.758335594194403e-07, "loss": 1.3692, "step": 2190 }, { "epoch": 0.8489561654575754, "grad_norm": 0.19217224419116974, "learning_rate": 5.729420150913617e-07, "loss": 1.3581, "step": 2191 }, { "epoch": 0.8493436397457805, "grad_norm": 0.18971851468086243, "learning_rate": 5.700573076874155e-07, "loss": 1.3483, "step": 2192 }, { "epoch": 0.8497311140339857, "grad_norm": 0.1926295906305313, "learning_rate": 5.671794416625964e-07, "loss": 1.3826, "step": 2193 }, { "epoch": 0.8501185883221909, "grad_norm": 0.17854365706443787, "learning_rate": 5.64308421461336e-07, "loss": 1.3989, "step": 2194 }, { "epoch": 0.8505060626103961, "grad_norm": 0.18496280908584595, "learning_rate": 5.614442515174906e-07, "loss": 1.3585, "step": 2195 }, { "epoch": 0.8508935368986013, "grad_norm": 0.17855706810951233, "learning_rate": 5.585869362543416e-07, "loss": 1.4449, "step": 2196 }, { "epoch": 0.8512810111868065, "grad_norm": 0.19106937944889069, "learning_rate": 5.557364800845793e-07, "loss": 1.3857, "step": 2197 }, { "epoch": 0.8516684854750117, "grad_norm": 0.19243881106376648, "learning_rate": 5.528928874103074e-07, "loss": 1.4035, "step": 2198 }, { "epoch": 0.8520559597632169, "grad_norm": 0.18587085604667664, "learning_rate": 5.500561626230222e-07, "loss": 1.3625, "step": 2199 }, { "epoch": 0.8524434340514221, "grad_norm": 0.1924731284379959, "learning_rate": 5.472263101036212e-07, "loss": 1.4325, "step": 2200 }, { "epoch": 0.8528309083396273, "grad_norm": 0.17984230816364288, "learning_rate": 5.444033342223832e-07, "loss": 1.4107, "step": 2201 }, { "epoch": 0.8532183826278325, "grad_norm": 0.1888173371553421, "learning_rate": 5.41587239338971e-07, "loss": 1.3489, "step": 2202 }, { "epoch": 0.8536058569160376, "grad_norm": 0.19261683523654938, "learning_rate": 5.38778029802417e-07, "loss": 1.3745, "step": 2203 }, { "epoch": 0.8539933312042428, "grad_norm": 0.19721238315105438, "learning_rate": 5.359757099511237e-07, "loss": 1.3864, "step": 2204 }, { "epoch": 0.854380805492448, "grad_norm": 0.1889958679676056, "learning_rate": 5.331802841128503e-07, "loss": 1.4102, "step": 2205 }, { "epoch": 0.8547682797806532, "grad_norm": 0.17564605176448822, "learning_rate": 5.303917566047129e-07, "loss": 1.3555, "step": 2206 }, { "epoch": 0.8551557540688585, "grad_norm": 0.17853306233882904, "learning_rate": 5.276101317331706e-07, "loss": 1.3519, "step": 2207 }, { "epoch": 0.8555432283570636, "grad_norm": 0.1821247935295105, "learning_rate": 5.248354137940248e-07, "loss": 1.4081, "step": 2208 }, { "epoch": 0.8559307026452688, "grad_norm": 0.21494100987911224, "learning_rate": 5.220676070724079e-07, "loss": 1.3916, "step": 2209 }, { "epoch": 0.856318176933474, "grad_norm": 0.19391751289367676, "learning_rate": 5.193067158427833e-07, "loss": 1.3834, "step": 2210 }, { "epoch": 0.8567056512216792, "grad_norm": 0.19965922832489014, "learning_rate": 5.165527443689283e-07, "loss": 1.3783, "step": 2211 }, { "epoch": 0.8570931255098844, "grad_norm": 0.1913292557001114, "learning_rate": 5.138056969039384e-07, "loss": 1.3615, "step": 2212 }, { "epoch": 0.8574805997980895, "grad_norm": 0.192038893699646, "learning_rate": 5.110655776902157e-07, "loss": 1.3784, "step": 2213 }, { "epoch": 0.8578680740862947, "grad_norm": 0.17227479815483093, "learning_rate": 5.083323909594601e-07, "loss": 1.4334, "step": 2214 }, { "epoch": 0.8582555483744999, "grad_norm": 0.18706350028514862, "learning_rate": 5.056061409326674e-07, "loss": 1.3883, "step": 2215 }, { "epoch": 0.8586430226627051, "grad_norm": 0.22453884780406952, "learning_rate": 5.028868318201191e-07, "loss": 1.3773, "step": 2216 }, { "epoch": 0.8590304969509104, "grad_norm": 0.18744371831417084, "learning_rate": 5.001744678213799e-07, "loss": 1.4239, "step": 2217 }, { "epoch": 0.8594179712391156, "grad_norm": 0.19523978233337402, "learning_rate": 4.974690531252863e-07, "loss": 1.375, "step": 2218 }, { "epoch": 0.8598054455273207, "grad_norm": 0.18941566348075867, "learning_rate": 4.947705919099444e-07, "loss": 1.3764, "step": 2219 }, { "epoch": 0.8601929198155259, "grad_norm": 0.19355204701423645, "learning_rate": 4.920790883427201e-07, "loss": 1.3968, "step": 2220 }, { "epoch": 0.8605803941037311, "grad_norm": 0.19370625913143158, "learning_rate": 4.893945465802369e-07, "loss": 1.3409, "step": 2221 }, { "epoch": 0.8609678683919363, "grad_norm": 0.18125969171524048, "learning_rate": 4.867169707683627e-07, "loss": 1.3763, "step": 2222 }, { "epoch": 0.8613553426801415, "grad_norm": 0.17867474257946014, "learning_rate": 4.840463650422123e-07, "loss": 1.3898, "step": 2223 }, { "epoch": 0.8617428169683466, "grad_norm": 0.19338251650333405, "learning_rate": 4.813827335261323e-07, "loss": 1.4302, "step": 2224 }, { "epoch": 0.8621302912565518, "grad_norm": 0.19936856627464294, "learning_rate": 4.787260803337018e-07, "loss": 1.409, "step": 2225 }, { "epoch": 0.862517765544757, "grad_norm": 0.20847347378730774, "learning_rate": 4.760764095677195e-07, "loss": 1.3665, "step": 2226 }, { "epoch": 0.8629052398329623, "grad_norm": 0.18103733658790588, "learning_rate": 4.734337253202048e-07, "loss": 1.4435, "step": 2227 }, { "epoch": 0.8632927141211675, "grad_norm": 0.1873212456703186, "learning_rate": 4.707980316723837e-07, "loss": 1.4385, "step": 2228 }, { "epoch": 0.8636801884093727, "grad_norm": 0.18973317742347717, "learning_rate": 4.6816933269468957e-07, "loss": 1.3807, "step": 2229 }, { "epoch": 0.8640676626975778, "grad_norm": 0.21119017899036407, "learning_rate": 4.6554763244675003e-07, "loss": 1.4249, "step": 2230 }, { "epoch": 0.864455136985783, "grad_norm": 0.1995018571615219, "learning_rate": 4.629329349773881e-07, "loss": 1.3706, "step": 2231 }, { "epoch": 0.8648426112739882, "grad_norm": 0.18122310936450958, "learning_rate": 4.603252443246081e-07, "loss": 1.392, "step": 2232 }, { "epoch": 0.8652300855621934, "grad_norm": 0.19559161365032196, "learning_rate": 4.577245645155981e-07, "loss": 1.409, "step": 2233 }, { "epoch": 0.8656175598503986, "grad_norm": 0.20451034605503082, "learning_rate": 4.5513089956671365e-07, "loss": 1.3494, "step": 2234 }, { "epoch": 0.8660050341386037, "grad_norm": 0.19394412636756897, "learning_rate": 4.525442534834812e-07, "loss": 1.3646, "step": 2235 }, { "epoch": 0.866392508426809, "grad_norm": 0.1905307173728943, "learning_rate": 4.4996463026058476e-07, "loss": 1.4141, "step": 2236 }, { "epoch": 0.8667799827150142, "grad_norm": 0.19262070953845978, "learning_rate": 4.473920338818649e-07, "loss": 1.4199, "step": 2237 }, { "epoch": 0.8671674570032194, "grad_norm": 0.19115054607391357, "learning_rate": 4.4482646832030905e-07, "loss": 1.4053, "step": 2238 }, { "epoch": 0.8675549312914246, "grad_norm": 0.21004930138587952, "learning_rate": 4.422679375380451e-07, "loss": 1.3328, "step": 2239 }, { "epoch": 0.8679424055796298, "grad_norm": 0.1859438419342041, "learning_rate": 4.3971644548634116e-07, "loss": 1.3723, "step": 2240 }, { "epoch": 0.8683298798678349, "grad_norm": 0.1804945021867752, "learning_rate": 4.3717199610558925e-07, "loss": 1.3821, "step": 2241 }, { "epoch": 0.8687173541560401, "grad_norm": 0.18882215023040771, "learning_rate": 4.34634593325311e-07, "loss": 1.3362, "step": 2242 }, { "epoch": 0.8691048284442453, "grad_norm": 0.16948330402374268, "learning_rate": 4.321042410641402e-07, "loss": 1.4166, "step": 2243 }, { "epoch": 0.8694923027324505, "grad_norm": 0.18349164724349976, "learning_rate": 4.2958094322982703e-07, "loss": 1.3684, "step": 2244 }, { "epoch": 0.8698797770206557, "grad_norm": 0.17899218201637268, "learning_rate": 4.2706470371922327e-07, "loss": 1.3826, "step": 2245 }, { "epoch": 0.870267251308861, "grad_norm": 0.20072785019874573, "learning_rate": 4.2455552641828266e-07, "loss": 1.4135, "step": 2246 }, { "epoch": 0.8706547255970661, "grad_norm": 0.18715061247348785, "learning_rate": 4.2205341520205055e-07, "loss": 1.4028, "step": 2247 }, { "epoch": 0.8710421998852713, "grad_norm": 0.17559021711349487, "learning_rate": 4.1955837393466206e-07, "loss": 1.4198, "step": 2248 }, { "epoch": 0.8714296741734765, "grad_norm": 0.20305302739143372, "learning_rate": 4.170704064693315e-07, "loss": 1.366, "step": 2249 }, { "epoch": 0.8718171484616817, "grad_norm": 0.19841820001602173, "learning_rate": 4.1458951664835046e-07, "loss": 1.3942, "step": 2250 }, { "epoch": 0.8722046227498869, "grad_norm": 0.16927757859230042, "learning_rate": 4.1211570830307914e-07, "loss": 1.4161, "step": 2251 }, { "epoch": 0.872592097038092, "grad_norm": 0.20701687037944794, "learning_rate": 4.096489852539426e-07, "loss": 1.378, "step": 2252 }, { "epoch": 0.8729795713262972, "grad_norm": 0.1815432757139206, "learning_rate": 4.071893513104214e-07, "loss": 1.3994, "step": 2253 }, { "epoch": 0.8733670456145024, "grad_norm": 0.19866657257080078, "learning_rate": 4.0473681027105137e-07, "loss": 1.3876, "step": 2254 }, { "epoch": 0.8737545199027076, "grad_norm": 0.19193154573440552, "learning_rate": 4.0229136592341124e-07, "loss": 1.3914, "step": 2255 }, { "epoch": 0.8741419941909129, "grad_norm": 0.19163289666175842, "learning_rate": 3.9985302204412266e-07, "loss": 1.392, "step": 2256 }, { "epoch": 0.874529468479118, "grad_norm": 0.1814761906862259, "learning_rate": 3.974217823988391e-07, "loss": 1.3598, "step": 2257 }, { "epoch": 0.8749169427673232, "grad_norm": 0.18263262510299683, "learning_rate": 3.9499765074224595e-07, "loss": 1.4015, "step": 2258 }, { "epoch": 0.8753044170555284, "grad_norm": 0.20960010588169098, "learning_rate": 3.925806308180469e-07, "loss": 1.3949, "step": 2259 }, { "epoch": 0.8756918913437336, "grad_norm": 0.1961923986673355, "learning_rate": 3.9017072635896716e-07, "loss": 1.3945, "step": 2260 }, { "epoch": 0.8760793656319388, "grad_norm": 0.19517390429973602, "learning_rate": 3.877679410867391e-07, "loss": 1.3493, "step": 2261 }, { "epoch": 0.876466839920144, "grad_norm": 0.20391945540905, "learning_rate": 3.85372278712105e-07, "loss": 1.4068, "step": 2262 }, { "epoch": 0.8768543142083491, "grad_norm": 0.18867452442646027, "learning_rate": 3.829837429348021e-07, "loss": 1.3949, "step": 2263 }, { "epoch": 0.8772417884965543, "grad_norm": 0.19683776795864105, "learning_rate": 3.8060233744356634e-07, "loss": 1.4076, "step": 2264 }, { "epoch": 0.8776292627847595, "grad_norm": 0.17153973877429962, "learning_rate": 3.782280659161186e-07, "loss": 1.3565, "step": 2265 }, { "epoch": 0.8780167370729648, "grad_norm": 0.19019557535648346, "learning_rate": 3.758609320191631e-07, "loss": 1.3805, "step": 2266 }, { "epoch": 0.87840421136117, "grad_norm": 0.20609362423419952, "learning_rate": 3.735009394083822e-07, "loss": 1.4251, "step": 2267 }, { "epoch": 0.8787916856493752, "grad_norm": 0.18304771184921265, "learning_rate": 3.7114809172842827e-07, "loss": 1.3867, "step": 2268 }, { "epoch": 0.8791791599375803, "grad_norm": 0.1756540834903717, "learning_rate": 3.6880239261292137e-07, "loss": 1.3335, "step": 2269 }, { "epoch": 0.8795666342257855, "grad_norm": 0.17781776189804077, "learning_rate": 3.664638456844394e-07, "loss": 1.3732, "step": 2270 }, { "epoch": 0.8799541085139907, "grad_norm": 0.1821458488702774, "learning_rate": 3.641324545545166e-07, "loss": 1.3326, "step": 2271 }, { "epoch": 0.8803415828021959, "grad_norm": 0.20749196410179138, "learning_rate": 3.618082228236336e-07, "loss": 1.3686, "step": 2272 }, { "epoch": 0.8807290570904011, "grad_norm": 0.18168814480304718, "learning_rate": 3.594911540812185e-07, "loss": 1.3912, "step": 2273 }, { "epoch": 0.8811165313786062, "grad_norm": 0.18250681459903717, "learning_rate": 3.5718125190563334e-07, "loss": 1.4073, "step": 2274 }, { "epoch": 0.8815040056668114, "grad_norm": 0.1817607879638672, "learning_rate": 3.5487851986417466e-07, "loss": 1.4027, "step": 2275 }, { "epoch": 0.8818914799550167, "grad_norm": 0.18076026439666748, "learning_rate": 3.5258296151306495e-07, "loss": 1.3746, "step": 2276 }, { "epoch": 0.8822789542432219, "grad_norm": 0.21720124781131744, "learning_rate": 3.502945803974489e-07, "loss": 1.3925, "step": 2277 }, { "epoch": 0.8826664285314271, "grad_norm": 0.20023687183856964, "learning_rate": 3.4801338005138465e-07, "loss": 1.4462, "step": 2278 }, { "epoch": 0.8830539028196323, "grad_norm": 0.19758468866348267, "learning_rate": 3.4573936399784514e-07, "loss": 1.4434, "step": 2279 }, { "epoch": 0.8834413771078374, "grad_norm": 0.17449496686458588, "learning_rate": 3.434725357487029e-07, "loss": 1.4305, "step": 2280 }, { "epoch": 0.8838288513960426, "grad_norm": 0.18172089755535126, "learning_rate": 3.412128988047347e-07, "loss": 1.3504, "step": 2281 }, { "epoch": 0.8842163256842478, "grad_norm": 0.18336263298988342, "learning_rate": 3.389604566556082e-07, "loss": 1.3907, "step": 2282 }, { "epoch": 0.884603799972453, "grad_norm": 0.19550377130508423, "learning_rate": 3.367152127798817e-07, "loss": 1.3848, "step": 2283 }, { "epoch": 0.8849912742606582, "grad_norm": 0.19315418601036072, "learning_rate": 3.3447717064499565e-07, "loss": 1.3283, "step": 2284 }, { "epoch": 0.8853787485488633, "grad_norm": 0.2042272835969925, "learning_rate": 3.3224633370726956e-07, "loss": 1.3999, "step": 2285 }, { "epoch": 0.8857662228370686, "grad_norm": 0.19872333109378815, "learning_rate": 3.3002270541189376e-07, "loss": 1.3963, "step": 2286 }, { "epoch": 0.8861536971252738, "grad_norm": 0.19852720201015472, "learning_rate": 3.2780628919292844e-07, "loss": 1.399, "step": 2287 }, { "epoch": 0.886541171413479, "grad_norm": 0.19084501266479492, "learning_rate": 3.25597088473294e-07, "loss": 1.4307, "step": 2288 }, { "epoch": 0.8869286457016842, "grad_norm": 0.18562327325344086, "learning_rate": 3.233951066647684e-07, "loss": 1.3683, "step": 2289 }, { "epoch": 0.8873161199898894, "grad_norm": 0.1994224488735199, "learning_rate": 3.212003471679803e-07, "loss": 1.3445, "step": 2290 }, { "epoch": 0.8877035942780945, "grad_norm": 0.19138382375240326, "learning_rate": 3.1901281337240575e-07, "loss": 1.4589, "step": 2291 }, { "epoch": 0.8880910685662997, "grad_norm": 0.20558443665504456, "learning_rate": 3.168325086563612e-07, "loss": 1.4261, "step": 2292 }, { "epoch": 0.8884785428545049, "grad_norm": 0.1788492500782013, "learning_rate": 3.1465943638699814e-07, "loss": 1.4042, "step": 2293 }, { "epoch": 0.8888660171427101, "grad_norm": 0.17853665351867676, "learning_rate": 3.124935999202999e-07, "loss": 1.3882, "step": 2294 }, { "epoch": 0.8892534914309153, "grad_norm": 0.20118752121925354, "learning_rate": 3.1033500260107373e-07, "loss": 1.3907, "step": 2295 }, { "epoch": 0.8896409657191205, "grad_norm": 0.18405501544475555, "learning_rate": 3.081836477629491e-07, "loss": 1.4396, "step": 2296 }, { "epoch": 0.8900284400073257, "grad_norm": 0.18373125791549683, "learning_rate": 3.060395387283688e-07, "loss": 1.3831, "step": 2297 }, { "epoch": 0.8904159142955309, "grad_norm": 0.1796003133058548, "learning_rate": 3.039026788085869e-07, "loss": 1.3704, "step": 2298 }, { "epoch": 0.8908033885837361, "grad_norm": 0.18761853873729706, "learning_rate": 3.0177307130366095e-07, "loss": 1.3898, "step": 2299 }, { "epoch": 0.8911908628719413, "grad_norm": 0.1788151115179062, "learning_rate": 2.996507195024495e-07, "loss": 1.4091, "step": 2300 }, { "epoch": 0.8915783371601465, "grad_norm": 0.1960129737854004, "learning_rate": 2.9753562668260407e-07, "loss": 1.4282, "step": 2301 }, { "epoch": 0.8919658114483516, "grad_norm": 0.18485024571418762, "learning_rate": 2.954277961105684e-07, "loss": 1.403, "step": 2302 }, { "epoch": 0.8923532857365568, "grad_norm": 0.17966486513614655, "learning_rate": 2.9332723104156746e-07, "loss": 1.4203, "step": 2303 }, { "epoch": 0.892740760024762, "grad_norm": 0.18248330056667328, "learning_rate": 2.912339347196097e-07, "loss": 1.3816, "step": 2304 }, { "epoch": 0.8931282343129672, "grad_norm": 0.1792861521244049, "learning_rate": 2.8914791037747415e-07, "loss": 1.3974, "step": 2305 }, { "epoch": 0.8935157086011725, "grad_norm": 0.1979595273733139, "learning_rate": 2.870691612367127e-07, "loss": 1.3909, "step": 2306 }, { "epoch": 0.8939031828893776, "grad_norm": 0.17562276124954224, "learning_rate": 2.849976905076385e-07, "loss": 1.3805, "step": 2307 }, { "epoch": 0.8942906571775828, "grad_norm": 0.20317678153514862, "learning_rate": 2.8293350138932805e-07, "loss": 1.3899, "step": 2308 }, { "epoch": 0.894678131465788, "grad_norm": 0.17962642014026642, "learning_rate": 2.808765970696081e-07, "loss": 1.3388, "step": 2309 }, { "epoch": 0.8950656057539932, "grad_norm": 0.18500442802906036, "learning_rate": 2.7882698072505985e-07, "loss": 1.3735, "step": 2310 }, { "epoch": 0.8954530800421984, "grad_norm": 0.20137101411819458, "learning_rate": 2.7678465552100464e-07, "loss": 1.4195, "step": 2311 }, { "epoch": 0.8958405543304035, "grad_norm": 0.18785245716571808, "learning_rate": 2.7474962461150835e-07, "loss": 1.3178, "step": 2312 }, { "epoch": 0.8962280286186087, "grad_norm": 0.1813557893037796, "learning_rate": 2.727218911393681e-07, "loss": 1.3806, "step": 2313 }, { "epoch": 0.8966155029068139, "grad_norm": 0.17787601053714752, "learning_rate": 2.7070145823611393e-07, "loss": 1.3691, "step": 2314 }, { "epoch": 0.8970029771950191, "grad_norm": 0.1896829456090927, "learning_rate": 2.686883290219988e-07, "loss": 1.364, "step": 2315 }, { "epoch": 0.8973904514832244, "grad_norm": 0.19009171426296234, "learning_rate": 2.666825066059986e-07, "loss": 1.3523, "step": 2316 }, { "epoch": 0.8977779257714296, "grad_norm": 0.19092103838920593, "learning_rate": 2.646839940858026e-07, "loss": 1.377, "step": 2317 }, { "epoch": 0.8981654000596347, "grad_norm": 0.19161881506443024, "learning_rate": 2.626927945478136e-07, "loss": 1.4251, "step": 2318 }, { "epoch": 0.8985528743478399, "grad_norm": 0.19761398434638977, "learning_rate": 2.6070891106713904e-07, "loss": 1.3815, "step": 2319 }, { "epoch": 0.8989403486360451, "grad_norm": 0.2209191918373108, "learning_rate": 2.5873234670758753e-07, "loss": 1.3518, "step": 2320 }, { "epoch": 0.8993278229242503, "grad_norm": 0.1952473670244217, "learning_rate": 2.567631045216662e-07, "loss": 1.393, "step": 2321 }, { "epoch": 0.8997152972124555, "grad_norm": 0.19823935627937317, "learning_rate": 2.548011875505707e-07, "loss": 1.408, "step": 2322 }, { "epoch": 0.9001027715006606, "grad_norm": 0.1793571561574936, "learning_rate": 2.5284659882418904e-07, "loss": 1.4282, "step": 2323 }, { "epoch": 0.9004902457888658, "grad_norm": 0.18608565628528595, "learning_rate": 2.5089934136108665e-07, "loss": 1.4339, "step": 2324 }, { "epoch": 0.900877720077071, "grad_norm": 0.2039819210767746, "learning_rate": 2.489594181685107e-07, "loss": 1.3572, "step": 2325 }, { "epoch": 0.9012651943652763, "grad_norm": 0.17617961764335632, "learning_rate": 2.4702683224237965e-07, "loss": 1.3946, "step": 2326 }, { "epoch": 0.9016526686534815, "grad_norm": 0.19163084030151367, "learning_rate": 2.4510158656728234e-07, "loss": 1.4227, "step": 2327 }, { "epoch": 0.9020401429416867, "grad_norm": 0.19425897300243378, "learning_rate": 2.431836841164686e-07, "loss": 1.4138, "step": 2328 }, { "epoch": 0.9024276172298918, "grad_norm": 0.19779826700687408, "learning_rate": 2.412731278518526e-07, "loss": 1.403, "step": 2329 }, { "epoch": 0.902815091518097, "grad_norm": 0.18723465502262115, "learning_rate": 2.39369920723998e-07, "loss": 1.4152, "step": 2330 }, { "epoch": 0.9032025658063022, "grad_norm": 0.19121690094470978, "learning_rate": 2.3747406567212416e-07, "loss": 1.3637, "step": 2331 }, { "epoch": 0.9035900400945074, "grad_norm": 0.21122907102108002, "learning_rate": 2.3558556562409074e-07, "loss": 1.3914, "step": 2332 }, { "epoch": 0.9039775143827126, "grad_norm": 0.19267496466636658, "learning_rate": 2.337044234964042e-07, "loss": 1.3756, "step": 2333 }, { "epoch": 0.9043649886709177, "grad_norm": 0.19919922947883606, "learning_rate": 2.3183064219420293e-07, "loss": 1.371, "step": 2334 }, { "epoch": 0.9047524629591229, "grad_norm": 0.1856105923652649, "learning_rate": 2.2996422461126156e-07, "loss": 1.4026, "step": 2335 }, { "epoch": 0.9051399372473282, "grad_norm": 0.1857329159975052, "learning_rate": 2.2810517362997997e-07, "loss": 1.4523, "step": 2336 }, { "epoch": 0.9055274115355334, "grad_norm": 0.19401539862155914, "learning_rate": 2.262534921213827e-07, "loss": 1.362, "step": 2337 }, { "epoch": 0.9059148858237386, "grad_norm": 0.1994173526763916, "learning_rate": 2.2440918294511283e-07, "loss": 1.4029, "step": 2338 }, { "epoch": 0.9063023601119438, "grad_norm": 0.19102615118026733, "learning_rate": 2.2257224894942808e-07, "loss": 1.4039, "step": 2339 }, { "epoch": 0.9066898344001489, "grad_norm": 0.17196735739707947, "learning_rate": 2.2074269297119588e-07, "loss": 1.3558, "step": 2340 }, { "epoch": 0.9070773086883541, "grad_norm": 0.19711962342262268, "learning_rate": 2.1892051783589052e-07, "loss": 1.3987, "step": 2341 }, { "epoch": 0.9074647829765593, "grad_norm": 0.18971295654773712, "learning_rate": 2.1710572635758597e-07, "loss": 1.3765, "step": 2342 }, { "epoch": 0.9078522572647645, "grad_norm": 0.18122664093971252, "learning_rate": 2.152983213389559e-07, "loss": 1.3895, "step": 2343 }, { "epoch": 0.9082397315529697, "grad_norm": 0.19917121529579163, "learning_rate": 2.134983055712636e-07, "loss": 1.3824, "step": 2344 }, { "epoch": 0.9086272058411748, "grad_norm": 0.18983851373195648, "learning_rate": 2.1170568183436214e-07, "loss": 1.3499, "step": 2345 }, { "epoch": 0.9090146801293801, "grad_norm": 0.1867242157459259, "learning_rate": 2.0992045289669027e-07, "loss": 1.4299, "step": 2346 }, { "epoch": 0.9094021544175853, "grad_norm": 0.19261285662651062, "learning_rate": 2.0814262151526376e-07, "loss": 1.3741, "step": 2347 }, { "epoch": 0.9097896287057905, "grad_norm": 0.16974659264087677, "learning_rate": 2.0637219043567636e-07, "loss": 1.3674, "step": 2348 }, { "epoch": 0.9101771029939957, "grad_norm": 0.17933203279972076, "learning_rate": 2.046091623920915e-07, "loss": 1.3663, "step": 2349 }, { "epoch": 0.9105645772822009, "grad_norm": 0.19478751718997955, "learning_rate": 2.0285354010724067e-07, "loss": 1.4375, "step": 2350 }, { "epoch": 0.910952051570406, "grad_norm": 0.19045008718967438, "learning_rate": 2.011053262924173e-07, "loss": 1.3658, "step": 2351 }, { "epoch": 0.9113395258586112, "grad_norm": 0.20001545548439026, "learning_rate": 1.9936452364747561e-07, "loss": 1.3877, "step": 2352 }, { "epoch": 0.9117270001468164, "grad_norm": 0.18265724182128906, "learning_rate": 1.9763113486082231e-07, "loss": 1.379, "step": 2353 }, { "epoch": 0.9121144744350216, "grad_norm": 0.1790803223848343, "learning_rate": 1.9590516260941494e-07, "loss": 1.3761, "step": 2354 }, { "epoch": 0.9125019487232268, "grad_norm": 0.19919297099113464, "learning_rate": 1.9418660955875802e-07, "loss": 1.3844, "step": 2355 }, { "epoch": 0.912889423011432, "grad_norm": 0.17883121967315674, "learning_rate": 1.9247547836289792e-07, "loss": 1.3669, "step": 2356 }, { "epoch": 0.9132768972996372, "grad_norm": 0.1922868937253952, "learning_rate": 1.9077177166441863e-07, "loss": 1.3776, "step": 2357 }, { "epoch": 0.9136643715878424, "grad_norm": 0.19339559972286224, "learning_rate": 1.8907549209443875e-07, "loss": 1.4236, "step": 2358 }, { "epoch": 0.9140518458760476, "grad_norm": 0.18768443167209625, "learning_rate": 1.8738664227260674e-07, "loss": 1.3773, "step": 2359 }, { "epoch": 0.9144393201642528, "grad_norm": 0.19161351025104523, "learning_rate": 1.857052248070962e-07, "loss": 1.3485, "step": 2360 }, { "epoch": 0.914826794452458, "grad_norm": 0.17993073165416718, "learning_rate": 1.8403124229460335e-07, "loss": 1.3572, "step": 2361 }, { "epoch": 0.9152142687406631, "grad_norm": 0.19223424792289734, "learning_rate": 1.8236469732034245e-07, "loss": 1.3969, "step": 2362 }, { "epoch": 0.9156017430288683, "grad_norm": 0.18728625774383545, "learning_rate": 1.8070559245803977e-07, "loss": 1.3978, "step": 2363 }, { "epoch": 0.9159892173170735, "grad_norm": 0.18078351020812988, "learning_rate": 1.7905393026993513e-07, "loss": 1.4178, "step": 2364 }, { "epoch": 0.9163766916052787, "grad_norm": 0.18518458306789398, "learning_rate": 1.7740971330676925e-07, "loss": 1.3219, "step": 2365 }, { "epoch": 0.916764165893484, "grad_norm": 0.19140051305294037, "learning_rate": 1.757729441077899e-07, "loss": 1.3915, "step": 2366 }, { "epoch": 0.9171516401816892, "grad_norm": 0.18475285172462463, "learning_rate": 1.741436252007389e-07, "loss": 1.3636, "step": 2367 }, { "epoch": 0.9175391144698943, "grad_norm": 0.17945599555969238, "learning_rate": 1.725217591018552e-07, "loss": 1.4297, "step": 2368 }, { "epoch": 0.9179265887580995, "grad_norm": 0.1893254816532135, "learning_rate": 1.709073483158652e-07, "loss": 1.3861, "step": 2369 }, { "epoch": 0.9183140630463047, "grad_norm": 0.1870526671409607, "learning_rate": 1.6930039533598453e-07, "loss": 1.4055, "step": 2370 }, { "epoch": 0.9187015373345099, "grad_norm": 0.19463349878787994, "learning_rate": 1.6770090264390914e-07, "loss": 1.403, "step": 2371 }, { "epoch": 0.9190890116227151, "grad_norm": 0.17807702720165253, "learning_rate": 1.6610887270981425e-07, "loss": 1.4008, "step": 2372 }, { "epoch": 0.9194764859109202, "grad_norm": 0.18695859611034393, "learning_rate": 1.6452430799235143e-07, "loss": 1.3637, "step": 2373 }, { "epoch": 0.9198639601991254, "grad_norm": 0.1834324300289154, "learning_rate": 1.6294721093864097e-07, "loss": 1.4016, "step": 2374 }, { "epoch": 0.9202514344873306, "grad_norm": 0.18657153844833374, "learning_rate": 1.6137758398427296e-07, "loss": 1.3979, "step": 2375 }, { "epoch": 0.9206389087755359, "grad_norm": 0.19671691954135895, "learning_rate": 1.598154295532983e-07, "loss": 1.3735, "step": 2376 }, { "epoch": 0.9210263830637411, "grad_norm": 0.1884845644235611, "learning_rate": 1.5826075005823006e-07, "loss": 1.3661, "step": 2377 }, { "epoch": 0.9214138573519463, "grad_norm": 0.18711332976818085, "learning_rate": 1.567135479000359e-07, "loss": 1.3517, "step": 2378 }, { "epoch": 0.9218013316401514, "grad_norm": 0.1870834082365036, "learning_rate": 1.5517382546813786e-07, "loss": 1.4057, "step": 2379 }, { "epoch": 0.9221888059283566, "grad_norm": 0.1910185068845749, "learning_rate": 1.5364158514040328e-07, "loss": 1.4613, "step": 2380 }, { "epoch": 0.9225762802165618, "grad_norm": 0.18857303261756897, "learning_rate": 1.5211682928314874e-07, "loss": 1.317, "step": 2381 }, { "epoch": 0.922963754504767, "grad_norm": 0.1719486117362976, "learning_rate": 1.5059956025112788e-07, "loss": 1.383, "step": 2382 }, { "epoch": 0.9233512287929722, "grad_norm": 0.2005373239517212, "learning_rate": 1.490897803875352e-07, "loss": 1.3655, "step": 2383 }, { "epoch": 0.9237387030811773, "grad_norm": 0.18170447647571564, "learning_rate": 1.4758749202399837e-07, "loss": 1.3696, "step": 2384 }, { "epoch": 0.9241261773693825, "grad_norm": 0.1893673539161682, "learning_rate": 1.4609269748057541e-07, "loss": 1.3801, "step": 2385 }, { "epoch": 0.9245136516575878, "grad_norm": 0.19896461069583893, "learning_rate": 1.4460539906575033e-07, "loss": 1.3519, "step": 2386 }, { "epoch": 0.924901125945793, "grad_norm": 0.17872628569602966, "learning_rate": 1.4312559907643298e-07, "loss": 1.3761, "step": 2387 }, { "epoch": 0.9252886002339982, "grad_norm": 0.19443117082118988, "learning_rate": 1.4165329979794972e-07, "loss": 1.3781, "step": 2388 }, { "epoch": 0.9256760745222034, "grad_norm": 0.18370164930820465, "learning_rate": 1.4018850350404735e-07, "loss": 1.3329, "step": 2389 }, { "epoch": 0.9260635488104085, "grad_norm": 0.17142583429813385, "learning_rate": 1.387312124568807e-07, "loss": 1.3834, "step": 2390 }, { "epoch": 0.9264510230986137, "grad_norm": 0.19186586141586304, "learning_rate": 1.372814289070179e-07, "loss": 1.4008, "step": 2391 }, { "epoch": 0.9268384973868189, "grad_norm": 0.19334448873996735, "learning_rate": 1.358391550934307e-07, "loss": 1.3957, "step": 2392 }, { "epoch": 0.9272259716750241, "grad_norm": 0.1964937299489975, "learning_rate": 1.3440439324349353e-07, "loss": 1.328, "step": 2393 }, { "epoch": 0.9276134459632293, "grad_norm": 0.18075934052467346, "learning_rate": 1.3297714557297947e-07, "loss": 1.3693, "step": 2394 }, { "epoch": 0.9280009202514344, "grad_norm": 0.2084513008594513, "learning_rate": 1.315574142860593e-07, "loss": 1.3516, "step": 2395 }, { "epoch": 0.9283883945396397, "grad_norm": 0.19044069945812225, "learning_rate": 1.3014520157529244e-07, "loss": 1.3886, "step": 2396 }, { "epoch": 0.9287758688278449, "grad_norm": 0.1836361289024353, "learning_rate": 1.2874050962162876e-07, "loss": 1.367, "step": 2397 }, { "epoch": 0.9291633431160501, "grad_norm": 0.19582076370716095, "learning_rate": 1.2734334059440468e-07, "loss": 1.3825, "step": 2398 }, { "epoch": 0.9295508174042553, "grad_norm": 0.18300044536590576, "learning_rate": 1.2595369665133528e-07, "loss": 1.4073, "step": 2399 }, { "epoch": 0.9299382916924605, "grad_norm": 0.19931936264038086, "learning_rate": 1.2457157993851832e-07, "loss": 1.3462, "step": 2400 }, { "epoch": 0.9303257659806656, "grad_norm": 0.18335454165935516, "learning_rate": 1.231969925904236e-07, "loss": 1.4466, "step": 2401 }, { "epoch": 0.9307132402688708, "grad_norm": 0.1852782517671585, "learning_rate": 1.2182993672989473e-07, "loss": 1.3758, "step": 2402 }, { "epoch": 0.931100714557076, "grad_norm": 0.17808982729911804, "learning_rate": 1.204704144681429e-07, "loss": 1.4366, "step": 2403 }, { "epoch": 0.9314881888452812, "grad_norm": 0.1940883845090866, "learning_rate": 1.1911842790474637e-07, "loss": 1.3321, "step": 2404 }, { "epoch": 0.9318756631334864, "grad_norm": 0.19932667911052704, "learning_rate": 1.1777397912764388e-07, "loss": 1.3773, "step": 2405 }, { "epoch": 0.9322631374216916, "grad_norm": 0.17850585281848907, "learning_rate": 1.1643707021313455e-07, "loss": 1.3967, "step": 2406 }, { "epoch": 0.9326506117098968, "grad_norm": 0.18462300300598145, "learning_rate": 1.151077032258724e-07, "loss": 1.3623, "step": 2407 }, { "epoch": 0.933038085998102, "grad_norm": 0.19249114394187927, "learning_rate": 1.137858802188646e-07, "loss": 1.3932, "step": 2408 }, { "epoch": 0.9334255602863072, "grad_norm": 0.19899222254753113, "learning_rate": 1.1247160323346774e-07, "loss": 1.3641, "step": 2409 }, { "epoch": 0.9338130345745124, "grad_norm": 0.17665335536003113, "learning_rate": 1.1116487429938538e-07, "loss": 1.3665, "step": 2410 }, { "epoch": 0.9342005088627175, "grad_norm": 0.19277323782444, "learning_rate": 1.0986569543466219e-07, "loss": 1.4324, "step": 2411 }, { "epoch": 0.9345879831509227, "grad_norm": 0.21167293190956116, "learning_rate": 1.0857406864568488e-07, "loss": 1.4339, "step": 2412 }, { "epoch": 0.9349754574391279, "grad_norm": 0.18011729419231415, "learning_rate": 1.0728999592717615e-07, "loss": 1.3892, "step": 2413 }, { "epoch": 0.9353629317273331, "grad_norm": 0.1797398030757904, "learning_rate": 1.0601347926219362e-07, "loss": 1.385, "step": 2414 }, { "epoch": 0.9357504060155383, "grad_norm": 0.19546552002429962, "learning_rate": 1.047445206221237e-07, "loss": 1.3749, "step": 2415 }, { "epoch": 0.9361378803037436, "grad_norm": 0.18500368297100067, "learning_rate": 1.034831219666832e-07, "loss": 1.4238, "step": 2416 }, { "epoch": 0.9365253545919487, "grad_norm": 0.17948807775974274, "learning_rate": 1.0222928524391107e-07, "loss": 1.3633, "step": 2417 }, { "epoch": 0.9369128288801539, "grad_norm": 0.20541715621948242, "learning_rate": 1.0098301239017006e-07, "loss": 1.4043, "step": 2418 }, { "epoch": 0.9373003031683591, "grad_norm": 0.18013399839401245, "learning_rate": 9.974430533014057e-08, "loss": 1.3871, "step": 2419 }, { "epoch": 0.9376877774565643, "grad_norm": 0.20438027381896973, "learning_rate": 9.851316597681959e-08, "loss": 1.3731, "step": 2420 }, { "epoch": 0.9380752517447695, "grad_norm": 0.19327746331691742, "learning_rate": 9.728959623151569e-08, "loss": 1.4407, "step": 2421 }, { "epoch": 0.9384627260329746, "grad_norm": 0.18995197117328644, "learning_rate": 9.607359798384785e-08, "loss": 1.4259, "step": 2422 }, { "epoch": 0.9388502003211798, "grad_norm": 0.1815672218799591, "learning_rate": 9.486517311174281e-08, "loss": 1.3138, "step": 2423 }, { "epoch": 0.939237674609385, "grad_norm": 0.18125326931476593, "learning_rate": 9.36643234814305e-08, "loss": 1.4042, "step": 2424 }, { "epoch": 0.9396251488975902, "grad_norm": 0.19386091828346252, "learning_rate": 9.247105094744246e-08, "loss": 1.4133, "step": 2425 }, { "epoch": 0.9400126231857955, "grad_norm": 0.17724569141864777, "learning_rate": 9.128535735260735e-08, "loss": 1.3614, "step": 2426 }, { "epoch": 0.9404000974740007, "grad_norm": 0.20258016884326935, "learning_rate": 9.010724452805209e-08, "loss": 1.361, "step": 2427 }, { "epoch": 0.9407875717622058, "grad_norm": 0.20104849338531494, "learning_rate": 8.893671429319294e-08, "loss": 1.4145, "step": 2428 }, { "epoch": 0.941175046050411, "grad_norm": 0.1829189956188202, "learning_rate": 8.777376845573837e-08, "loss": 1.3575, "step": 2429 }, { "epoch": 0.9415625203386162, "grad_norm": 0.1826576292514801, "learning_rate": 8.661840881168226e-08, "loss": 1.3428, "step": 2430 }, { "epoch": 0.9419499946268214, "grad_norm": 0.1806420385837555, "learning_rate": 8.547063714530456e-08, "loss": 1.3657, "step": 2431 }, { "epoch": 0.9423374689150266, "grad_norm": 0.1858508437871933, "learning_rate": 8.433045522916462e-08, "loss": 1.382, "step": 2432 }, { "epoch": 0.9427249432032317, "grad_norm": 0.1968177706003189, "learning_rate": 8.319786482410219e-08, "loss": 1.3937, "step": 2433 }, { "epoch": 0.9431124174914369, "grad_norm": 0.1925126016139984, "learning_rate": 8.207286767923261e-08, "loss": 1.3579, "step": 2434 }, { "epoch": 0.9434998917796421, "grad_norm": 0.19983117282390594, "learning_rate": 8.095546553194444e-08, "loss": 1.4142, "step": 2435 }, { "epoch": 0.9438873660678474, "grad_norm": 0.19138000905513763, "learning_rate": 7.984566010789673e-08, "loss": 1.3249, "step": 2436 }, { "epoch": 0.9442748403560526, "grad_norm": 0.18531103432178497, "learning_rate": 7.874345312101684e-08, "loss": 1.3872, "step": 2437 }, { "epoch": 0.9446623146442578, "grad_norm": 0.1888168752193451, "learning_rate": 7.764884627349756e-08, "loss": 1.3814, "step": 2438 }, { "epoch": 0.9450497889324629, "grad_norm": 0.20210161805152893, "learning_rate": 7.656184125579446e-08, "loss": 1.4105, "step": 2439 }, { "epoch": 0.9454372632206681, "grad_norm": 0.20239752531051636, "learning_rate": 7.54824397466225e-08, "loss": 1.3874, "step": 2440 }, { "epoch": 0.9458247375088733, "grad_norm": 0.19475099444389343, "learning_rate": 7.441064341295489e-08, "loss": 1.4151, "step": 2441 }, { "epoch": 0.9462122117970785, "grad_norm": 0.1863378882408142, "learning_rate": 7.334645391001982e-08, "loss": 1.3598, "step": 2442 }, { "epoch": 0.9465996860852837, "grad_norm": 0.1826794594526291, "learning_rate": 7.228987288129763e-08, "loss": 1.425, "step": 2443 }, { "epoch": 0.9469871603734888, "grad_norm": 0.18822477757930756, "learning_rate": 7.124090195851807e-08, "loss": 1.363, "step": 2444 }, { "epoch": 0.947374634661694, "grad_norm": 0.19228999316692352, "learning_rate": 7.019954276165919e-08, "loss": 1.3788, "step": 2445 }, { "epoch": 0.9477621089498993, "grad_norm": 0.18887577950954437, "learning_rate": 6.916579689894343e-08, "loss": 1.3188, "step": 2446 }, { "epoch": 0.9481495832381045, "grad_norm": 0.1833743155002594, "learning_rate": 6.813966596683541e-08, "loss": 1.4131, "step": 2447 }, { "epoch": 0.9485370575263097, "grad_norm": 0.17585456371307373, "learning_rate": 6.712115155003973e-08, "loss": 1.3409, "step": 2448 }, { "epoch": 0.9489245318145149, "grad_norm": 0.17213395237922668, "learning_rate": 6.611025522149872e-08, "loss": 1.4084, "step": 2449 }, { "epoch": 0.94931200610272, "grad_norm": 0.17978613078594208, "learning_rate": 6.510697854238912e-08, "loss": 1.3605, "step": 2450 }, { "epoch": 0.9496994803909252, "grad_norm": 0.1891060471534729, "learning_rate": 6.411132306212042e-08, "loss": 1.3718, "step": 2451 }, { "epoch": 0.9500869546791304, "grad_norm": 0.20348595082759857, "learning_rate": 6.31232903183332e-08, "loss": 1.2936, "step": 2452 }, { "epoch": 0.9504744289673356, "grad_norm": 0.18475224077701569, "learning_rate": 6.214288183689466e-08, "loss": 1.376, "step": 2453 }, { "epoch": 0.9508619032555408, "grad_norm": 0.18256455659866333, "learning_rate": 6.117009913189809e-08, "loss": 1.3685, "step": 2454 }, { "epoch": 0.9512493775437459, "grad_norm": 0.1991414576768875, "learning_rate": 6.020494370565954e-08, "loss": 1.3471, "step": 2455 }, { "epoch": 0.9516368518319512, "grad_norm": 0.18084731698036194, "learning_rate": 5.9247417048717284e-08, "loss": 1.3532, "step": 2456 }, { "epoch": 0.9520243261201564, "grad_norm": 0.18697726726531982, "learning_rate": 5.8297520639825636e-08, "loss": 1.3634, "step": 2457 }, { "epoch": 0.9524118004083616, "grad_norm": 0.1845630556344986, "learning_rate": 5.735525594595781e-08, "loss": 1.4079, "step": 2458 }, { "epoch": 0.9527992746965668, "grad_norm": 0.19373515248298645, "learning_rate": 5.642062442229868e-08, "loss": 1.3374, "step": 2459 }, { "epoch": 0.953186748984772, "grad_norm": 0.17854730784893036, "learning_rate": 5.549362751224585e-08, "loss": 1.3802, "step": 2460 }, { "epoch": 0.9535742232729771, "grad_norm": 0.18951798975467682, "learning_rate": 5.457426664740695e-08, "loss": 1.4237, "step": 2461 }, { "epoch": 0.9539616975611823, "grad_norm": 0.1941017210483551, "learning_rate": 5.366254324759623e-08, "loss": 1.3562, "step": 2462 }, { "epoch": 0.9543491718493875, "grad_norm": 0.1863836795091629, "learning_rate": 5.2758458720832405e-08, "loss": 1.3623, "step": 2463 }, { "epoch": 0.9547366461375927, "grad_norm": 0.1880989670753479, "learning_rate": 5.1862014463338605e-08, "loss": 1.4078, "step": 2464 }, { "epoch": 0.9551241204257979, "grad_norm": 0.20520684123039246, "learning_rate": 5.0973211859537966e-08, "loss": 1.3559, "step": 2465 }, { "epoch": 0.9555115947140032, "grad_norm": 0.20508934557437897, "learning_rate": 5.009205228205194e-08, "loss": 1.4477, "step": 2466 }, { "epoch": 0.9558990690022083, "grad_norm": 0.19455364346504211, "learning_rate": 4.9218537091698106e-08, "loss": 1.3906, "step": 2467 }, { "epoch": 0.9562865432904135, "grad_norm": 0.2117154598236084, "learning_rate": 4.8352667637490694e-08, "loss": 1.3931, "step": 2468 }, { "epoch": 0.9566740175786187, "grad_norm": 0.17757678031921387, "learning_rate": 4.749444525663338e-08, "loss": 1.3889, "step": 2469 }, { "epoch": 0.9570614918668239, "grad_norm": 0.19035592675209045, "learning_rate": 4.6643871274521525e-08, "loss": 1.4116, "step": 2470 }, { "epoch": 0.9574489661550291, "grad_norm": 0.18089327216148376, "learning_rate": 4.5800947004738806e-08, "loss": 1.4567, "step": 2471 }, { "epoch": 0.9578364404432342, "grad_norm": 0.16921097040176392, "learning_rate": 4.4965673749054474e-08, "loss": 1.3548, "step": 2472 }, { "epoch": 0.9582239147314394, "grad_norm": 0.17760907113552094, "learning_rate": 4.4138052797422225e-08, "loss": 1.3602, "step": 2473 }, { "epoch": 0.9586113890196446, "grad_norm": 0.1899784654378891, "learning_rate": 4.331808542797855e-08, "loss": 1.3683, "step": 2474 }, { "epoch": 0.9589988633078498, "grad_norm": 0.18728189170360565, "learning_rate": 4.2505772907038836e-08, "loss": 1.3418, "step": 2475 }, { "epoch": 0.9593863375960551, "grad_norm": 0.18516357243061066, "learning_rate": 4.170111648909736e-08, "loss": 1.3903, "step": 2476 }, { "epoch": 0.9597738118842603, "grad_norm": 0.20792566239833832, "learning_rate": 4.090411741682565e-08, "loss": 1.3801, "step": 2477 }, { "epoch": 0.9601612861724654, "grad_norm": 0.19542264938354492, "learning_rate": 4.0114776921067465e-08, "loss": 1.4094, "step": 2478 }, { "epoch": 0.9605487604606706, "grad_norm": 0.21897444128990173, "learning_rate": 3.933309622084103e-08, "loss": 1.4193, "step": 2479 }, { "epoch": 0.9609362347488758, "grad_norm": 0.20047014951705933, "learning_rate": 3.855907652333402e-08, "loss": 1.3831, "step": 2480 }, { "epoch": 0.961323709037081, "grad_norm": 0.17488102614879608, "learning_rate": 3.7792719023904136e-08, "loss": 1.3843, "step": 2481 }, { "epoch": 0.9617111833252862, "grad_norm": 0.18657329678535461, "learning_rate": 3.703402490607411e-08, "loss": 1.3628, "step": 2482 }, { "epoch": 0.9620986576134913, "grad_norm": 0.19084499776363373, "learning_rate": 3.628299534153334e-08, "loss": 1.4135, "step": 2483 }, { "epoch": 0.9624861319016965, "grad_norm": 0.2035205215215683, "learning_rate": 3.553963149013295e-08, "loss": 1.3705, "step": 2484 }, { "epoch": 0.9628736061899018, "grad_norm": 0.21629628539085388, "learning_rate": 3.480393449988739e-08, "loss": 1.3944, "step": 2485 }, { "epoch": 0.963261080478107, "grad_norm": 0.21561451256275177, "learning_rate": 3.407590550696949e-08, "loss": 1.395, "step": 2486 }, { "epoch": 0.9636485547663122, "grad_norm": 0.20237629115581512, "learning_rate": 3.3355545635710974e-08, "loss": 1.4192, "step": 2487 }, { "epoch": 0.9640360290545174, "grad_norm": 0.16946636140346527, "learning_rate": 3.264285599859751e-08, "loss": 1.3409, "step": 2488 }, { "epoch": 0.9644235033427225, "grad_norm": 0.20045141875743866, "learning_rate": 3.1937837696272566e-08, "loss": 1.3886, "step": 2489 }, { "epoch": 0.9648109776309277, "grad_norm": 0.17656320333480835, "learning_rate": 3.124049181752964e-08, "loss": 1.3432, "step": 2490 }, { "epoch": 0.9651984519191329, "grad_norm": 0.18393300473690033, "learning_rate": 3.055081943931504e-08, "loss": 1.4055, "step": 2491 }, { "epoch": 0.9655859262073381, "grad_norm": 0.19178509712219238, "learning_rate": 2.986882162672344e-08, "loss": 1.3998, "step": 2492 }, { "epoch": 0.9659734004955433, "grad_norm": 0.18330824375152588, "learning_rate": 2.9194499432997903e-08, "loss": 1.4221, "step": 2493 }, { "epoch": 0.9663608747837484, "grad_norm": 0.19309501349925995, "learning_rate": 2.8527853899527614e-08, "loss": 1.422, "step": 2494 }, { "epoch": 0.9667483490719537, "grad_norm": 0.19292105734348297, "learning_rate": 2.7868886055845702e-08, "loss": 1.4051, "step": 2495 }, { "epoch": 0.9671358233601589, "grad_norm": 0.20281581580638885, "learning_rate": 2.721759691962922e-08, "loss": 1.3822, "step": 2496 }, { "epoch": 0.9675232976483641, "grad_norm": 0.1911431849002838, "learning_rate": 2.657398749669582e-08, "loss": 1.3884, "step": 2497 }, { "epoch": 0.9679107719365693, "grad_norm": 0.19114592671394348, "learning_rate": 2.5938058781003194e-08, "loss": 1.4163, "step": 2498 }, { "epoch": 0.9682982462247745, "grad_norm": 0.18455259501934052, "learning_rate": 2.5309811754647973e-08, "loss": 1.3999, "step": 2499 }, { "epoch": 0.9686857205129796, "grad_norm": 0.17854705452919006, "learning_rate": 2.4689247387862934e-08, "loss": 1.3792, "step": 2500 }, { "epoch": 0.9690731948011848, "grad_norm": 0.18395787477493286, "learning_rate": 2.4076366639015914e-08, "loss": 1.4078, "step": 2501 }, { "epoch": 0.96946066908939, "grad_norm": 0.17991270124912262, "learning_rate": 2.347117045460867e-08, "loss": 1.4528, "step": 2502 }, { "epoch": 0.9698481433775952, "grad_norm": 0.18635353446006775, "learning_rate": 2.2873659769276356e-08, "loss": 1.4252, "step": 2503 }, { "epoch": 0.9702356176658004, "grad_norm": 0.1862461268901825, "learning_rate": 2.2283835505783612e-08, "loss": 1.411, "step": 2504 }, { "epoch": 0.9706230919540056, "grad_norm": 0.18639056384563446, "learning_rate": 2.1701698575024576e-08, "loss": 1.3828, "step": 2505 }, { "epoch": 0.9710105662422108, "grad_norm": 0.1920725405216217, "learning_rate": 2.112724987602399e-08, "loss": 1.4011, "step": 2506 }, { "epoch": 0.971398040530416, "grad_norm": 0.1831391304731369, "learning_rate": 2.0560490295929437e-08, "loss": 1.4018, "step": 2507 }, { "epoch": 0.9717855148186212, "grad_norm": 0.2045011967420578, "learning_rate": 2.000142071001632e-08, "loss": 1.3595, "step": 2508 }, { "epoch": 0.9721729891068264, "grad_norm": 0.20824892818927765, "learning_rate": 1.945004198168343e-08, "loss": 1.3727, "step": 2509 }, { "epoch": 0.9725604633950315, "grad_norm": 0.19160808622837067, "learning_rate": 1.890635496245241e-08, "loss": 1.3884, "step": 2510 }, { "epoch": 0.9729479376832367, "grad_norm": 0.19323407113552094, "learning_rate": 1.83703604919655e-08, "loss": 1.3458, "step": 2511 }, { "epoch": 0.9733354119714419, "grad_norm": 0.1756855547428131, "learning_rate": 1.7842059397985555e-08, "loss": 1.4194, "step": 2512 }, { "epoch": 0.9737228862596471, "grad_norm": 0.19845746457576752, "learning_rate": 1.7321452496394386e-08, "loss": 1.3729, "step": 2513 }, { "epoch": 0.9741103605478523, "grad_norm": 0.1945827454328537, "learning_rate": 1.6808540591190524e-08, "loss": 1.3257, "step": 2514 }, { "epoch": 0.9744978348360576, "grad_norm": 0.18492230772972107, "learning_rate": 1.6303324474489233e-08, "loss": 1.3581, "step": 2515 }, { "epoch": 0.9748853091242627, "grad_norm": 0.21283277869224548, "learning_rate": 1.580580492652084e-08, "loss": 1.359, "step": 2516 }, { "epoch": 0.9752727834124679, "grad_norm": 0.17812290787696838, "learning_rate": 1.5315982715629064e-08, "loss": 1.3641, "step": 2517 }, { "epoch": 0.9756602577006731, "grad_norm": 0.1970386952161789, "learning_rate": 1.4833858598271022e-08, "loss": 1.3895, "step": 2518 }, { "epoch": 0.9760477319888783, "grad_norm": 0.1888445019721985, "learning_rate": 1.4359433319013905e-08, "loss": 1.4364, "step": 2519 }, { "epoch": 0.9764352062770835, "grad_norm": 0.18193256855010986, "learning_rate": 1.3892707610536627e-08, "loss": 1.3838, "step": 2520 }, { "epoch": 0.9768226805652886, "grad_norm": 0.2001849114894867, "learning_rate": 1.3433682193627062e-08, "loss": 1.362, "step": 2521 }, { "epoch": 0.9772101548534938, "grad_norm": 0.19239579141139984, "learning_rate": 1.2982357777180376e-08, "loss": 1.3766, "step": 2522 }, { "epoch": 0.977597629141699, "grad_norm": 0.1744755208492279, "learning_rate": 1.2538735058199026e-08, "loss": 1.4228, "step": 2523 }, { "epoch": 0.9779851034299042, "grad_norm": 0.1837005466222763, "learning_rate": 1.2102814721791645e-08, "loss": 1.3706, "step": 2524 }, { "epoch": 0.9783725777181095, "grad_norm": 0.21075516939163208, "learning_rate": 1.1674597441171942e-08, "loss": 1.3606, "step": 2525 }, { "epoch": 0.9787600520063147, "grad_norm": 0.18546679615974426, "learning_rate": 1.1254083877656475e-08, "loss": 1.4528, "step": 2526 }, { "epoch": 0.9791475262945198, "grad_norm": 0.19161982834339142, "learning_rate": 1.0841274680664649e-08, "loss": 1.4006, "step": 2527 }, { "epoch": 0.979535000582725, "grad_norm": 0.1808328777551651, "learning_rate": 1.0436170487719278e-08, "loss": 1.3403, "step": 2528 }, { "epoch": 0.9799224748709302, "grad_norm": 0.18864895403385162, "learning_rate": 1.0038771924442136e-08, "loss": 1.3726, "step": 2529 }, { "epoch": 0.9803099491591354, "grad_norm": 0.18211407959461212, "learning_rate": 9.649079604555078e-09, "loss": 1.4013, "step": 2530 }, { "epoch": 0.9806974234473406, "grad_norm": 0.17185856401920319, "learning_rate": 9.267094129879472e-09, "loss": 1.3908, "step": 2531 }, { "epoch": 0.9810848977355457, "grad_norm": 0.1727750301361084, "learning_rate": 8.8928160903351e-09, "loss": 1.445, "step": 2532 }, { "epoch": 0.9814723720237509, "grad_norm": 0.19742737710475922, "learning_rate": 8.526246063936817e-09, "loss": 1.3637, "step": 2533 }, { "epoch": 0.9818598463119561, "grad_norm": 0.17293642461299896, "learning_rate": 8.167384616797336e-09, "loss": 1.3922, "step": 2534 }, { "epoch": 0.9822473206001614, "grad_norm": 0.18810218572616577, "learning_rate": 7.816232303123894e-09, "loss": 1.3765, "step": 2535 }, { "epoch": 0.9826347948883666, "grad_norm": 0.185270294547081, "learning_rate": 7.472789665218805e-09, "loss": 1.3788, "step": 2536 }, { "epoch": 0.9830222691765718, "grad_norm": 0.1759500354528427, "learning_rate": 7.137057233477241e-09, "loss": 1.4188, "step": 2537 }, { "epoch": 0.9834097434647769, "grad_norm": 0.18677230179309845, "learning_rate": 6.809035526387231e-09, "loss": 1.3265, "step": 2538 }, { "epoch": 0.9837972177529821, "grad_norm": 0.18839004635810852, "learning_rate": 6.48872505052911e-09, "loss": 1.373, "step": 2539 }, { "epoch": 0.9841846920411873, "grad_norm": 0.17947262525558472, "learning_rate": 6.176126300573848e-09, "loss": 1.3664, "step": 2540 }, { "epoch": 0.9845721663293925, "grad_norm": 0.19162359833717346, "learning_rate": 5.871239759283054e-09, "loss": 1.3443, "step": 2541 }, { "epoch": 0.9849596406175977, "grad_norm": 0.2002682238817215, "learning_rate": 5.574065897508973e-09, "loss": 1.3404, "step": 2542 }, { "epoch": 0.9853471149058028, "grad_norm": 0.17283864319324493, "learning_rate": 5.284605174190605e-09, "loss": 1.332, "step": 2543 }, { "epoch": 0.985734589194008, "grad_norm": 0.17972779273986816, "learning_rate": 5.002858036357028e-09, "loss": 1.3896, "step": 2544 }, { "epoch": 0.9861220634822133, "grad_norm": 0.19459164142608643, "learning_rate": 4.72882491912463e-09, "loss": 1.3911, "step": 2545 }, { "epoch": 0.9865095377704185, "grad_norm": 0.1872740387916565, "learning_rate": 4.462506245695441e-09, "loss": 1.3892, "step": 2546 }, { "epoch": 0.9868970120586237, "grad_norm": 0.178998202085495, "learning_rate": 4.203902427359352e-09, "loss": 1.4239, "step": 2547 }, { "epoch": 0.9872844863468289, "grad_norm": 0.2021799087524414, "learning_rate": 3.953013863490784e-09, "loss": 1.3595, "step": 2548 }, { "epoch": 0.987671960635034, "grad_norm": 0.1911792904138565, "learning_rate": 3.7098409415486923e-09, "loss": 1.4176, "step": 2549 }, { "epoch": 0.9880594349232392, "grad_norm": 0.18347306549549103, "learning_rate": 3.4743840370776718e-09, "loss": 1.3904, "step": 2550 }, { "epoch": 0.9884469092114444, "grad_norm": 0.21155084669589996, "learning_rate": 3.2466435137057385e-09, "loss": 1.3901, "step": 2551 }, { "epoch": 0.9888343834996496, "grad_norm": 0.18863387405872345, "learning_rate": 3.026619723142665e-09, "loss": 1.3861, "step": 2552 }, { "epoch": 0.9892218577878548, "grad_norm": 0.17852704226970673, "learning_rate": 2.814313005183311e-09, "loss": 1.3933, "step": 2553 }, { "epoch": 0.9896093320760599, "grad_norm": 0.20603954792022705, "learning_rate": 2.6097236877026254e-09, "loss": 1.3768, "step": 2554 }, { "epoch": 0.9899968063642652, "grad_norm": 0.2049267292022705, "learning_rate": 2.4128520866578686e-09, "loss": 1.3645, "step": 2555 }, { "epoch": 0.9903842806524704, "grad_norm": 0.20086944103240967, "learning_rate": 2.223698506088612e-09, "loss": 1.409, "step": 2556 }, { "epoch": 0.9907717549406756, "grad_norm": 0.181266188621521, "learning_rate": 2.0422632381128514e-09, "loss": 1.3837, "step": 2557 }, { "epoch": 0.9911592292288808, "grad_norm": 0.18233245611190796, "learning_rate": 1.86854656293034e-09, "loss": 1.3762, "step": 2558 }, { "epoch": 0.991546703517086, "grad_norm": 0.17277930676937103, "learning_rate": 1.7025487488209203e-09, "loss": 1.3658, "step": 2559 }, { "epoch": 0.9919341778052911, "grad_norm": 0.17349141836166382, "learning_rate": 1.5442700521428599e-09, "loss": 1.3406, "step": 2560 }, { "epoch": 0.9923216520934963, "grad_norm": 0.19254106283187866, "learning_rate": 1.3937107173334076e-09, "loss": 1.3803, "step": 2561 }, { "epoch": 0.9927091263817015, "grad_norm": 0.20064422488212585, "learning_rate": 1.2508709769093464e-09, "loss": 1.3469, "step": 2562 }, { "epoch": 0.9930966006699067, "grad_norm": 0.1746414452791214, "learning_rate": 1.115751051464775e-09, "loss": 1.3771, "step": 2563 }, { "epoch": 0.9934840749581119, "grad_norm": 0.18434514105319977, "learning_rate": 9.883511496722176e-10, "loss": 1.3614, "step": 2564 }, { "epoch": 0.9938715492463172, "grad_norm": 0.18092720210552216, "learning_rate": 8.686714682815123e-10, "loss": 1.4125, "step": 2565 }, { "epoch": 0.9942590235345223, "grad_norm": 0.18365001678466797, "learning_rate": 7.567121921198129e-10, "loss": 1.3532, "step": 2566 }, { "epoch": 0.9946464978227275, "grad_norm": 0.16699053347110748, "learning_rate": 6.524734940915877e-10, "loss": 1.4045, "step": 2567 }, { "epoch": 0.9950339721109327, "grad_norm": 0.18793420493602753, "learning_rate": 5.559555351780655e-10, "loss": 1.4324, "step": 2568 }, { "epoch": 0.9954214463991379, "grad_norm": 0.19253648817539215, "learning_rate": 4.671584644355687e-10, "loss": 1.426, "step": 2569 }, { "epoch": 0.9958089206873431, "grad_norm": 0.18221616744995117, "learning_rate": 3.8608241899940056e-10, "loss": 1.3659, "step": 2570 }, { "epoch": 0.9961963949755482, "grad_norm": 0.1748841404914856, "learning_rate": 3.1272752407773834e-10, "loss": 1.4263, "step": 2571 }, { "epoch": 0.9965838692637534, "grad_norm": 0.18521229922771454, "learning_rate": 2.470938929571842e-10, "loss": 1.4405, "step": 2572 }, { "epoch": 0.9969713435519586, "grad_norm": 0.18423818051815033, "learning_rate": 1.8918162699887997e-10, "loss": 1.3955, "step": 2573 }, { "epoch": 0.9973588178401638, "grad_norm": 0.17565150558948517, "learning_rate": 1.3899081563906182e-10, "loss": 1.3862, "step": 2574 }, { "epoch": 0.9977462921283691, "grad_norm": 0.18880826234817505, "learning_rate": 9.65215363907257e-11, "loss": 1.3653, "step": 2575 }, { "epoch": 0.9981337664165743, "grad_norm": 0.18108192086219788, "learning_rate": 6.177385484029685e-11, "loss": 1.3969, "step": 2576 }, { "epoch": 0.9985212407047794, "grad_norm": 0.19256563484668732, "learning_rate": 3.474782465096027e-11, "loss": 1.3432, "step": 2577 }, { "epoch": 0.9989087149929846, "grad_norm": 0.19377000629901886, "learning_rate": 1.544348756044034e-11, "loss": 1.4043, "step": 2578 }, { "epoch": 0.9992961892811898, "grad_norm": 0.18626244366168976, "learning_rate": 3.860873380445718e-12, "loss": 1.3605, "step": 2579 }, { "epoch": 0.999683663569395, "grad_norm": 0.17782603204250336, "learning_rate": 0.0, "loss": 1.3834, "step": 2580 } ], "logging_steps": 1.0, "max_steps": 2580, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 4, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.55147909551824e+18, "train_batch_size": 1, "trial_name": null, "trial_params": null }