{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9960681520314547, "eval_steps": 500, "global_step": 1524, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.001965923984272608, "grad_norm": 5.710505962371826, "learning_rate": 6.535947712418302e-08, "loss": 0.8567, "step": 1 }, { "epoch": 0.003931847968545216, "grad_norm": 5.58311653137207, "learning_rate": 1.3071895424836603e-07, "loss": 0.8409, "step": 2 }, { "epoch": 0.005897771952817824, "grad_norm": 5.551272869110107, "learning_rate": 1.9607843137254904e-07, "loss": 0.8184, "step": 3 }, { "epoch": 0.007863695937090432, "grad_norm": 5.5394158363342285, "learning_rate": 2.6143790849673207e-07, "loss": 0.8196, "step": 4 }, { "epoch": 0.009829619921363041, "grad_norm": 5.637419700622559, "learning_rate": 3.267973856209151e-07, "loss": 0.8381, "step": 5 }, { "epoch": 0.011795543905635648, "grad_norm": 5.617730617523193, "learning_rate": 3.921568627450981e-07, "loss": 0.849, "step": 6 }, { "epoch": 0.013761467889908258, "grad_norm": 5.583267688751221, "learning_rate": 4.5751633986928105e-07, "loss": 0.8528, "step": 7 }, { "epoch": 0.015727391874180863, "grad_norm": 5.440237045288086, "learning_rate": 5.228758169934641e-07, "loss": 0.8135, "step": 8 }, { "epoch": 0.017693315858453473, "grad_norm": 5.594210147857666, "learning_rate": 5.882352941176471e-07, "loss": 0.841, "step": 9 }, { "epoch": 0.019659239842726082, "grad_norm": 5.199444770812988, "learning_rate": 6.535947712418302e-07, "loss": 0.8116, "step": 10 }, { "epoch": 0.02162516382699869, "grad_norm": 5.102973937988281, "learning_rate": 7.189542483660131e-07, "loss": 0.8378, "step": 11 }, { "epoch": 0.023591087811271297, "grad_norm": 5.083095550537109, "learning_rate": 7.843137254901962e-07, "loss": 0.8191, "step": 12 }, { "epoch": 0.025557011795543906, "grad_norm": 4.040502071380615, "learning_rate": 8.496732026143792e-07, "loss": 0.8056, "step": 13 }, { "epoch": 0.027522935779816515, "grad_norm": 3.962697982788086, "learning_rate": 9.150326797385621e-07, "loss": 0.7871, "step": 14 }, { "epoch": 0.02948885976408912, "grad_norm": 3.821044921875, "learning_rate": 9.80392156862745e-07, "loss": 0.8056, "step": 15 }, { "epoch": 0.03145478374836173, "grad_norm": 3.665053367614746, "learning_rate": 1.0457516339869283e-06, "loss": 0.7729, "step": 16 }, { "epoch": 0.033420707732634336, "grad_norm": 2.3340535163879395, "learning_rate": 1.111111111111111e-06, "loss": 0.7517, "step": 17 }, { "epoch": 0.035386631716906945, "grad_norm": 2.262815475463867, "learning_rate": 1.1764705882352942e-06, "loss": 0.764, "step": 18 }, { "epoch": 0.037352555701179554, "grad_norm": 2.1935653686523438, "learning_rate": 1.2418300653594772e-06, "loss": 0.7601, "step": 19 }, { "epoch": 0.039318479685452164, "grad_norm": 2.0895442962646484, "learning_rate": 1.3071895424836604e-06, "loss": 0.7611, "step": 20 }, { "epoch": 0.04128440366972477, "grad_norm": 1.9166232347488403, "learning_rate": 1.3725490196078434e-06, "loss": 0.7507, "step": 21 }, { "epoch": 0.04325032765399738, "grad_norm": 1.8988736867904663, "learning_rate": 1.4379084967320261e-06, "loss": 0.7888, "step": 22 }, { "epoch": 0.045216251638269984, "grad_norm": 1.5350834131240845, "learning_rate": 1.5032679738562091e-06, "loss": 0.7509, "step": 23 }, { "epoch": 0.047182175622542594, "grad_norm": 2.2456037998199463, "learning_rate": 1.5686274509803923e-06, "loss": 0.7112, "step": 24 }, { "epoch": 0.0491480996068152, "grad_norm": 2.773117780685425, "learning_rate": 1.6339869281045753e-06, "loss": 0.7437, "step": 25 }, { "epoch": 0.05111402359108781, "grad_norm": 2.6303365230560303, "learning_rate": 1.6993464052287585e-06, "loss": 0.6998, "step": 26 }, { "epoch": 0.05307994757536042, "grad_norm": 2.7553372383117676, "learning_rate": 1.7647058823529414e-06, "loss": 0.7304, "step": 27 }, { "epoch": 0.05504587155963303, "grad_norm": 2.382776975631714, "learning_rate": 1.8300653594771242e-06, "loss": 0.7093, "step": 28 }, { "epoch": 0.05701179554390563, "grad_norm": 2.04976487159729, "learning_rate": 1.8954248366013072e-06, "loss": 0.6828, "step": 29 }, { "epoch": 0.05897771952817824, "grad_norm": 1.8414767980575562, "learning_rate": 1.96078431372549e-06, "loss": 0.7195, "step": 30 }, { "epoch": 0.06094364351245085, "grad_norm": 1.3379838466644287, "learning_rate": 2.0261437908496734e-06, "loss": 0.6714, "step": 31 }, { "epoch": 0.06290956749672345, "grad_norm": 1.0512365102767944, "learning_rate": 2.0915032679738565e-06, "loss": 0.6816, "step": 32 }, { "epoch": 0.06487549148099607, "grad_norm": 1.1513341665267944, "learning_rate": 2.1568627450980393e-06, "loss": 0.6715, "step": 33 }, { "epoch": 0.06684141546526867, "grad_norm": 1.170161485671997, "learning_rate": 2.222222222222222e-06, "loss": 0.6749, "step": 34 }, { "epoch": 0.06880733944954129, "grad_norm": 1.1509945392608643, "learning_rate": 2.2875816993464053e-06, "loss": 0.6724, "step": 35 }, { "epoch": 0.07077326343381389, "grad_norm": 1.0026973485946655, "learning_rate": 2.3529411764705885e-06, "loss": 0.6533, "step": 36 }, { "epoch": 0.0727391874180865, "grad_norm": 0.8049423098564148, "learning_rate": 2.4183006535947716e-06, "loss": 0.6687, "step": 37 }, { "epoch": 0.07470511140235911, "grad_norm": 0.8071291446685791, "learning_rate": 2.4836601307189544e-06, "loss": 0.6419, "step": 38 }, { "epoch": 0.07667103538663171, "grad_norm": 0.7793786525726318, "learning_rate": 2.549019607843137e-06, "loss": 0.6659, "step": 39 }, { "epoch": 0.07863695937090433, "grad_norm": 0.6068173050880432, "learning_rate": 2.6143790849673208e-06, "loss": 0.6398, "step": 40 }, { "epoch": 0.08060288335517693, "grad_norm": 0.589724600315094, "learning_rate": 2.6797385620915036e-06, "loss": 0.6064, "step": 41 }, { "epoch": 0.08256880733944955, "grad_norm": 0.6173176765441895, "learning_rate": 2.7450980392156867e-06, "loss": 0.648, "step": 42 }, { "epoch": 0.08453473132372215, "grad_norm": 0.6198132634162903, "learning_rate": 2.8104575163398695e-06, "loss": 0.6018, "step": 43 }, { "epoch": 0.08650065530799476, "grad_norm": 0.7025796175003052, "learning_rate": 2.8758169934640523e-06, "loss": 0.6285, "step": 44 }, { "epoch": 0.08846657929226737, "grad_norm": 0.49610280990600586, "learning_rate": 2.9411764705882355e-06, "loss": 0.6006, "step": 45 }, { "epoch": 0.09043250327653997, "grad_norm": 0.45890048146247864, "learning_rate": 3.0065359477124182e-06, "loss": 0.6071, "step": 46 }, { "epoch": 0.09239842726081259, "grad_norm": 0.5086219310760498, "learning_rate": 3.071895424836602e-06, "loss": 0.5908, "step": 47 }, { "epoch": 0.09436435124508519, "grad_norm": 0.5073956847190857, "learning_rate": 3.1372549019607846e-06, "loss": 0.5975, "step": 48 }, { "epoch": 0.0963302752293578, "grad_norm": 0.49690189957618713, "learning_rate": 3.2026143790849674e-06, "loss": 0.5953, "step": 49 }, { "epoch": 0.0982961992136304, "grad_norm": 0.49608007073402405, "learning_rate": 3.2679738562091506e-06, "loss": 0.6189, "step": 50 }, { "epoch": 0.10026212319790301, "grad_norm": 0.42027363181114197, "learning_rate": 3.3333333333333333e-06, "loss": 0.5825, "step": 51 }, { "epoch": 0.10222804718217562, "grad_norm": 0.3997390568256378, "learning_rate": 3.398692810457517e-06, "loss": 0.5903, "step": 52 }, { "epoch": 0.10419397116644823, "grad_norm": 0.3815591335296631, "learning_rate": 3.4640522875816997e-06, "loss": 0.6104, "step": 53 }, { "epoch": 0.10615989515072084, "grad_norm": 0.40735822916030884, "learning_rate": 3.529411764705883e-06, "loss": 0.5808, "step": 54 }, { "epoch": 0.10812581913499344, "grad_norm": 0.4196593165397644, "learning_rate": 3.5947712418300657e-06, "loss": 0.5827, "step": 55 }, { "epoch": 0.11009174311926606, "grad_norm": 0.40242332220077515, "learning_rate": 3.6601307189542484e-06, "loss": 0.5751, "step": 56 }, { "epoch": 0.11205766710353866, "grad_norm": 0.3715681731700897, "learning_rate": 3.7254901960784316e-06, "loss": 0.5979, "step": 57 }, { "epoch": 0.11402359108781127, "grad_norm": 0.34788185358047485, "learning_rate": 3.7908496732026144e-06, "loss": 0.5718, "step": 58 }, { "epoch": 0.11598951507208388, "grad_norm": 0.37109532952308655, "learning_rate": 3.856209150326798e-06, "loss": 0.5671, "step": 59 }, { "epoch": 0.11795543905635648, "grad_norm": 0.35452768206596375, "learning_rate": 3.92156862745098e-06, "loss": 0.5621, "step": 60 }, { "epoch": 0.1199213630406291, "grad_norm": 0.36002615094184875, "learning_rate": 3.986928104575164e-06, "loss": 0.5881, "step": 61 }, { "epoch": 0.1218872870249017, "grad_norm": 0.3341769874095917, "learning_rate": 4.052287581699347e-06, "loss": 0.5599, "step": 62 }, { "epoch": 0.12385321100917432, "grad_norm": 0.34028100967407227, "learning_rate": 4.11764705882353e-06, "loss": 0.5593, "step": 63 }, { "epoch": 0.1258191349934469, "grad_norm": 0.3311789631843567, "learning_rate": 4.183006535947713e-06, "loss": 0.5632, "step": 64 }, { "epoch": 0.12778505897771952, "grad_norm": 0.34246182441711426, "learning_rate": 4.2483660130718954e-06, "loss": 0.6082, "step": 65 }, { "epoch": 0.12975098296199214, "grad_norm": 0.295360803604126, "learning_rate": 4.313725490196079e-06, "loss": 0.5482, "step": 66 }, { "epoch": 0.13171690694626476, "grad_norm": 0.3409435451030731, "learning_rate": 4.379084967320262e-06, "loss": 0.5675, "step": 67 }, { "epoch": 0.13368283093053734, "grad_norm": 0.34775587916374207, "learning_rate": 4.444444444444444e-06, "loss": 0.5521, "step": 68 }, { "epoch": 0.13564875491480996, "grad_norm": 0.3222702741622925, "learning_rate": 4.509803921568628e-06, "loss": 0.5765, "step": 69 }, { "epoch": 0.13761467889908258, "grad_norm": 0.2791072130203247, "learning_rate": 4.5751633986928105e-06, "loss": 0.5511, "step": 70 }, { "epoch": 0.13958060288335516, "grad_norm": 0.3028191030025482, "learning_rate": 4.640522875816994e-06, "loss": 0.5749, "step": 71 }, { "epoch": 0.14154652686762778, "grad_norm": 0.28149256110191345, "learning_rate": 4.705882352941177e-06, "loss": 0.5296, "step": 72 }, { "epoch": 0.1435124508519004, "grad_norm": 0.3133794665336609, "learning_rate": 4.77124183006536e-06, "loss": 0.6033, "step": 73 }, { "epoch": 0.145478374836173, "grad_norm": 0.2812778949737549, "learning_rate": 4.836601307189543e-06, "loss": 0.5632, "step": 74 }, { "epoch": 0.1474442988204456, "grad_norm": 0.30257362127304077, "learning_rate": 4.901960784313726e-06, "loss": 0.5605, "step": 75 }, { "epoch": 0.14941022280471822, "grad_norm": 0.25866395235061646, "learning_rate": 4.967320261437909e-06, "loss": 0.5735, "step": 76 }, { "epoch": 0.15137614678899083, "grad_norm": 0.28107836842536926, "learning_rate": 5.032679738562092e-06, "loss": 0.5742, "step": 77 }, { "epoch": 0.15334207077326342, "grad_norm": 0.29915159940719604, "learning_rate": 5.098039215686274e-06, "loss": 0.5426, "step": 78 }, { "epoch": 0.15530799475753604, "grad_norm": 0.2864636778831482, "learning_rate": 5.163398692810458e-06, "loss": 0.5499, "step": 79 }, { "epoch": 0.15727391874180865, "grad_norm": 0.31190046668052673, "learning_rate": 5.2287581699346416e-06, "loss": 0.547, "step": 80 }, { "epoch": 0.15923984272608127, "grad_norm": 0.30313733220100403, "learning_rate": 5.294117647058824e-06, "loss": 0.5517, "step": 81 }, { "epoch": 0.16120576671035386, "grad_norm": 0.2760246694087982, "learning_rate": 5.359477124183007e-06, "loss": 0.5477, "step": 82 }, { "epoch": 0.16317169069462648, "grad_norm": 0.3234259784221649, "learning_rate": 5.4248366013071894e-06, "loss": 0.5667, "step": 83 }, { "epoch": 0.1651376146788991, "grad_norm": 0.32582947611808777, "learning_rate": 5.4901960784313735e-06, "loss": 0.5612, "step": 84 }, { "epoch": 0.16710353866317168, "grad_norm": 0.29849186539649963, "learning_rate": 5.555555555555557e-06, "loss": 0.5855, "step": 85 }, { "epoch": 0.1690694626474443, "grad_norm": 0.3021292984485626, "learning_rate": 5.620915032679739e-06, "loss": 0.5514, "step": 86 }, { "epoch": 0.1710353866317169, "grad_norm": 0.30864256620407104, "learning_rate": 5.686274509803922e-06, "loss": 0.5377, "step": 87 }, { "epoch": 0.17300131061598953, "grad_norm": 0.28641265630722046, "learning_rate": 5.7516339869281045e-06, "loss": 0.5208, "step": 88 }, { "epoch": 0.17496723460026212, "grad_norm": 0.307472825050354, "learning_rate": 5.816993464052289e-06, "loss": 0.5354, "step": 89 }, { "epoch": 0.17693315858453473, "grad_norm": 0.3084348440170288, "learning_rate": 5.882352941176471e-06, "loss": 0.5328, "step": 90 }, { "epoch": 0.17889908256880735, "grad_norm": 0.3037461042404175, "learning_rate": 5.947712418300654e-06, "loss": 0.5287, "step": 91 }, { "epoch": 0.18086500655307994, "grad_norm": 0.28996726870536804, "learning_rate": 6.0130718954248365e-06, "loss": 0.5582, "step": 92 }, { "epoch": 0.18283093053735255, "grad_norm": 0.3222200572490692, "learning_rate": 6.07843137254902e-06, "loss": 0.5666, "step": 93 }, { "epoch": 0.18479685452162517, "grad_norm": 0.2793603837490082, "learning_rate": 6.143790849673204e-06, "loss": 0.5416, "step": 94 }, { "epoch": 0.18676277850589776, "grad_norm": 0.2899613380432129, "learning_rate": 6.209150326797386e-06, "loss": 0.5213, "step": 95 }, { "epoch": 0.18872870249017037, "grad_norm": 0.3074812591075897, "learning_rate": 6.274509803921569e-06, "loss": 0.5353, "step": 96 }, { "epoch": 0.190694626474443, "grad_norm": 0.3134816586971283, "learning_rate": 6.3398692810457515e-06, "loss": 0.5232, "step": 97 }, { "epoch": 0.1926605504587156, "grad_norm": 0.3031688630580902, "learning_rate": 6.405228758169935e-06, "loss": 0.5484, "step": 98 }, { "epoch": 0.1946264744429882, "grad_norm": 0.2818717956542969, "learning_rate": 6.470588235294119e-06, "loss": 0.5255, "step": 99 }, { "epoch": 0.1965923984272608, "grad_norm": 0.2932426333427429, "learning_rate": 6.535947712418301e-06, "loss": 0.5307, "step": 100 }, { "epoch": 0.19855832241153343, "grad_norm": 0.2900339663028717, "learning_rate": 6.601307189542484e-06, "loss": 0.5319, "step": 101 }, { "epoch": 0.20052424639580602, "grad_norm": 0.2933003902435303, "learning_rate": 6.666666666666667e-06, "loss": 0.5313, "step": 102 }, { "epoch": 0.20249017038007863, "grad_norm": 0.3101281523704529, "learning_rate": 6.732026143790851e-06, "loss": 0.5337, "step": 103 }, { "epoch": 0.20445609436435125, "grad_norm": 0.2710820138454437, "learning_rate": 6.797385620915034e-06, "loss": 0.5459, "step": 104 }, { "epoch": 0.20642201834862386, "grad_norm": 0.31979817152023315, "learning_rate": 6.862745098039216e-06, "loss": 0.5247, "step": 105 }, { "epoch": 0.20838794233289645, "grad_norm": 0.2835678160190582, "learning_rate": 6.928104575163399e-06, "loss": 0.5182, "step": 106 }, { "epoch": 0.21035386631716907, "grad_norm": 0.4005821645259857, "learning_rate": 6.993464052287582e-06, "loss": 0.5273, "step": 107 }, { "epoch": 0.21231979030144169, "grad_norm": 0.30453720688819885, "learning_rate": 7.058823529411766e-06, "loss": 0.5205, "step": 108 }, { "epoch": 0.21428571428571427, "grad_norm": 0.3164799213409424, "learning_rate": 7.124183006535948e-06, "loss": 0.5297, "step": 109 }, { "epoch": 0.2162516382699869, "grad_norm": 0.3417559862136841, "learning_rate": 7.189542483660131e-06, "loss": 0.5178, "step": 110 }, { "epoch": 0.2182175622542595, "grad_norm": 0.28603699803352356, "learning_rate": 7.2549019607843145e-06, "loss": 0.5394, "step": 111 }, { "epoch": 0.22018348623853212, "grad_norm": 0.41378653049468994, "learning_rate": 7.320261437908497e-06, "loss": 0.5381, "step": 112 }, { "epoch": 0.2221494102228047, "grad_norm": 0.31651490926742554, "learning_rate": 7.385620915032681e-06, "loss": 0.5334, "step": 113 }, { "epoch": 0.22411533420707733, "grad_norm": 0.44894519448280334, "learning_rate": 7.450980392156863e-06, "loss": 0.536, "step": 114 }, { "epoch": 0.22608125819134994, "grad_norm": 0.3745504319667816, "learning_rate": 7.516339869281046e-06, "loss": 0.5289, "step": 115 }, { "epoch": 0.22804718217562253, "grad_norm": 0.3731832206249237, "learning_rate": 7.581699346405229e-06, "loss": 0.5122, "step": 116 }, { "epoch": 0.23001310615989515, "grad_norm": 0.37562766671180725, "learning_rate": 7.647058823529411e-06, "loss": 0.5206, "step": 117 }, { "epoch": 0.23197903014416776, "grad_norm": 0.32179543375968933, "learning_rate": 7.712418300653596e-06, "loss": 0.5253, "step": 118 }, { "epoch": 0.23394495412844038, "grad_norm": 0.46252328157424927, "learning_rate": 7.77777777777778e-06, "loss": 0.5188, "step": 119 }, { "epoch": 0.23591087811271297, "grad_norm": 0.29633471369743347, "learning_rate": 7.84313725490196e-06, "loss": 0.5129, "step": 120 }, { "epoch": 0.23787680209698558, "grad_norm": 0.39629998803138733, "learning_rate": 7.908496732026144e-06, "loss": 0.5448, "step": 121 }, { "epoch": 0.2398427260812582, "grad_norm": 0.30213412642478943, "learning_rate": 7.973856209150329e-06, "loss": 0.5174, "step": 122 }, { "epoch": 0.2418086500655308, "grad_norm": 0.46073678135871887, "learning_rate": 8.03921568627451e-06, "loss": 0.5264, "step": 123 }, { "epoch": 0.2437745740498034, "grad_norm": 0.3474709093570709, "learning_rate": 8.104575163398693e-06, "loss": 0.5149, "step": 124 }, { "epoch": 0.24574049803407602, "grad_norm": 0.37764114141464233, "learning_rate": 8.169934640522877e-06, "loss": 0.5174, "step": 125 }, { "epoch": 0.24770642201834864, "grad_norm": 0.3189232647418976, "learning_rate": 8.23529411764706e-06, "loss": 0.5257, "step": 126 }, { "epoch": 0.24967234600262123, "grad_norm": 0.3779374063014984, "learning_rate": 8.300653594771243e-06, "loss": 0.5307, "step": 127 }, { "epoch": 0.2516382699868938, "grad_norm": 0.2898067235946655, "learning_rate": 8.366013071895426e-06, "loss": 0.5098, "step": 128 }, { "epoch": 0.25360419397116646, "grad_norm": 0.3959129750728607, "learning_rate": 8.43137254901961e-06, "loss": 0.529, "step": 129 }, { "epoch": 0.25557011795543905, "grad_norm": 0.33046963810920715, "learning_rate": 8.496732026143791e-06, "loss": 0.5165, "step": 130 }, { "epoch": 0.2575360419397117, "grad_norm": 0.3353389799594879, "learning_rate": 8.562091503267974e-06, "loss": 0.5178, "step": 131 }, { "epoch": 0.2595019659239843, "grad_norm": 0.36417704820632935, "learning_rate": 8.627450980392157e-06, "loss": 0.5153, "step": 132 }, { "epoch": 0.26146788990825687, "grad_norm": 0.3149842321872711, "learning_rate": 8.69281045751634e-06, "loss": 0.5339, "step": 133 }, { "epoch": 0.2634338138925295, "grad_norm": 0.40793469548225403, "learning_rate": 8.758169934640524e-06, "loss": 0.5169, "step": 134 }, { "epoch": 0.2653997378768021, "grad_norm": 0.3299751877784729, "learning_rate": 8.823529411764707e-06, "loss": 0.5399, "step": 135 }, { "epoch": 0.2673656618610747, "grad_norm": 0.34630492329597473, "learning_rate": 8.888888888888888e-06, "loss": 0.525, "step": 136 }, { "epoch": 0.26933158584534733, "grad_norm": 0.3971831500530243, "learning_rate": 8.954248366013073e-06, "loss": 0.5317, "step": 137 }, { "epoch": 0.2712975098296199, "grad_norm": 0.3247168958187103, "learning_rate": 9.019607843137256e-06, "loss": 0.4959, "step": 138 }, { "epoch": 0.2732634338138925, "grad_norm": 0.3679560422897339, "learning_rate": 9.084967320261438e-06, "loss": 0.5231, "step": 139 }, { "epoch": 0.27522935779816515, "grad_norm": 0.37172260880470276, "learning_rate": 9.150326797385621e-06, "loss": 0.5165, "step": 140 }, { "epoch": 0.27719528178243774, "grad_norm": 0.33269190788269043, "learning_rate": 9.215686274509804e-06, "loss": 0.5179, "step": 141 }, { "epoch": 0.27916120576671033, "grad_norm": 0.35121285915374756, "learning_rate": 9.281045751633987e-06, "loss": 0.5173, "step": 142 }, { "epoch": 0.281127129750983, "grad_norm": 0.32511773705482483, "learning_rate": 9.34640522875817e-06, "loss": 0.5253, "step": 143 }, { "epoch": 0.28309305373525556, "grad_norm": 0.34771692752838135, "learning_rate": 9.411764705882354e-06, "loss": 0.5297, "step": 144 }, { "epoch": 0.2850589777195282, "grad_norm": 0.280225932598114, "learning_rate": 9.477124183006537e-06, "loss": 0.5309, "step": 145 }, { "epoch": 0.2870249017038008, "grad_norm": 0.3082393407821655, "learning_rate": 9.54248366013072e-06, "loss": 0.5029, "step": 146 }, { "epoch": 0.2889908256880734, "grad_norm": 0.3622486889362335, "learning_rate": 9.607843137254903e-06, "loss": 0.5293, "step": 147 }, { "epoch": 0.290956749672346, "grad_norm": 0.3383775055408478, "learning_rate": 9.673202614379087e-06, "loss": 0.5041, "step": 148 }, { "epoch": 0.2929226736566186, "grad_norm": 0.427328497171402, "learning_rate": 9.738562091503268e-06, "loss": 0.5209, "step": 149 }, { "epoch": 0.2948885976408912, "grad_norm": 0.29876646399497986, "learning_rate": 9.803921568627451e-06, "loss": 0.505, "step": 150 }, { "epoch": 0.29685452162516385, "grad_norm": 0.4383123815059662, "learning_rate": 9.869281045751634e-06, "loss": 0.5321, "step": 151 }, { "epoch": 0.29882044560943644, "grad_norm": 0.40653786063194275, "learning_rate": 9.934640522875818e-06, "loss": 0.5166, "step": 152 }, { "epoch": 0.300786369593709, "grad_norm": 0.35615018010139465, "learning_rate": 1e-05, "loss": 0.5018, "step": 153 }, { "epoch": 0.30275229357798167, "grad_norm": 0.3174058198928833, "learning_rate": 9.999986873025113e-06, "loss": 0.4764, "step": 154 }, { "epoch": 0.30471821756225426, "grad_norm": 0.363629549741745, "learning_rate": 9.999947492169374e-06, "loss": 0.5066, "step": 155 }, { "epoch": 0.30668414154652685, "grad_norm": 0.3160753846168518, "learning_rate": 9.999881857639567e-06, "loss": 0.514, "step": 156 }, { "epoch": 0.3086500655307995, "grad_norm": 0.3479490876197815, "learning_rate": 9.999789969780322e-06, "loss": 0.5266, "step": 157 }, { "epoch": 0.3106159895150721, "grad_norm": 0.37828707695007324, "learning_rate": 9.999671829074127e-06, "loss": 0.5034, "step": 158 }, { "epoch": 0.31258191349934467, "grad_norm": 0.35347092151641846, "learning_rate": 9.999527436141312e-06, "loss": 0.5032, "step": 159 }, { "epoch": 0.3145478374836173, "grad_norm": 0.3980875611305237, "learning_rate": 9.999356791740052e-06, "loss": 0.5113, "step": 160 }, { "epoch": 0.3165137614678899, "grad_norm": 0.3677966892719269, "learning_rate": 9.999159896766368e-06, "loss": 0.5131, "step": 161 }, { "epoch": 0.31847968545216254, "grad_norm": 0.33659663796424866, "learning_rate": 9.998936752254111e-06, "loss": 0.4874, "step": 162 }, { "epoch": 0.32044560943643513, "grad_norm": 0.3887609541416168, "learning_rate": 9.99868735937497e-06, "loss": 0.5195, "step": 163 }, { "epoch": 0.3224115334207077, "grad_norm": 0.4096239507198334, "learning_rate": 9.998411719438451e-06, "loss": 0.5047, "step": 164 }, { "epoch": 0.32437745740498036, "grad_norm": 0.39740556478500366, "learning_rate": 9.998109833891883e-06, "loss": 0.5211, "step": 165 }, { "epoch": 0.32634338138925295, "grad_norm": 0.3286636173725128, "learning_rate": 9.997781704320404e-06, "loss": 0.5267, "step": 166 }, { "epoch": 0.32830930537352554, "grad_norm": 0.37281227111816406, "learning_rate": 9.997427332446953e-06, "loss": 0.5271, "step": 167 }, { "epoch": 0.3302752293577982, "grad_norm": 0.40283092856407166, "learning_rate": 9.997046720132262e-06, "loss": 0.5109, "step": 168 }, { "epoch": 0.33224115334207077, "grad_norm": 0.3566778302192688, "learning_rate": 9.996639869374844e-06, "loss": 0.5119, "step": 169 }, { "epoch": 0.33420707732634336, "grad_norm": 0.4530947804450989, "learning_rate": 9.996206782310993e-06, "loss": 0.4852, "step": 170 }, { "epoch": 0.336173001310616, "grad_norm": 0.5231772065162659, "learning_rate": 9.995747461214752e-06, "loss": 0.5207, "step": 171 }, { "epoch": 0.3381389252948886, "grad_norm": 0.368431955575943, "learning_rate": 9.995261908497924e-06, "loss": 0.4918, "step": 172 }, { "epoch": 0.3401048492791612, "grad_norm": 0.5586115717887878, "learning_rate": 9.994750126710042e-06, "loss": 0.5079, "step": 173 }, { "epoch": 0.3420707732634338, "grad_norm": 0.39342769980430603, "learning_rate": 9.994212118538364e-06, "loss": 0.5113, "step": 174 }, { "epoch": 0.3440366972477064, "grad_norm": 0.5048351287841797, "learning_rate": 9.99364788680786e-06, "loss": 0.5225, "step": 175 }, { "epoch": 0.34600262123197906, "grad_norm": 0.34715694189071655, "learning_rate": 9.99305743448119e-06, "loss": 0.5115, "step": 176 }, { "epoch": 0.34796854521625165, "grad_norm": 0.48474031686782837, "learning_rate": 9.992440764658697e-06, "loss": 0.5279, "step": 177 }, { "epoch": 0.34993446920052423, "grad_norm": 0.37137722969055176, "learning_rate": 9.991797880578386e-06, "loss": 0.5229, "step": 178 }, { "epoch": 0.3519003931847969, "grad_norm": 0.37795570492744446, "learning_rate": 9.991128785615903e-06, "loss": 0.5286, "step": 179 }, { "epoch": 0.35386631716906947, "grad_norm": 0.3694796562194824, "learning_rate": 9.990433483284527e-06, "loss": 0.496, "step": 180 }, { "epoch": 0.35583224115334205, "grad_norm": 0.326913058757782, "learning_rate": 9.989711977235144e-06, "loss": 0.5149, "step": 181 }, { "epoch": 0.3577981651376147, "grad_norm": 0.3359353840351105, "learning_rate": 9.98896427125623e-06, "loss": 0.5189, "step": 182 }, { "epoch": 0.3597640891218873, "grad_norm": 0.3222823143005371, "learning_rate": 9.988190369273834e-06, "loss": 0.5131, "step": 183 }, { "epoch": 0.3617300131061599, "grad_norm": 0.36565500497817993, "learning_rate": 9.987390275351549e-06, "loss": 0.4966, "step": 184 }, { "epoch": 0.3636959370904325, "grad_norm": 0.37140581011772156, "learning_rate": 9.986563993690506e-06, "loss": 0.5059, "step": 185 }, { "epoch": 0.3656618610747051, "grad_norm": 0.39855438470840454, "learning_rate": 9.985711528629332e-06, "loss": 0.5185, "step": 186 }, { "epoch": 0.3676277850589777, "grad_norm": 0.4295189678668976, "learning_rate": 9.984832884644141e-06, "loss": 0.4974, "step": 187 }, { "epoch": 0.36959370904325034, "grad_norm": 0.32439813017845154, "learning_rate": 9.983928066348514e-06, "loss": 0.5009, "step": 188 }, { "epoch": 0.37155963302752293, "grad_norm": 0.4018779695034027, "learning_rate": 9.982997078493457e-06, "loss": 0.4995, "step": 189 }, { "epoch": 0.3735255570117955, "grad_norm": 0.31894901394844055, "learning_rate": 9.98203992596739e-06, "loss": 0.4826, "step": 190 }, { "epoch": 0.37549148099606816, "grad_norm": 0.3858886957168579, "learning_rate": 9.981056613796125e-06, "loss": 0.4977, "step": 191 }, { "epoch": 0.37745740498034075, "grad_norm": 0.42188769578933716, "learning_rate": 9.980047147142824e-06, "loss": 0.5067, "step": 192 }, { "epoch": 0.3794233289646134, "grad_norm": 0.3689168095588684, "learning_rate": 9.979011531307986e-06, "loss": 0.4952, "step": 193 }, { "epoch": 0.381389252948886, "grad_norm": 0.40118807554244995, "learning_rate": 9.977949771729411e-06, "loss": 0.4987, "step": 194 }, { "epoch": 0.38335517693315857, "grad_norm": 0.33744242787361145, "learning_rate": 9.976861873982177e-06, "loss": 0.5017, "step": 195 }, { "epoch": 0.3853211009174312, "grad_norm": 0.3832445740699768, "learning_rate": 9.975747843778606e-06, "loss": 0.4903, "step": 196 }, { "epoch": 0.3872870249017038, "grad_norm": 0.3638055622577667, "learning_rate": 9.974607686968239e-06, "loss": 0.499, "step": 197 }, { "epoch": 0.3892529488859764, "grad_norm": 0.3829182982444763, "learning_rate": 9.973441409537795e-06, "loss": 0.5031, "step": 198 }, { "epoch": 0.39121887287024903, "grad_norm": 0.3455500304698944, "learning_rate": 9.972249017611153e-06, "loss": 0.5183, "step": 199 }, { "epoch": 0.3931847968545216, "grad_norm": 0.36498287320137024, "learning_rate": 9.971030517449314e-06, "loss": 0.5076, "step": 200 }, { "epoch": 0.3951507208387942, "grad_norm": 0.3443746864795685, "learning_rate": 9.969785915450368e-06, "loss": 0.4911, "step": 201 }, { "epoch": 0.39711664482306686, "grad_norm": 0.3428663909435272, "learning_rate": 9.968515218149455e-06, "loss": 0.5154, "step": 202 }, { "epoch": 0.39908256880733944, "grad_norm": 0.34340935945510864, "learning_rate": 9.967218432218742e-06, "loss": 0.4961, "step": 203 }, { "epoch": 0.40104849279161203, "grad_norm": 0.3325728178024292, "learning_rate": 9.965895564467381e-06, "loss": 0.5166, "step": 204 }, { "epoch": 0.4030144167758847, "grad_norm": 0.29527515172958374, "learning_rate": 9.96454662184147e-06, "loss": 0.4952, "step": 205 }, { "epoch": 0.40498034076015726, "grad_norm": 0.3524538576602936, "learning_rate": 9.963171611424022e-06, "loss": 0.4947, "step": 206 }, { "epoch": 0.4069462647444299, "grad_norm": 0.32383114099502563, "learning_rate": 9.961770540434931e-06, "loss": 0.472, "step": 207 }, { "epoch": 0.4089121887287025, "grad_norm": 0.3288062810897827, "learning_rate": 9.960343416230926e-06, "loss": 0.5072, "step": 208 }, { "epoch": 0.4108781127129751, "grad_norm": 0.34645599126815796, "learning_rate": 9.958890246305534e-06, "loss": 0.5052, "step": 209 }, { "epoch": 0.41284403669724773, "grad_norm": 0.3500404953956604, "learning_rate": 9.95741103828905e-06, "loss": 0.4891, "step": 210 }, { "epoch": 0.4148099606815203, "grad_norm": 0.3329164683818817, "learning_rate": 9.95590579994848e-06, "loss": 0.4768, "step": 211 }, { "epoch": 0.4167758846657929, "grad_norm": 0.34561702609062195, "learning_rate": 9.954374539187516e-06, "loss": 0.5009, "step": 212 }, { "epoch": 0.41874180865006555, "grad_norm": 0.30753183364868164, "learning_rate": 9.952817264046486e-06, "loss": 0.5008, "step": 213 }, { "epoch": 0.42070773263433814, "grad_norm": 0.30183136463165283, "learning_rate": 9.951233982702315e-06, "loss": 0.4982, "step": 214 }, { "epoch": 0.4226736566186107, "grad_norm": 0.3075961768627167, "learning_rate": 9.949624703468482e-06, "loss": 0.4795, "step": 215 }, { "epoch": 0.42463958060288337, "grad_norm": 0.32029989361763, "learning_rate": 9.947989434794973e-06, "loss": 0.5324, "step": 216 }, { "epoch": 0.42660550458715596, "grad_norm": 0.32862916588783264, "learning_rate": 9.946328185268242e-06, "loss": 0.4994, "step": 217 }, { "epoch": 0.42857142857142855, "grad_norm": 0.3346083462238312, "learning_rate": 9.944640963611156e-06, "loss": 0.4903, "step": 218 }, { "epoch": 0.4305373525557012, "grad_norm": 0.33467158675193787, "learning_rate": 9.942927778682968e-06, "loss": 0.4751, "step": 219 }, { "epoch": 0.4325032765399738, "grad_norm": 0.2937348186969757, "learning_rate": 9.94118863947925e-06, "loss": 0.4885, "step": 220 }, { "epoch": 0.43446920052424637, "grad_norm": 0.3598921000957489, "learning_rate": 9.939423555131856e-06, "loss": 0.5071, "step": 221 }, { "epoch": 0.436435124508519, "grad_norm": 0.2920673191547394, "learning_rate": 9.937632534908872e-06, "loss": 0.485, "step": 222 }, { "epoch": 0.4384010484927916, "grad_norm": 0.32336753606796265, "learning_rate": 9.935815588214573e-06, "loss": 0.4877, "step": 223 }, { "epoch": 0.44036697247706424, "grad_norm": 0.30055752396583557, "learning_rate": 9.93397272458936e-06, "loss": 0.4767, "step": 224 }, { "epoch": 0.44233289646133683, "grad_norm": 0.30270230770111084, "learning_rate": 9.932103953709724e-06, "loss": 0.5157, "step": 225 }, { "epoch": 0.4442988204456094, "grad_norm": 0.30203357338905334, "learning_rate": 9.93020928538819e-06, "loss": 0.4799, "step": 226 }, { "epoch": 0.44626474442988207, "grad_norm": 0.32589325308799744, "learning_rate": 9.928288729573264e-06, "loss": 0.4841, "step": 227 }, { "epoch": 0.44823066841415465, "grad_norm": 0.2911665141582489, "learning_rate": 9.926342296349378e-06, "loss": 0.4773, "step": 228 }, { "epoch": 0.45019659239842724, "grad_norm": 0.3272002041339874, "learning_rate": 9.924369995936846e-06, "loss": 0.4848, "step": 229 }, { "epoch": 0.4521625163826999, "grad_norm": 0.2837628722190857, "learning_rate": 9.922371838691803e-06, "loss": 0.4823, "step": 230 }, { "epoch": 0.4541284403669725, "grad_norm": 0.306533545255661, "learning_rate": 9.920347835106152e-06, "loss": 0.4891, "step": 231 }, { "epoch": 0.45609436435124506, "grad_norm": 0.3979865610599518, "learning_rate": 9.918297995807511e-06, "loss": 0.4827, "step": 232 }, { "epoch": 0.4580602883355177, "grad_norm": 0.28756773471832275, "learning_rate": 9.916222331559158e-06, "loss": 0.5067, "step": 233 }, { "epoch": 0.4600262123197903, "grad_norm": 0.44354233145713806, "learning_rate": 9.914120853259968e-06, "loss": 0.4736, "step": 234 }, { "epoch": 0.4619921363040629, "grad_norm": 0.3389144241809845, "learning_rate": 9.911993571944361e-06, "loss": 0.4674, "step": 235 }, { "epoch": 0.4639580602883355, "grad_norm": 0.40141353011131287, "learning_rate": 9.909840498782246e-06, "loss": 0.4988, "step": 236 }, { "epoch": 0.4659239842726081, "grad_norm": 0.3574008345603943, "learning_rate": 9.90766164507896e-06, "loss": 0.4823, "step": 237 }, { "epoch": 0.46788990825688076, "grad_norm": 0.32266223430633545, "learning_rate": 9.9054570222752e-06, "loss": 0.4858, "step": 238 }, { "epoch": 0.46985583224115335, "grad_norm": 0.34144648909568787, "learning_rate": 9.903226641946982e-06, "loss": 0.4908, "step": 239 }, { "epoch": 0.47182175622542594, "grad_norm": 0.30944961309432983, "learning_rate": 9.900970515805564e-06, "loss": 0.4907, "step": 240 }, { "epoch": 0.4737876802096986, "grad_norm": 0.38036513328552246, "learning_rate": 9.89868865569739e-06, "loss": 0.5004, "step": 241 }, { "epoch": 0.47575360419397117, "grad_norm": 0.3423393964767456, "learning_rate": 9.89638107360403e-06, "loss": 0.4859, "step": 242 }, { "epoch": 0.47771952817824376, "grad_norm": 0.38603687286376953, "learning_rate": 9.89404778164211e-06, "loss": 0.4858, "step": 243 }, { "epoch": 0.4796854521625164, "grad_norm": 0.2823747992515564, "learning_rate": 9.891688792063254e-06, "loss": 0.481, "step": 244 }, { "epoch": 0.481651376146789, "grad_norm": 0.39549919962882996, "learning_rate": 9.889304117254027e-06, "loss": 0.5326, "step": 245 }, { "epoch": 0.4836173001310616, "grad_norm": 0.3132837116718292, "learning_rate": 9.886893769735852e-06, "loss": 0.4933, "step": 246 }, { "epoch": 0.4855832241153342, "grad_norm": 0.36471930146217346, "learning_rate": 9.884457762164958e-06, "loss": 0.4974, "step": 247 }, { "epoch": 0.4875491480996068, "grad_norm": 0.32906249165534973, "learning_rate": 9.881996107332307e-06, "loss": 0.4802, "step": 248 }, { "epoch": 0.4895150720838794, "grad_norm": 0.3176659047603607, "learning_rate": 9.879508818163536e-06, "loss": 0.4793, "step": 249 }, { "epoch": 0.49148099606815204, "grad_norm": 0.32096564769744873, "learning_rate": 9.876995907718875e-06, "loss": 0.492, "step": 250 }, { "epoch": 0.49344692005242463, "grad_norm": 0.3479171693325043, "learning_rate": 9.87445738919309e-06, "loss": 0.4985, "step": 251 }, { "epoch": 0.4954128440366973, "grad_norm": 0.3736298978328705, "learning_rate": 9.871893275915408e-06, "loss": 0.4944, "step": 252 }, { "epoch": 0.49737876802096986, "grad_norm": 0.2852229177951813, "learning_rate": 9.869303581349448e-06, "loss": 0.4613, "step": 253 }, { "epoch": 0.49934469200524245, "grad_norm": 0.3456667363643646, "learning_rate": 9.866688319093155e-06, "loss": 0.4977, "step": 254 }, { "epoch": 0.5013106159895151, "grad_norm": 0.27879470586776733, "learning_rate": 9.864047502878717e-06, "loss": 0.4823, "step": 255 }, { "epoch": 0.5032765399737876, "grad_norm": 0.3423708975315094, "learning_rate": 9.86138114657251e-06, "loss": 0.4771, "step": 256 }, { "epoch": 0.5052424639580603, "grad_norm": 0.3409838080406189, "learning_rate": 9.858689264175012e-06, "loss": 0.4648, "step": 257 }, { "epoch": 0.5072083879423329, "grad_norm": 0.32440969347953796, "learning_rate": 9.855971869820726e-06, "loss": 0.4696, "step": 258 }, { "epoch": 0.5091743119266054, "grad_norm": 0.32682913541793823, "learning_rate": 9.853228977778125e-06, "loss": 0.4853, "step": 259 }, { "epoch": 0.5111402359108781, "grad_norm": 0.38436365127563477, "learning_rate": 9.850460602449555e-06, "loss": 0.4838, "step": 260 }, { "epoch": 0.5131061598951507, "grad_norm": 0.33962368965148926, "learning_rate": 9.847666758371175e-06, "loss": 0.5017, "step": 261 }, { "epoch": 0.5150720838794234, "grad_norm": 0.3401210606098175, "learning_rate": 9.844847460212875e-06, "loss": 0.4689, "step": 262 }, { "epoch": 0.5170380078636959, "grad_norm": 0.3694901168346405, "learning_rate": 9.842002722778191e-06, "loss": 0.4848, "step": 263 }, { "epoch": 0.5190039318479686, "grad_norm": 0.3879911005496979, "learning_rate": 9.83913256100425e-06, "loss": 0.4919, "step": 264 }, { "epoch": 0.5209698558322412, "grad_norm": 0.3491765856742859, "learning_rate": 9.836236989961665e-06, "loss": 0.5028, "step": 265 }, { "epoch": 0.5229357798165137, "grad_norm": 0.384046345949173, "learning_rate": 9.83331602485447e-06, "loss": 0.4799, "step": 266 }, { "epoch": 0.5249017038007864, "grad_norm": 0.35017964243888855, "learning_rate": 9.830369681020043e-06, "loss": 0.4887, "step": 267 }, { "epoch": 0.526867627785059, "grad_norm": 0.3668372929096222, "learning_rate": 9.827397973929012e-06, "loss": 0.4781, "step": 268 }, { "epoch": 0.5288335517693316, "grad_norm": 0.3488873243331909, "learning_rate": 9.82440091918519e-06, "loss": 0.4976, "step": 269 }, { "epoch": 0.5307994757536042, "grad_norm": 0.3492840528488159, "learning_rate": 9.821378532525479e-06, "loss": 0.4939, "step": 270 }, { "epoch": 0.5327653997378768, "grad_norm": 0.3887472450733185, "learning_rate": 9.8183308298198e-06, "loss": 0.5051, "step": 271 }, { "epoch": 0.5347313237221494, "grad_norm": 0.41635629534721375, "learning_rate": 9.815257827071e-06, "loss": 0.4955, "step": 272 }, { "epoch": 0.536697247706422, "grad_norm": 0.31790298223495483, "learning_rate": 9.812159540414766e-06, "loss": 0.4727, "step": 273 }, { "epoch": 0.5386631716906947, "grad_norm": 0.35766008496284485, "learning_rate": 9.809035986119553e-06, "loss": 0.4765, "step": 274 }, { "epoch": 0.5406290956749672, "grad_norm": 0.34865373373031616, "learning_rate": 9.80588718058649e-06, "loss": 0.4889, "step": 275 }, { "epoch": 0.5425950196592398, "grad_norm": 0.32705819606781006, "learning_rate": 9.802713140349294e-06, "loss": 0.5019, "step": 276 }, { "epoch": 0.5445609436435125, "grad_norm": 0.32511982321739197, "learning_rate": 9.799513882074182e-06, "loss": 0.4805, "step": 277 }, { "epoch": 0.546526867627785, "grad_norm": 0.3440779149532318, "learning_rate": 9.796289422559788e-06, "loss": 0.4611, "step": 278 }, { "epoch": 0.5484927916120577, "grad_norm": 0.3183061182498932, "learning_rate": 9.79303977873707e-06, "loss": 0.4733, "step": 279 }, { "epoch": 0.5504587155963303, "grad_norm": 0.33301064372062683, "learning_rate": 9.789764967669225e-06, "loss": 0.4995, "step": 280 }, { "epoch": 0.5524246395806028, "grad_norm": 0.3093130588531494, "learning_rate": 9.786465006551601e-06, "loss": 0.4795, "step": 281 }, { "epoch": 0.5543905635648755, "grad_norm": 0.31512442231178284, "learning_rate": 9.783139912711597e-06, "loss": 0.4864, "step": 282 }, { "epoch": 0.5563564875491481, "grad_norm": 0.3267965614795685, "learning_rate": 9.779789703608586e-06, "loss": 0.4891, "step": 283 }, { "epoch": 0.5583224115334207, "grad_norm": 0.30504485964775085, "learning_rate": 9.776414396833811e-06, "loss": 0.4658, "step": 284 }, { "epoch": 0.5602883355176933, "grad_norm": 0.33676230907440186, "learning_rate": 9.773014010110298e-06, "loss": 0.5032, "step": 285 }, { "epoch": 0.562254259501966, "grad_norm": 0.33661526441574097, "learning_rate": 9.769588561292762e-06, "loss": 0.4791, "step": 286 }, { "epoch": 0.5642201834862385, "grad_norm": 0.310126394033432, "learning_rate": 9.766138068367519e-06, "loss": 0.5007, "step": 287 }, { "epoch": 0.5661861074705111, "grad_norm": 0.35674116015434265, "learning_rate": 9.76266254945238e-06, "loss": 0.4751, "step": 288 }, { "epoch": 0.5681520314547838, "grad_norm": 0.3606201112270355, "learning_rate": 9.759162022796566e-06, "loss": 0.49, "step": 289 }, { "epoch": 0.5701179554390564, "grad_norm": 0.445045530796051, "learning_rate": 9.755636506780606e-06, "loss": 0.4909, "step": 290 }, { "epoch": 0.572083879423329, "grad_norm": 0.36715635657310486, "learning_rate": 9.752086019916246e-06, "loss": 0.5007, "step": 291 }, { "epoch": 0.5740498034076016, "grad_norm": 0.3720010221004486, "learning_rate": 9.748510580846344e-06, "loss": 0.5019, "step": 292 }, { "epoch": 0.5760157273918742, "grad_norm": 0.38443723320961, "learning_rate": 9.744910208344783e-06, "loss": 0.4956, "step": 293 }, { "epoch": 0.5779816513761468, "grad_norm": 0.3365614414215088, "learning_rate": 9.74128492131636e-06, "loss": 0.4902, "step": 294 }, { "epoch": 0.5799475753604194, "grad_norm": 0.35834208130836487, "learning_rate": 9.737634738796698e-06, "loss": 0.4822, "step": 295 }, { "epoch": 0.581913499344692, "grad_norm": 0.34508103132247925, "learning_rate": 9.733959679952136e-06, "loss": 0.4851, "step": 296 }, { "epoch": 0.5838794233289646, "grad_norm": 0.4581654965877533, "learning_rate": 9.730259764079636e-06, "loss": 0.502, "step": 297 }, { "epoch": 0.5858453473132372, "grad_norm": 0.34432968497276306, "learning_rate": 9.726535010606684e-06, "loss": 0.4866, "step": 298 }, { "epoch": 0.5878112712975099, "grad_norm": 0.43571075797080994, "learning_rate": 9.722785439091172e-06, "loss": 0.4901, "step": 299 }, { "epoch": 0.5897771952817824, "grad_norm": 0.3575364053249359, "learning_rate": 9.719011069221316e-06, "loss": 0.4866, "step": 300 }, { "epoch": 0.591743119266055, "grad_norm": 0.3271622061729431, "learning_rate": 9.715211920815538e-06, "loss": 0.4897, "step": 301 }, { "epoch": 0.5937090432503277, "grad_norm": 0.38964205980300903, "learning_rate": 9.711388013822369e-06, "loss": 0.4778, "step": 302 }, { "epoch": 0.5956749672346002, "grad_norm": 0.3184320330619812, "learning_rate": 9.70753936832034e-06, "loss": 0.5051, "step": 303 }, { "epoch": 0.5976408912188729, "grad_norm": 0.370286226272583, "learning_rate": 9.703666004517884e-06, "loss": 0.5047, "step": 304 }, { "epoch": 0.5996068152031455, "grad_norm": 0.32215046882629395, "learning_rate": 9.699767942753216e-06, "loss": 0.4933, "step": 305 }, { "epoch": 0.601572739187418, "grad_norm": 0.3631575107574463, "learning_rate": 9.695845203494242e-06, "loss": 0.4689, "step": 306 }, { "epoch": 0.6035386631716907, "grad_norm": 0.322794109582901, "learning_rate": 9.691897807338444e-06, "loss": 0.4796, "step": 307 }, { "epoch": 0.6055045871559633, "grad_norm": 0.3302377760410309, "learning_rate": 9.687925775012764e-06, "loss": 0.4847, "step": 308 }, { "epoch": 0.6074705111402359, "grad_norm": 0.3229452669620514, "learning_rate": 9.683929127373514e-06, "loss": 0.4777, "step": 309 }, { "epoch": 0.6094364351245085, "grad_norm": 0.29505693912506104, "learning_rate": 9.679907885406253e-06, "loss": 0.4902, "step": 310 }, { "epoch": 0.6114023591087812, "grad_norm": 0.37603944540023804, "learning_rate": 9.675862070225671e-06, "loss": 0.4877, "step": 311 }, { "epoch": 0.6133682830930537, "grad_norm": 0.3594703674316406, "learning_rate": 9.671791703075502e-06, "loss": 0.4912, "step": 312 }, { "epoch": 0.6153342070773263, "grad_norm": 0.32903850078582764, "learning_rate": 9.667696805328384e-06, "loss": 0.4944, "step": 313 }, { "epoch": 0.617300131061599, "grad_norm": 0.3076882064342499, "learning_rate": 9.663577398485765e-06, "loss": 0.4784, "step": 314 }, { "epoch": 0.6192660550458715, "grad_norm": 0.34379473328590393, "learning_rate": 9.659433504177786e-06, "loss": 0.4872, "step": 315 }, { "epoch": 0.6212319790301442, "grad_norm": 0.3046328127384186, "learning_rate": 9.655265144163164e-06, "loss": 0.4753, "step": 316 }, { "epoch": 0.6231979030144168, "grad_norm": 0.3317876160144806, "learning_rate": 9.651072340329087e-06, "loss": 0.4943, "step": 317 }, { "epoch": 0.6251638269986893, "grad_norm": 0.3519972860813141, "learning_rate": 9.646855114691081e-06, "loss": 0.4808, "step": 318 }, { "epoch": 0.627129750982962, "grad_norm": 0.34271880984306335, "learning_rate": 9.642613489392916e-06, "loss": 0.4735, "step": 319 }, { "epoch": 0.6290956749672346, "grad_norm": 0.31241729855537415, "learning_rate": 9.638347486706474e-06, "loss": 0.4605, "step": 320 }, { "epoch": 0.6310615989515073, "grad_norm": 0.33519551157951355, "learning_rate": 9.63405712903164e-06, "loss": 0.4917, "step": 321 }, { "epoch": 0.6330275229357798, "grad_norm": 0.33583033084869385, "learning_rate": 9.62974243889618e-06, "loss": 0.4685, "step": 322 }, { "epoch": 0.6349934469200524, "grad_norm": 0.33649152517318726, "learning_rate": 9.625403438955625e-06, "loss": 0.47, "step": 323 }, { "epoch": 0.6369593709043251, "grad_norm": 0.36066529154777527, "learning_rate": 9.621040151993153e-06, "loss": 0.4822, "step": 324 }, { "epoch": 0.6389252948885976, "grad_norm": 0.32451003789901733, "learning_rate": 9.616652600919468e-06, "loss": 0.4931, "step": 325 }, { "epoch": 0.6408912188728703, "grad_norm": 0.34756049513816833, "learning_rate": 9.61224080877268e-06, "loss": 0.4645, "step": 326 }, { "epoch": 0.6428571428571429, "grad_norm": 0.3787544071674347, "learning_rate": 9.607804798718182e-06, "loss": 0.463, "step": 327 }, { "epoch": 0.6448230668414154, "grad_norm": 0.3527866005897522, "learning_rate": 9.603344594048526e-06, "loss": 0.4617, "step": 328 }, { "epoch": 0.6467889908256881, "grad_norm": 0.353280633687973, "learning_rate": 9.598860218183318e-06, "loss": 0.4791, "step": 329 }, { "epoch": 0.6487549148099607, "grad_norm": 0.3094800114631653, "learning_rate": 9.59435169466907e-06, "loss": 0.4835, "step": 330 }, { "epoch": 0.6507208387942333, "grad_norm": 0.30767667293548584, "learning_rate": 9.589819047179094e-06, "loss": 0.4698, "step": 331 }, { "epoch": 0.6526867627785059, "grad_norm": 0.3043842315673828, "learning_rate": 9.585262299513366e-06, "loss": 0.4899, "step": 332 }, { "epoch": 0.6546526867627785, "grad_norm": 0.3159531056880951, "learning_rate": 9.580681475598413e-06, "loss": 0.469, "step": 333 }, { "epoch": 0.6566186107470511, "grad_norm": 0.3240654170513153, "learning_rate": 9.576076599487181e-06, "loss": 0.4888, "step": 334 }, { "epoch": 0.6585845347313237, "grad_norm": 0.32407113909721375, "learning_rate": 9.571447695358903e-06, "loss": 0.4726, "step": 335 }, { "epoch": 0.6605504587155964, "grad_norm": 0.33088815212249756, "learning_rate": 9.566794787518986e-06, "loss": 0.4531, "step": 336 }, { "epoch": 0.6625163826998689, "grad_norm": 0.3312188684940338, "learning_rate": 9.56211790039887e-06, "loss": 0.4933, "step": 337 }, { "epoch": 0.6644823066841415, "grad_norm": 0.34961023926734924, "learning_rate": 9.557417058555909e-06, "loss": 0.4705, "step": 338 }, { "epoch": 0.6664482306684142, "grad_norm": 0.3974532186985016, "learning_rate": 9.552692286673231e-06, "loss": 0.4846, "step": 339 }, { "epoch": 0.6684141546526867, "grad_norm": 0.3288216292858124, "learning_rate": 9.547943609559629e-06, "loss": 0.4617, "step": 340 }, { "epoch": 0.6703800786369594, "grad_norm": 0.3328869938850403, "learning_rate": 9.5431710521494e-06, "loss": 0.4801, "step": 341 }, { "epoch": 0.672346002621232, "grad_norm": 0.34561023116111755, "learning_rate": 9.538374639502247e-06, "loss": 0.4926, "step": 342 }, { "epoch": 0.6743119266055045, "grad_norm": 0.30892154574394226, "learning_rate": 9.533554396803123e-06, "loss": 0.4694, "step": 343 }, { "epoch": 0.6762778505897772, "grad_norm": 0.33325186371803284, "learning_rate": 9.528710349362112e-06, "loss": 0.4772, "step": 344 }, { "epoch": 0.6782437745740498, "grad_norm": 0.31278443336486816, "learning_rate": 9.523842522614285e-06, "loss": 0.4896, "step": 345 }, { "epoch": 0.6802096985583224, "grad_norm": 0.3059135377407074, "learning_rate": 9.518950942119582e-06, "loss": 0.5063, "step": 346 }, { "epoch": 0.682175622542595, "grad_norm": 0.400730162858963, "learning_rate": 9.514035633562662e-06, "loss": 0.4876, "step": 347 }, { "epoch": 0.6841415465268676, "grad_norm": 0.3564932942390442, "learning_rate": 9.509096622752781e-06, "loss": 0.4819, "step": 348 }, { "epoch": 0.6861074705111402, "grad_norm": 0.30216097831726074, "learning_rate": 9.504133935623643e-06, "loss": 0.4565, "step": 349 }, { "epoch": 0.6880733944954128, "grad_norm": 0.3681231737136841, "learning_rate": 9.49914759823328e-06, "loss": 0.4619, "step": 350 }, { "epoch": 0.6900393184796855, "grad_norm": 0.2910490334033966, "learning_rate": 9.4941376367639e-06, "loss": 0.4784, "step": 351 }, { "epoch": 0.6920052424639581, "grad_norm": 0.43464285135269165, "learning_rate": 9.489104077521761e-06, "loss": 0.4861, "step": 352 }, { "epoch": 0.6939711664482306, "grad_norm": 0.31047558784484863, "learning_rate": 9.484046946937021e-06, "loss": 0.4656, "step": 353 }, { "epoch": 0.6959370904325033, "grad_norm": 0.352581650018692, "learning_rate": 9.478966271563614e-06, "loss": 0.4823, "step": 354 }, { "epoch": 0.6979030144167759, "grad_norm": 0.3447401523590088, "learning_rate": 9.473862078079098e-06, "loss": 0.4749, "step": 355 }, { "epoch": 0.6998689384010485, "grad_norm": 0.3738485276699066, "learning_rate": 9.468734393284517e-06, "loss": 0.4895, "step": 356 }, { "epoch": 0.7018348623853211, "grad_norm": 0.32498571276664734, "learning_rate": 9.463583244104274e-06, "loss": 0.4939, "step": 357 }, { "epoch": 0.7038007863695938, "grad_norm": 0.31145179271698, "learning_rate": 9.458408657585968e-06, "loss": 0.466, "step": 358 }, { "epoch": 0.7057667103538663, "grad_norm": 0.323177307844162, "learning_rate": 9.453210660900264e-06, "loss": 0.5062, "step": 359 }, { "epoch": 0.7077326343381389, "grad_norm": 0.2810510993003845, "learning_rate": 9.447989281340753e-06, "loss": 0.4782, "step": 360 }, { "epoch": 0.7096985583224116, "grad_norm": 0.36067265272140503, "learning_rate": 9.442744546323804e-06, "loss": 0.4846, "step": 361 }, { "epoch": 0.7116644823066841, "grad_norm": 0.2921036183834076, "learning_rate": 9.437476483388415e-06, "loss": 0.4735, "step": 362 }, { "epoch": 0.7136304062909568, "grad_norm": 0.30914342403411865, "learning_rate": 9.43218512019608e-06, "loss": 0.4662, "step": 363 }, { "epoch": 0.7155963302752294, "grad_norm": 0.311714768409729, "learning_rate": 9.426870484530638e-06, "loss": 0.4682, "step": 364 }, { "epoch": 0.7175622542595019, "grad_norm": 0.3075977861881256, "learning_rate": 9.421532604298118e-06, "loss": 0.4855, "step": 365 }, { "epoch": 0.7195281782437746, "grad_norm": 0.318114310503006, "learning_rate": 9.416171507526615e-06, "loss": 0.4973, "step": 366 }, { "epoch": 0.7214941022280472, "grad_norm": 0.3103352189064026, "learning_rate": 9.410787222366117e-06, "loss": 0.4784, "step": 367 }, { "epoch": 0.7234600262123198, "grad_norm": 0.31439492106437683, "learning_rate": 9.405379777088379e-06, "loss": 0.4677, "step": 368 }, { "epoch": 0.7254259501965924, "grad_norm": 0.28842946887016296, "learning_rate": 9.399949200086757e-06, "loss": 0.482, "step": 369 }, { "epoch": 0.727391874180865, "grad_norm": 0.3343795835971832, "learning_rate": 9.394495519876071e-06, "loss": 0.4704, "step": 370 }, { "epoch": 0.7293577981651376, "grad_norm": 0.3141765892505646, "learning_rate": 9.38901876509245e-06, "loss": 0.455, "step": 371 }, { "epoch": 0.7313237221494102, "grad_norm": 0.28839895129203796, "learning_rate": 9.383518964493183e-06, "loss": 0.484, "step": 372 }, { "epoch": 0.7332896461336829, "grad_norm": 0.3266107141971588, "learning_rate": 9.377996146956569e-06, "loss": 0.4964, "step": 373 }, { "epoch": 0.7352555701179554, "grad_norm": 0.3106229901313782, "learning_rate": 9.372450341481762e-06, "loss": 0.4823, "step": 374 }, { "epoch": 0.737221494102228, "grad_norm": 0.30188578367233276, "learning_rate": 9.36688157718862e-06, "loss": 0.4625, "step": 375 }, { "epoch": 0.7391874180865007, "grad_norm": 0.35195842385292053, "learning_rate": 9.361289883317558e-06, "loss": 0.4947, "step": 376 }, { "epoch": 0.7411533420707732, "grad_norm": 0.31608274579048157, "learning_rate": 9.355675289229384e-06, "loss": 0.4843, "step": 377 }, { "epoch": 0.7431192660550459, "grad_norm": 0.3384999930858612, "learning_rate": 9.350037824405151e-06, "loss": 0.4908, "step": 378 }, { "epoch": 0.7450851900393185, "grad_norm": 0.3566296696662903, "learning_rate": 9.344377518446006e-06, "loss": 0.4744, "step": 379 }, { "epoch": 0.747051114023591, "grad_norm": 0.34384074807167053, "learning_rate": 9.338694401073026e-06, "loss": 0.4744, "step": 380 }, { "epoch": 0.7490170380078637, "grad_norm": 0.3111206889152527, "learning_rate": 9.332988502127063e-06, "loss": 0.4749, "step": 381 }, { "epoch": 0.7509829619921363, "grad_norm": 0.29371377825737, "learning_rate": 9.327259851568596e-06, "loss": 0.4849, "step": 382 }, { "epoch": 0.752948885976409, "grad_norm": 0.2952675521373749, "learning_rate": 9.321508479477568e-06, "loss": 0.464, "step": 383 }, { "epoch": 0.7549148099606815, "grad_norm": 0.2904905676841736, "learning_rate": 9.315734416053223e-06, "loss": 0.47, "step": 384 }, { "epoch": 0.7568807339449541, "grad_norm": 0.30284953117370605, "learning_rate": 9.309937691613957e-06, "loss": 0.4658, "step": 385 }, { "epoch": 0.7588466579292268, "grad_norm": 0.28848186135292053, "learning_rate": 9.30411833659715e-06, "loss": 0.4643, "step": 386 }, { "epoch": 0.7608125819134993, "grad_norm": 0.32645782828330994, "learning_rate": 9.298276381559015e-06, "loss": 0.489, "step": 387 }, { "epoch": 0.762778505897772, "grad_norm": 0.3258381187915802, "learning_rate": 9.29241185717443e-06, "loss": 0.4827, "step": 388 }, { "epoch": 0.7647444298820446, "grad_norm": 0.41076749563217163, "learning_rate": 9.286524794236783e-06, "loss": 0.4868, "step": 389 }, { "epoch": 0.7667103538663171, "grad_norm": 0.299274742603302, "learning_rate": 9.280615223657801e-06, "loss": 0.48, "step": 390 }, { "epoch": 0.7686762778505898, "grad_norm": 0.4232471287250519, "learning_rate": 9.2746831764674e-06, "loss": 0.4899, "step": 391 }, { "epoch": 0.7706422018348624, "grad_norm": 0.3225405514240265, "learning_rate": 9.268728683813517e-06, "loss": 0.4911, "step": 392 }, { "epoch": 0.772608125819135, "grad_norm": 0.4578843116760254, "learning_rate": 9.262751776961936e-06, "loss": 0.5114, "step": 393 }, { "epoch": 0.7745740498034076, "grad_norm": 0.32411909103393555, "learning_rate": 9.256752487296142e-06, "loss": 0.4885, "step": 394 }, { "epoch": 0.7765399737876802, "grad_norm": 0.3600336015224457, "learning_rate": 9.250730846317145e-06, "loss": 0.4759, "step": 395 }, { "epoch": 0.7785058977719528, "grad_norm": 0.2981262505054474, "learning_rate": 9.24468688564332e-06, "loss": 0.4608, "step": 396 }, { "epoch": 0.7804718217562254, "grad_norm": 0.347411185503006, "learning_rate": 9.23862063701023e-06, "loss": 0.4833, "step": 397 }, { "epoch": 0.7824377457404981, "grad_norm": 0.34579214453697205, "learning_rate": 9.232532132270472e-06, "loss": 0.4734, "step": 398 }, { "epoch": 0.7844036697247706, "grad_norm": 0.3555125594139099, "learning_rate": 9.226421403393513e-06, "loss": 0.4809, "step": 399 }, { "epoch": 0.7863695937090432, "grad_norm": 0.3262729346752167, "learning_rate": 9.220288482465499e-06, "loss": 0.4746, "step": 400 }, { "epoch": 0.7883355176933159, "grad_norm": 0.39413920044898987, "learning_rate": 9.214133401689113e-06, "loss": 0.48, "step": 401 }, { "epoch": 0.7903014416775884, "grad_norm": 0.33590251207351685, "learning_rate": 9.207956193383392e-06, "loss": 0.4907, "step": 402 }, { "epoch": 0.7922673656618611, "grad_norm": 0.3144283592700958, "learning_rate": 9.201756889983557e-06, "loss": 0.4786, "step": 403 }, { "epoch": 0.7942332896461337, "grad_norm": 0.4004082977771759, "learning_rate": 9.19553552404085e-06, "loss": 0.4879, "step": 404 }, { "epoch": 0.7961992136304062, "grad_norm": 0.31236234307289124, "learning_rate": 9.189292128222355e-06, "loss": 0.4764, "step": 405 }, { "epoch": 0.7981651376146789, "grad_norm": 0.4149709939956665, "learning_rate": 9.183026735310834e-06, "loss": 0.4581, "step": 406 }, { "epoch": 0.8001310615989515, "grad_norm": 0.3310157358646393, "learning_rate": 9.17673937820455e-06, "loss": 0.4735, "step": 407 }, { "epoch": 0.8020969855832241, "grad_norm": 0.37418389320373535, "learning_rate": 9.170430089917089e-06, "loss": 0.4939, "step": 408 }, { "epoch": 0.8040629095674967, "grad_norm": 0.3040340840816498, "learning_rate": 9.164098903577203e-06, "loss": 0.4569, "step": 409 }, { "epoch": 0.8060288335517694, "grad_norm": 0.37164947390556335, "learning_rate": 9.157745852428624e-06, "loss": 0.4764, "step": 410 }, { "epoch": 0.8079947575360419, "grad_norm": 0.33743494749069214, "learning_rate": 9.151370969829883e-06, "loss": 0.4776, "step": 411 }, { "epoch": 0.8099606815203145, "grad_norm": 0.39068764448165894, "learning_rate": 9.144974289254154e-06, "loss": 0.4812, "step": 412 }, { "epoch": 0.8119266055045872, "grad_norm": 0.3487973213195801, "learning_rate": 9.138555844289061e-06, "loss": 0.4847, "step": 413 }, { "epoch": 0.8138925294888598, "grad_norm": 0.42854028940200806, "learning_rate": 9.132115668636512e-06, "loss": 0.4738, "step": 414 }, { "epoch": 0.8158584534731324, "grad_norm": 0.2957235276699066, "learning_rate": 9.125653796112514e-06, "loss": 0.4461, "step": 415 }, { "epoch": 0.817824377457405, "grad_norm": 0.44258782267570496, "learning_rate": 9.119170260647005e-06, "loss": 0.5026, "step": 416 }, { "epoch": 0.8197903014416776, "grad_norm": 0.334903359413147, "learning_rate": 9.112665096283668e-06, "loss": 0.4774, "step": 417 }, { "epoch": 0.8217562254259502, "grad_norm": 0.3543790876865387, "learning_rate": 9.106138337179754e-06, "loss": 0.4854, "step": 418 }, { "epoch": 0.8237221494102228, "grad_norm": 0.3583733141422272, "learning_rate": 9.099590017605903e-06, "loss": 0.48, "step": 419 }, { "epoch": 0.8256880733944955, "grad_norm": 0.3436172902584076, "learning_rate": 9.093020171945966e-06, "loss": 0.45, "step": 420 }, { "epoch": 0.827653997378768, "grad_norm": 0.3209732472896576, "learning_rate": 9.086428834696824e-06, "loss": 0.4821, "step": 421 }, { "epoch": 0.8296199213630406, "grad_norm": 0.37522175908088684, "learning_rate": 9.079816040468201e-06, "loss": 0.4611, "step": 422 }, { "epoch": 0.8315858453473133, "grad_norm": 0.30042916536331177, "learning_rate": 9.073181823982495e-06, "loss": 0.5015, "step": 423 }, { "epoch": 0.8335517693315858, "grad_norm": 0.3324982523918152, "learning_rate": 9.066526220074582e-06, "loss": 0.4796, "step": 424 }, { "epoch": 0.8355176933158585, "grad_norm": 0.352521151304245, "learning_rate": 9.059849263691638e-06, "loss": 0.4543, "step": 425 }, { "epoch": 0.8374836173001311, "grad_norm": 0.2786426842212677, "learning_rate": 9.05315098989296e-06, "loss": 0.4571, "step": 426 }, { "epoch": 0.8394495412844036, "grad_norm": 0.3154030740261078, "learning_rate": 9.046431433849778e-06, "loss": 0.478, "step": 427 }, { "epoch": 0.8414154652686763, "grad_norm": 0.29987379908561707, "learning_rate": 9.039690630845067e-06, "loss": 0.4688, "step": 428 }, { "epoch": 0.8433813892529489, "grad_norm": 0.3286818861961365, "learning_rate": 9.032928616273369e-06, "loss": 0.479, "step": 429 }, { "epoch": 0.8453473132372215, "grad_norm": 0.3046414852142334, "learning_rate": 9.026145425640601e-06, "loss": 0.4758, "step": 430 }, { "epoch": 0.8473132372214941, "grad_norm": 0.32858696579933167, "learning_rate": 9.019341094563875e-06, "loss": 0.4703, "step": 431 }, { "epoch": 0.8492791612057667, "grad_norm": 0.29131099581718445, "learning_rate": 9.012515658771301e-06, "loss": 0.4745, "step": 432 }, { "epoch": 0.8512450851900393, "grad_norm": 0.3147415220737457, "learning_rate": 9.005669154101811e-06, "loss": 0.4996, "step": 433 }, { "epoch": 0.8532110091743119, "grad_norm": 0.32791829109191895, "learning_rate": 8.998801616504962e-06, "loss": 0.4642, "step": 434 }, { "epoch": 0.8551769331585846, "grad_norm": 0.3501948416233063, "learning_rate": 8.991913082040752e-06, "loss": 0.4884, "step": 435 }, { "epoch": 0.8571428571428571, "grad_norm": 0.3144097328186035, "learning_rate": 8.985003586879428e-06, "loss": 0.4627, "step": 436 }, { "epoch": 0.8591087811271297, "grad_norm": 0.32123497128486633, "learning_rate": 8.978073167301298e-06, "loss": 0.4683, "step": 437 }, { "epoch": 0.8610747051114024, "grad_norm": 0.3028917610645294, "learning_rate": 8.971121859696539e-06, "loss": 0.4688, "step": 438 }, { "epoch": 0.8630406290956749, "grad_norm": 0.29881271719932556, "learning_rate": 8.964149700565006e-06, "loss": 0.466, "step": 439 }, { "epoch": 0.8650065530799476, "grad_norm": 0.33461838960647583, "learning_rate": 8.957156726516047e-06, "loss": 0.4834, "step": 440 }, { "epoch": 0.8669724770642202, "grad_norm": 0.29429683089256287, "learning_rate": 8.950142974268295e-06, "loss": 0.4531, "step": 441 }, { "epoch": 0.8689384010484927, "grad_norm": 0.3107634484767914, "learning_rate": 8.943108480649492e-06, "loss": 0.4488, "step": 442 }, { "epoch": 0.8709043250327654, "grad_norm": 0.3410981595516205, "learning_rate": 8.936053282596284e-06, "loss": 0.4803, "step": 443 }, { "epoch": 0.872870249017038, "grad_norm": 0.2996877133846283, "learning_rate": 8.928977417154037e-06, "loss": 0.469, "step": 444 }, { "epoch": 0.8748361730013107, "grad_norm": 0.3480963408946991, "learning_rate": 8.921880921476634e-06, "loss": 0.4671, "step": 445 }, { "epoch": 0.8768020969855832, "grad_norm": 0.2955884635448456, "learning_rate": 8.914763832826282e-06, "loss": 0.4607, "step": 446 }, { "epoch": 0.8787680209698558, "grad_norm": 0.34652408957481384, "learning_rate": 8.907626188573319e-06, "loss": 0.4946, "step": 447 }, { "epoch": 0.8807339449541285, "grad_norm": 0.3643608093261719, "learning_rate": 8.900468026196017e-06, "loss": 0.4721, "step": 448 }, { "epoch": 0.882699868938401, "grad_norm": 0.26901280879974365, "learning_rate": 8.893289383280379e-06, "loss": 0.4799, "step": 449 }, { "epoch": 0.8846657929226737, "grad_norm": 0.36749908328056335, "learning_rate": 8.886090297519956e-06, "loss": 0.4523, "step": 450 }, { "epoch": 0.8866317169069463, "grad_norm": 0.3095279932022095, "learning_rate": 8.878870806715631e-06, "loss": 0.4628, "step": 451 }, { "epoch": 0.8885976408912188, "grad_norm": 0.3318431079387665, "learning_rate": 8.871630948775438e-06, "loss": 0.4787, "step": 452 }, { "epoch": 0.8905635648754915, "grad_norm": 0.3512536585330963, "learning_rate": 8.864370761714348e-06, "loss": 0.4677, "step": 453 }, { "epoch": 0.8925294888597641, "grad_norm": 0.2928638756275177, "learning_rate": 8.857090283654078e-06, "loss": 0.4718, "step": 454 }, { "epoch": 0.8944954128440367, "grad_norm": 0.2978120744228363, "learning_rate": 8.84978955282289e-06, "loss": 0.4771, "step": 455 }, { "epoch": 0.8964613368283093, "grad_norm": 0.2687295079231262, "learning_rate": 8.842468607555389e-06, "loss": 0.4753, "step": 456 }, { "epoch": 0.898427260812582, "grad_norm": 0.2807966470718384, "learning_rate": 8.835127486292318e-06, "loss": 0.456, "step": 457 }, { "epoch": 0.9003931847968545, "grad_norm": 0.25572317838668823, "learning_rate": 8.827766227580366e-06, "loss": 0.4701, "step": 458 }, { "epoch": 0.9023591087811271, "grad_norm": 0.30143314599990845, "learning_rate": 8.820384870071951e-06, "loss": 0.4743, "step": 459 }, { "epoch": 0.9043250327653998, "grad_norm": 0.2788403034210205, "learning_rate": 8.812983452525038e-06, "loss": 0.4877, "step": 460 }, { "epoch": 0.9062909567496723, "grad_norm": 0.3468334674835205, "learning_rate": 8.805562013802914e-06, "loss": 0.4678, "step": 461 }, { "epoch": 0.908256880733945, "grad_norm": 0.2584637403488159, "learning_rate": 8.79812059287399e-06, "loss": 0.4737, "step": 462 }, { "epoch": 0.9102228047182176, "grad_norm": 0.39624977111816406, "learning_rate": 8.790659228811609e-06, "loss": 0.4647, "step": 463 }, { "epoch": 0.9121887287024901, "grad_norm": 0.3191069960594177, "learning_rate": 8.783177960793825e-06, "loss": 0.4876, "step": 464 }, { "epoch": 0.9141546526867628, "grad_norm": 0.3358793258666992, "learning_rate": 8.775676828103205e-06, "loss": 0.4643, "step": 465 }, { "epoch": 0.9161205766710354, "grad_norm": 0.3253157436847687, "learning_rate": 8.768155870126622e-06, "loss": 0.4637, "step": 466 }, { "epoch": 0.918086500655308, "grad_norm": 0.33859390020370483, "learning_rate": 8.760615126355045e-06, "loss": 0.482, "step": 467 }, { "epoch": 0.9200524246395806, "grad_norm": 0.37952473759651184, "learning_rate": 8.753054636383336e-06, "loss": 0.4976, "step": 468 }, { "epoch": 0.9220183486238532, "grad_norm": 0.3037833273410797, "learning_rate": 8.745474439910043e-06, "loss": 0.4541, "step": 469 }, { "epoch": 0.9239842726081258, "grad_norm": 0.32147467136383057, "learning_rate": 8.73787457673718e-06, "loss": 0.4663, "step": 470 }, { "epoch": 0.9259501965923984, "grad_norm": 0.30466553568840027, "learning_rate": 8.730255086770037e-06, "loss": 0.4746, "step": 471 }, { "epoch": 0.927916120576671, "grad_norm": 0.2942729592323303, "learning_rate": 8.722616010016953e-06, "loss": 0.4775, "step": 472 }, { "epoch": 0.9298820445609436, "grad_norm": 0.30535486340522766, "learning_rate": 8.714957386589115e-06, "loss": 0.4621, "step": 473 }, { "epoch": 0.9318479685452162, "grad_norm": 0.30644193291664124, "learning_rate": 8.707279256700348e-06, "loss": 0.4674, "step": 474 }, { "epoch": 0.9338138925294889, "grad_norm": 0.314421147108078, "learning_rate": 8.699581660666897e-06, "loss": 0.4773, "step": 475 }, { "epoch": 0.9357798165137615, "grad_norm": 0.31318455934524536, "learning_rate": 8.691864638907224e-06, "loss": 0.4622, "step": 476 }, { "epoch": 0.937745740498034, "grad_norm": 0.3173219561576843, "learning_rate": 8.684128231941789e-06, "loss": 0.4775, "step": 477 }, { "epoch": 0.9397116644823067, "grad_norm": 0.31080108880996704, "learning_rate": 8.67637248039284e-06, "loss": 0.4701, "step": 478 }, { "epoch": 0.9416775884665793, "grad_norm": 0.2851126790046692, "learning_rate": 8.668597424984196e-06, "loss": 0.4605, "step": 479 }, { "epoch": 0.9436435124508519, "grad_norm": 0.2801235318183899, "learning_rate": 8.660803106541044e-06, "loss": 0.456, "step": 480 }, { "epoch": 0.9456094364351245, "grad_norm": 0.31265968084335327, "learning_rate": 8.65298956598971e-06, "loss": 0.4688, "step": 481 }, { "epoch": 0.9475753604193972, "grad_norm": 0.29262563586235046, "learning_rate": 8.64515684435746e-06, "loss": 0.4576, "step": 482 }, { "epoch": 0.9495412844036697, "grad_norm": 0.3408479690551758, "learning_rate": 8.637304982772263e-06, "loss": 0.4826, "step": 483 }, { "epoch": 0.9515072083879423, "grad_norm": 0.3060987889766693, "learning_rate": 8.629434022462598e-06, "loss": 0.4606, "step": 484 }, { "epoch": 0.953473132372215, "grad_norm": 0.29849743843078613, "learning_rate": 8.621544004757226e-06, "loss": 0.476, "step": 485 }, { "epoch": 0.9554390563564875, "grad_norm": 0.31482797861099243, "learning_rate": 8.613634971084967e-06, "loss": 0.4533, "step": 486 }, { "epoch": 0.9574049803407602, "grad_norm": 0.32803142070770264, "learning_rate": 8.605706962974503e-06, "loss": 0.4972, "step": 487 }, { "epoch": 0.9593709043250328, "grad_norm": 0.30527326464653015, "learning_rate": 8.597760022054134e-06, "loss": 0.4785, "step": 488 }, { "epoch": 0.9613368283093053, "grad_norm": 0.2986467182636261, "learning_rate": 8.589794190051582e-06, "loss": 0.4817, "step": 489 }, { "epoch": 0.963302752293578, "grad_norm": 0.2935220003128052, "learning_rate": 8.581809508793753e-06, "loss": 0.4977, "step": 490 }, { "epoch": 0.9652686762778506, "grad_norm": 0.3608831465244293, "learning_rate": 8.573806020206534e-06, "loss": 0.4872, "step": 491 }, { "epoch": 0.9672346002621232, "grad_norm": 0.2806779146194458, "learning_rate": 8.56578376631456e-06, "loss": 0.4599, "step": 492 }, { "epoch": 0.9692005242463958, "grad_norm": 0.3233669400215149, "learning_rate": 8.557742789241003e-06, "loss": 0.4578, "step": 493 }, { "epoch": 0.9711664482306684, "grad_norm": 0.29287710785865784, "learning_rate": 8.549683131207347e-06, "loss": 0.4728, "step": 494 }, { "epoch": 0.973132372214941, "grad_norm": 0.31075453758239746, "learning_rate": 8.541604834533159e-06, "loss": 0.4808, "step": 495 }, { "epoch": 0.9750982961992136, "grad_norm": 0.314624547958374, "learning_rate": 8.533507941635882e-06, "loss": 0.4746, "step": 496 }, { "epoch": 0.9770642201834863, "grad_norm": 0.2979452311992645, "learning_rate": 8.525392495030596e-06, "loss": 0.4732, "step": 497 }, { "epoch": 0.9790301441677588, "grad_norm": 0.2798508405685425, "learning_rate": 8.51725853732981e-06, "loss": 0.4902, "step": 498 }, { "epoch": 0.9809960681520314, "grad_norm": 0.32786044478416443, "learning_rate": 8.509106111243223e-06, "loss": 0.4829, "step": 499 }, { "epoch": 0.9829619921363041, "grad_norm": 0.32859888672828674, "learning_rate": 8.500935259577517e-06, "loss": 0.4585, "step": 500 }, { "epoch": 0.9849279161205766, "grad_norm": 0.2928418815135956, "learning_rate": 8.492746025236113e-06, "loss": 0.4396, "step": 501 }, { "epoch": 0.9868938401048493, "grad_norm": 0.32088446617126465, "learning_rate": 8.484538451218966e-06, "loss": 0.4814, "step": 502 }, { "epoch": 0.9888597640891219, "grad_norm": 0.33992063999176025, "learning_rate": 8.476312580622318e-06, "loss": 0.4865, "step": 503 }, { "epoch": 0.9908256880733946, "grad_norm": 0.39013153314590454, "learning_rate": 8.468068456638491e-06, "loss": 0.4599, "step": 504 }, { "epoch": 0.9927916120576671, "grad_norm": 0.33563679456710815, "learning_rate": 8.459806122555648e-06, "loss": 0.4696, "step": 505 }, { "epoch": 0.9947575360419397, "grad_norm": 0.4276735484600067, "learning_rate": 8.451525621757571e-06, "loss": 0.4824, "step": 506 }, { "epoch": 0.9967234600262124, "grad_norm": 0.2916342616081238, "learning_rate": 8.443226997723426e-06, "loss": 0.463, "step": 507 }, { "epoch": 0.9986893840104849, "grad_norm": 0.4475458860397339, "learning_rate": 8.434910294027547e-06, "loss": 0.4787, "step": 508 }, { "epoch": 1.0006553079947575, "grad_norm": 0.42703890800476074, "learning_rate": 8.4265755543392e-06, "loss": 0.6154, "step": 509 }, { "epoch": 1.0026212319790302, "grad_norm": 0.41458940505981445, "learning_rate": 8.418222822422348e-06, "loss": 0.4612, "step": 510 }, { "epoch": 1.0045871559633028, "grad_norm": 0.34233203530311584, "learning_rate": 8.409852142135438e-06, "loss": 0.4433, "step": 511 }, { "epoch": 1.0065530799475753, "grad_norm": 0.32114264369010925, "learning_rate": 8.40146355743115e-06, "loss": 0.4444, "step": 512 }, { "epoch": 1.008519003931848, "grad_norm": 0.30860528349876404, "learning_rate": 8.393057112356181e-06, "loss": 0.435, "step": 513 }, { "epoch": 1.0104849279161205, "grad_norm": 0.31313031911849976, "learning_rate": 8.38463285105101e-06, "loss": 0.4331, "step": 514 }, { "epoch": 1.0124508519003932, "grad_norm": 0.32860052585601807, "learning_rate": 8.376190817749658e-06, "loss": 0.4323, "step": 515 }, { "epoch": 1.0144167758846658, "grad_norm": 0.3043825924396515, "learning_rate": 8.367731056779476e-06, "loss": 0.4568, "step": 516 }, { "epoch": 1.0163826998689385, "grad_norm": 0.295673131942749, "learning_rate": 8.35925361256089e-06, "loss": 0.4379, "step": 517 }, { "epoch": 1.018348623853211, "grad_norm": 0.314635306596756, "learning_rate": 8.350758529607174e-06, "loss": 0.4407, "step": 518 }, { "epoch": 1.0203145478374835, "grad_norm": 0.3012637794017792, "learning_rate": 8.342245852524229e-06, "loss": 0.4449, "step": 519 }, { "epoch": 1.0222804718217562, "grad_norm": 0.3112142086029053, "learning_rate": 8.333715626010334e-06, "loss": 0.4563, "step": 520 }, { "epoch": 1.0242463958060288, "grad_norm": 0.33864012360572815, "learning_rate": 8.325167894855917e-06, "loss": 0.4624, "step": 521 }, { "epoch": 1.0262123197903015, "grad_norm": 0.31816527247428894, "learning_rate": 8.316602703943315e-06, "loss": 0.4403, "step": 522 }, { "epoch": 1.0281782437745741, "grad_norm": 0.32534271478652954, "learning_rate": 8.308020098246552e-06, "loss": 0.4631, "step": 523 }, { "epoch": 1.0301441677588468, "grad_norm": 0.3206179141998291, "learning_rate": 8.299420122831084e-06, "loss": 0.4498, "step": 524 }, { "epoch": 1.0321100917431192, "grad_norm": 0.33189356327056885, "learning_rate": 8.290802822853576e-06, "loss": 0.4574, "step": 525 }, { "epoch": 1.0340760157273918, "grad_norm": 0.3216302990913391, "learning_rate": 8.28216824356166e-06, "loss": 0.4702, "step": 526 }, { "epoch": 1.0360419397116645, "grad_norm": 0.2860296666622162, "learning_rate": 8.2735164302937e-06, "loss": 0.4557, "step": 527 }, { "epoch": 1.0380078636959371, "grad_norm": 0.46882984042167664, "learning_rate": 8.26484742847855e-06, "loss": 0.4488, "step": 528 }, { "epoch": 1.0399737876802098, "grad_norm": 0.2686419188976288, "learning_rate": 8.256161283635315e-06, "loss": 0.4468, "step": 529 }, { "epoch": 1.0419397116644824, "grad_norm": 0.39462658762931824, "learning_rate": 8.24745804137312e-06, "loss": 0.4587, "step": 530 }, { "epoch": 1.0439056356487548, "grad_norm": 0.3102317750453949, "learning_rate": 8.238737747390859e-06, "loss": 0.4521, "step": 531 }, { "epoch": 1.0458715596330275, "grad_norm": 0.42338991165161133, "learning_rate": 8.230000447476968e-06, "loss": 0.4457, "step": 532 }, { "epoch": 1.0478374836173001, "grad_norm": 0.2850761413574219, "learning_rate": 8.22124618750917e-06, "loss": 0.4742, "step": 533 }, { "epoch": 1.0498034076015728, "grad_norm": 0.397907555103302, "learning_rate": 8.212475013454249e-06, "loss": 0.459, "step": 534 }, { "epoch": 1.0517693315858454, "grad_norm": 0.38815879821777344, "learning_rate": 8.203686971367795e-06, "loss": 0.4502, "step": 535 }, { "epoch": 1.053735255570118, "grad_norm": 0.33456432819366455, "learning_rate": 8.19488210739397e-06, "loss": 0.4383, "step": 536 }, { "epoch": 1.0557011795543905, "grad_norm": 0.2970755696296692, "learning_rate": 8.186060467765268e-06, "loss": 0.459, "step": 537 }, { "epoch": 1.0576671035386631, "grad_norm": 0.34444984793663025, "learning_rate": 8.177222098802264e-06, "loss": 0.4477, "step": 538 }, { "epoch": 1.0596330275229358, "grad_norm": 0.30864620208740234, "learning_rate": 8.16836704691338e-06, "loss": 0.4542, "step": 539 }, { "epoch": 1.0615989515072084, "grad_norm": 0.3233746588230133, "learning_rate": 8.159495358594627e-06, "loss": 0.4541, "step": 540 }, { "epoch": 1.063564875491481, "grad_norm": 0.35800066590309143, "learning_rate": 8.150607080429385e-06, "loss": 0.4482, "step": 541 }, { "epoch": 1.0655307994757537, "grad_norm": 0.33229756355285645, "learning_rate": 8.14170225908813e-06, "loss": 0.4291, "step": 542 }, { "epoch": 1.067496723460026, "grad_norm": 0.3414958417415619, "learning_rate": 8.13278094132821e-06, "loss": 0.4362, "step": 543 }, { "epoch": 1.0694626474442988, "grad_norm": 0.3265877962112427, "learning_rate": 8.123843173993592e-06, "loss": 0.4422, "step": 544 }, { "epoch": 1.0714285714285714, "grad_norm": 0.3254944980144501, "learning_rate": 8.114889004014612e-06, "loss": 0.4601, "step": 545 }, { "epoch": 1.073394495412844, "grad_norm": 0.3091132640838623, "learning_rate": 8.10591847840774e-06, "loss": 0.446, "step": 546 }, { "epoch": 1.0753604193971167, "grad_norm": 0.27796778082847595, "learning_rate": 8.096931644275318e-06, "loss": 0.435, "step": 547 }, { "epoch": 1.0773263433813893, "grad_norm": 0.3258281648159027, "learning_rate": 8.087928548805326e-06, "loss": 0.4569, "step": 548 }, { "epoch": 1.0792922673656618, "grad_norm": 0.29360657930374146, "learning_rate": 8.078909239271127e-06, "loss": 0.4372, "step": 549 }, { "epoch": 1.0812581913499344, "grad_norm": 0.34414565563201904, "learning_rate": 8.069873763031221e-06, "loss": 0.438, "step": 550 }, { "epoch": 1.083224115334207, "grad_norm": 0.2912955582141876, "learning_rate": 8.060822167528996e-06, "loss": 0.4519, "step": 551 }, { "epoch": 1.0851900393184797, "grad_norm": 0.27693864703178406, "learning_rate": 8.051754500292479e-06, "loss": 0.4423, "step": 552 }, { "epoch": 1.0871559633027523, "grad_norm": 0.29716360569000244, "learning_rate": 8.042670808934086e-06, "loss": 0.4736, "step": 553 }, { "epoch": 1.089121887287025, "grad_norm": 0.25404056906700134, "learning_rate": 8.033571141150371e-06, "loss": 0.4185, "step": 554 }, { "epoch": 1.0910878112712976, "grad_norm": 0.28415998816490173, "learning_rate": 8.024455544721778e-06, "loss": 0.459, "step": 555 }, { "epoch": 1.09305373525557, "grad_norm": 0.25493794679641724, "learning_rate": 8.015324067512393e-06, "loss": 0.4409, "step": 556 }, { "epoch": 1.0950196592398427, "grad_norm": 0.3459571599960327, "learning_rate": 8.006176757469681e-06, "loss": 0.4351, "step": 557 }, { "epoch": 1.0969855832241153, "grad_norm": 0.2602083683013916, "learning_rate": 7.997013662624246e-06, "loss": 0.4313, "step": 558 }, { "epoch": 1.098951507208388, "grad_norm": 0.3519684374332428, "learning_rate": 7.987834831089576e-06, "loss": 0.4401, "step": 559 }, { "epoch": 1.1009174311926606, "grad_norm": 0.2838681936264038, "learning_rate": 7.978640311061787e-06, "loss": 0.4671, "step": 560 }, { "epoch": 1.1028833551769333, "grad_norm": 0.31308382749557495, "learning_rate": 7.969430150819372e-06, "loss": 0.4381, "step": 561 }, { "epoch": 1.1048492791612057, "grad_norm": 0.2971574664115906, "learning_rate": 7.960204398722947e-06, "loss": 0.4509, "step": 562 }, { "epoch": 1.1068152031454783, "grad_norm": 0.2653731107711792, "learning_rate": 7.950963103215e-06, "loss": 0.4312, "step": 563 }, { "epoch": 1.108781127129751, "grad_norm": 0.2692069113254547, "learning_rate": 7.941706312819632e-06, "loss": 0.4427, "step": 564 }, { "epoch": 1.1107470511140236, "grad_norm": 0.29672127962112427, "learning_rate": 7.932434076142304e-06, "loss": 0.4527, "step": 565 }, { "epoch": 1.1127129750982963, "grad_norm": 0.2934640944004059, "learning_rate": 7.923146441869585e-06, "loss": 0.4736, "step": 566 }, { "epoch": 1.114678899082569, "grad_norm": 0.2855452597141266, "learning_rate": 7.913843458768892e-06, "loss": 0.4476, "step": 567 }, { "epoch": 1.1166448230668413, "grad_norm": 0.2716505825519562, "learning_rate": 7.904525175688234e-06, "loss": 0.4414, "step": 568 }, { "epoch": 1.118610747051114, "grad_norm": 0.2886597812175751, "learning_rate": 7.895191641555957e-06, "loss": 0.4302, "step": 569 }, { "epoch": 1.1205766710353866, "grad_norm": 0.31945592164993286, "learning_rate": 7.88584290538049e-06, "loss": 0.4672, "step": 570 }, { "epoch": 1.1225425950196593, "grad_norm": 0.2809208929538727, "learning_rate": 7.876479016250082e-06, "loss": 0.4548, "step": 571 }, { "epoch": 1.124508519003932, "grad_norm": 0.3163772523403168, "learning_rate": 7.86710002333255e-06, "loss": 0.4428, "step": 572 }, { "epoch": 1.1264744429882045, "grad_norm": 0.3288361728191376, "learning_rate": 7.857705975875015e-06, "loss": 0.465, "step": 573 }, { "epoch": 1.1284403669724772, "grad_norm": 0.28767189383506775, "learning_rate": 7.848296923203645e-06, "loss": 0.441, "step": 574 }, { "epoch": 1.1304062909567496, "grad_norm": 0.34927651286125183, "learning_rate": 7.838872914723403e-06, "loss": 0.4482, "step": 575 }, { "epoch": 1.1323722149410222, "grad_norm": 0.2668762505054474, "learning_rate": 7.829433999917773e-06, "loss": 0.4424, "step": 576 }, { "epoch": 1.134338138925295, "grad_norm": 0.3305877447128296, "learning_rate": 7.819980228348519e-06, "loss": 0.4607, "step": 577 }, { "epoch": 1.1363040629095675, "grad_norm": 0.2858606278896332, "learning_rate": 7.810511649655406e-06, "loss": 0.4625, "step": 578 }, { "epoch": 1.1382699868938402, "grad_norm": 0.2797732353210449, "learning_rate": 7.801028313555954e-06, "loss": 0.4558, "step": 579 }, { "epoch": 1.1402359108781126, "grad_norm": 0.33720502257347107, "learning_rate": 7.79153026984517e-06, "loss": 0.4454, "step": 580 }, { "epoch": 1.1422018348623852, "grad_norm": 0.2824835777282715, "learning_rate": 7.782017568395284e-06, "loss": 0.4509, "step": 581 }, { "epoch": 1.144167758846658, "grad_norm": 0.2600245475769043, "learning_rate": 7.772490259155493e-06, "loss": 0.4316, "step": 582 }, { "epoch": 1.1461336828309305, "grad_norm": 0.3566450774669647, "learning_rate": 7.762948392151699e-06, "loss": 0.4615, "step": 583 }, { "epoch": 1.1480996068152032, "grad_norm": 0.2671070992946625, "learning_rate": 7.75339201748624e-06, "loss": 0.445, "step": 584 }, { "epoch": 1.1500655307994758, "grad_norm": 0.29686596989631653, "learning_rate": 7.743821185337634e-06, "loss": 0.4493, "step": 585 }, { "epoch": 1.1520314547837485, "grad_norm": 0.2697345018386841, "learning_rate": 7.734235945960307e-06, "loss": 0.4495, "step": 586 }, { "epoch": 1.1539973787680209, "grad_norm": 0.3056127727031708, "learning_rate": 7.724636349684341e-06, "loss": 0.4455, "step": 587 }, { "epoch": 1.1559633027522935, "grad_norm": 0.3014778792858124, "learning_rate": 7.715022446915195e-06, "loss": 0.4435, "step": 588 }, { "epoch": 1.1579292267365662, "grad_norm": 0.2972736060619354, "learning_rate": 7.705394288133459e-06, "loss": 0.4624, "step": 589 }, { "epoch": 1.1598951507208388, "grad_norm": 0.326951265335083, "learning_rate": 7.695751923894568e-06, "loss": 0.4377, "step": 590 }, { "epoch": 1.1618610747051115, "grad_norm": 0.3335776925086975, "learning_rate": 7.686095404828552e-06, "loss": 0.4514, "step": 591 }, { "epoch": 1.163826998689384, "grad_norm": 0.3000209331512451, "learning_rate": 7.676424781639768e-06, "loss": 0.4668, "step": 592 }, { "epoch": 1.1657929226736565, "grad_norm": 0.3267945945262909, "learning_rate": 7.666740105106623e-06, "loss": 0.4515, "step": 593 }, { "epoch": 1.1677588466579292, "grad_norm": 0.2960260212421417, "learning_rate": 7.65704142608132e-06, "loss": 0.4344, "step": 594 }, { "epoch": 1.1697247706422018, "grad_norm": 0.27537232637405396, "learning_rate": 7.647328795489585e-06, "loss": 0.4644, "step": 595 }, { "epoch": 1.1716906946264745, "grad_norm": 0.3565449118614197, "learning_rate": 7.637602264330406e-06, "loss": 0.4714, "step": 596 }, { "epoch": 1.173656618610747, "grad_norm": 0.2900427281856537, "learning_rate": 7.627861883675748e-06, "loss": 0.4498, "step": 597 }, { "epoch": 1.1756225425950197, "grad_norm": 0.31862181425094604, "learning_rate": 7.618107704670308e-06, "loss": 0.4442, "step": 598 }, { "epoch": 1.1775884665792922, "grad_norm": 0.28148674964904785, "learning_rate": 7.60833977853123e-06, "loss": 0.441, "step": 599 }, { "epoch": 1.1795543905635648, "grad_norm": 0.35680100321769714, "learning_rate": 7.598558156547842e-06, "loss": 0.4632, "step": 600 }, { "epoch": 1.1815203145478375, "grad_norm": 0.2732924222946167, "learning_rate": 7.588762890081387e-06, "loss": 0.4565, "step": 601 }, { "epoch": 1.18348623853211, "grad_norm": 0.3304100036621094, "learning_rate": 7.578954030564754e-06, "loss": 0.4511, "step": 602 }, { "epoch": 1.1854521625163827, "grad_norm": 0.2812047004699707, "learning_rate": 7.569131629502201e-06, "loss": 0.4534, "step": 603 }, { "epoch": 1.1874180865006554, "grad_norm": 0.2930050194263458, "learning_rate": 7.559295738469094e-06, "loss": 0.4394, "step": 604 }, { "epoch": 1.189384010484928, "grad_norm": 0.26927125453948975, "learning_rate": 7.549446409111629e-06, "loss": 0.4413, "step": 605 }, { "epoch": 1.1913499344692005, "grad_norm": 0.2692791521549225, "learning_rate": 7.53958369314657e-06, "loss": 0.4618, "step": 606 }, { "epoch": 1.193315858453473, "grad_norm": 0.26487746834754944, "learning_rate": 7.529707642360963e-06, "loss": 0.462, "step": 607 }, { "epoch": 1.1952817824377457, "grad_norm": 0.2600611448287964, "learning_rate": 7.519818308611877e-06, "loss": 0.4454, "step": 608 }, { "epoch": 1.1972477064220184, "grad_norm": 0.3271210789680481, "learning_rate": 7.509915743826128e-06, "loss": 0.4537, "step": 609 }, { "epoch": 1.199213630406291, "grad_norm": 0.27632102370262146, "learning_rate": 7.500000000000001e-06, "loss": 0.4431, "step": 610 }, { "epoch": 1.2011795543905635, "grad_norm": 0.2809217870235443, "learning_rate": 7.490071129198986e-06, "loss": 0.4661, "step": 611 }, { "epoch": 1.203145478374836, "grad_norm": 0.28529879450798035, "learning_rate": 7.480129183557499e-06, "loss": 0.4543, "step": 612 }, { "epoch": 1.2051114023591087, "grad_norm": 0.2784067690372467, "learning_rate": 7.470174215278605e-06, "loss": 0.4557, "step": 613 }, { "epoch": 1.2070773263433814, "grad_norm": 0.29982420802116394, "learning_rate": 7.460206276633754e-06, "loss": 0.4604, "step": 614 }, { "epoch": 1.209043250327654, "grad_norm": 0.30934128165245056, "learning_rate": 7.450225419962498e-06, "loss": 0.4414, "step": 615 }, { "epoch": 1.2110091743119267, "grad_norm": 0.31483328342437744, "learning_rate": 7.440231697672218e-06, "loss": 0.4498, "step": 616 }, { "epoch": 1.2129750982961993, "grad_norm": 0.33800315856933594, "learning_rate": 7.430225162237852e-06, "loss": 0.4544, "step": 617 }, { "epoch": 1.2149410222804717, "grad_norm": 0.2889743745326996, "learning_rate": 7.4202058662016155e-06, "loss": 0.4515, "step": 618 }, { "epoch": 1.2169069462647444, "grad_norm": 0.300650954246521, "learning_rate": 7.4101738621727245e-06, "loss": 0.456, "step": 619 }, { "epoch": 1.218872870249017, "grad_norm": 0.2623285949230194, "learning_rate": 7.400129202827129e-06, "loss": 0.4351, "step": 620 }, { "epoch": 1.2208387942332897, "grad_norm": 0.29180240631103516, "learning_rate": 7.390071940907222e-06, "loss": 0.4321, "step": 621 }, { "epoch": 1.2228047182175623, "grad_norm": 0.2687191367149353, "learning_rate": 7.380002129221576e-06, "loss": 0.4416, "step": 622 }, { "epoch": 1.224770642201835, "grad_norm": 0.31968826055526733, "learning_rate": 7.369919820644656e-06, "loss": 0.4496, "step": 623 }, { "epoch": 1.2267365661861074, "grad_norm": 0.2760871946811676, "learning_rate": 7.3598250681165485e-06, "loss": 0.4561, "step": 624 }, { "epoch": 1.22870249017038, "grad_norm": 0.3169286549091339, "learning_rate": 7.3497179246426764e-06, "loss": 0.449, "step": 625 }, { "epoch": 1.2306684141546527, "grad_norm": 0.2949203848838806, "learning_rate": 7.339598443293526e-06, "loss": 0.4466, "step": 626 }, { "epoch": 1.2326343381389253, "grad_norm": 0.36728623509407043, "learning_rate": 7.329466677204371e-06, "loss": 0.4604, "step": 627 }, { "epoch": 1.234600262123198, "grad_norm": 0.2636052668094635, "learning_rate": 7.319322679574985e-06, "loss": 0.4538, "step": 628 }, { "epoch": 1.2365661861074706, "grad_norm": 0.3246074616909027, "learning_rate": 7.3091665036693716e-06, "loss": 0.4745, "step": 629 }, { "epoch": 1.238532110091743, "grad_norm": 0.29816025495529175, "learning_rate": 7.298998202815474e-06, "loss": 0.4579, "step": 630 }, { "epoch": 1.2404980340760157, "grad_norm": 0.30991989374160767, "learning_rate": 7.288817830404906e-06, "loss": 0.4436, "step": 631 }, { "epoch": 1.2424639580602883, "grad_norm": 0.31033027172088623, "learning_rate": 7.278625439892665e-06, "loss": 0.4639, "step": 632 }, { "epoch": 1.244429882044561, "grad_norm": 0.28393280506134033, "learning_rate": 7.268421084796852e-06, "loss": 0.449, "step": 633 }, { "epoch": 1.2463958060288336, "grad_norm": 0.305463045835495, "learning_rate": 7.258204818698394e-06, "loss": 0.4489, "step": 634 }, { "epoch": 1.2483617300131062, "grad_norm": 0.29833632707595825, "learning_rate": 7.247976695240754e-06, "loss": 0.4381, "step": 635 }, { "epoch": 1.2503276539973789, "grad_norm": 0.2623004913330078, "learning_rate": 7.237736768129663e-06, "loss": 0.4384, "step": 636 }, { "epoch": 1.2522935779816513, "grad_norm": 0.3139297664165497, "learning_rate": 7.2274850911328275e-06, "loss": 0.451, "step": 637 }, { "epoch": 1.254259501965924, "grad_norm": 0.26940205693244934, "learning_rate": 7.21722171807965e-06, "loss": 0.4595, "step": 638 }, { "epoch": 1.2562254259501966, "grad_norm": 0.2619832754135132, "learning_rate": 7.206946702860948e-06, "loss": 0.4436, "step": 639 }, { "epoch": 1.2581913499344692, "grad_norm": 0.25589093565940857, "learning_rate": 7.196660099428665e-06, "loss": 0.4587, "step": 640 }, { "epoch": 1.2601572739187419, "grad_norm": 0.27356746792793274, "learning_rate": 7.186361961795596e-06, "loss": 0.4408, "step": 641 }, { "epoch": 1.2621231979030143, "grad_norm": 0.26509717106819153, "learning_rate": 7.176052344035101e-06, "loss": 0.4462, "step": 642 }, { "epoch": 1.264089121887287, "grad_norm": 0.32008934020996094, "learning_rate": 7.165731300280814e-06, "loss": 0.4422, "step": 643 }, { "epoch": 1.2660550458715596, "grad_norm": 0.2853245437145233, "learning_rate": 7.15539888472637e-06, "loss": 0.4586, "step": 644 }, { "epoch": 1.2680209698558322, "grad_norm": 0.32988548278808594, "learning_rate": 7.145055151625113e-06, "loss": 0.4414, "step": 645 }, { "epoch": 1.2699868938401049, "grad_norm": 0.3138856589794159, "learning_rate": 7.134700155289812e-06, "loss": 0.4303, "step": 646 }, { "epoch": 1.2719528178243775, "grad_norm": 0.2495591640472412, "learning_rate": 7.124333950092377e-06, "loss": 0.4444, "step": 647 }, { "epoch": 1.2739187418086502, "grad_norm": 0.4393638074398041, "learning_rate": 7.1139565904635755e-06, "loss": 0.4488, "step": 648 }, { "epoch": 1.2758846657929226, "grad_norm": 0.2642851769924164, "learning_rate": 7.103568130892742e-06, "loss": 0.4303, "step": 649 }, { "epoch": 1.2778505897771952, "grad_norm": 0.29225823283195496, "learning_rate": 7.093168625927497e-06, "loss": 0.4478, "step": 650 }, { "epoch": 1.2798165137614679, "grad_norm": 0.3072997033596039, "learning_rate": 7.082758130173456e-06, "loss": 0.4404, "step": 651 }, { "epoch": 1.2817824377457405, "grad_norm": 0.28156915307044983, "learning_rate": 7.072336698293946e-06, "loss": 0.4576, "step": 652 }, { "epoch": 1.2837483617300132, "grad_norm": 0.34902632236480713, "learning_rate": 7.0619043850097165e-06, "loss": 0.4664, "step": 653 }, { "epoch": 1.2857142857142856, "grad_norm": 0.2630963623523712, "learning_rate": 7.051461245098654e-06, "loss": 0.4455, "step": 654 }, { "epoch": 1.2876802096985585, "grad_norm": 0.2813081741333008, "learning_rate": 7.041007333395491e-06, "loss": 0.4423, "step": 655 }, { "epoch": 1.2896461336828309, "grad_norm": 0.27456530928611755, "learning_rate": 7.030542704791522e-06, "loss": 0.4482, "step": 656 }, { "epoch": 1.2916120576671035, "grad_norm": 0.2905554473400116, "learning_rate": 7.020067414234315e-06, "loss": 0.4399, "step": 657 }, { "epoch": 1.2935779816513762, "grad_norm": 0.27594971656799316, "learning_rate": 7.009581516727422e-06, "loss": 0.4563, "step": 658 }, { "epoch": 1.2955439056356488, "grad_norm": 0.2840750217437744, "learning_rate": 6.999085067330085e-06, "loss": 0.4711, "step": 659 }, { "epoch": 1.2975098296199215, "grad_norm": 0.3185199499130249, "learning_rate": 6.988578121156956e-06, "loss": 0.4557, "step": 660 }, { "epoch": 1.2994757536041939, "grad_norm": 0.2815924286842346, "learning_rate": 6.978060733377805e-06, "loss": 0.4383, "step": 661 }, { "epoch": 1.3014416775884665, "grad_norm": 0.28519296646118164, "learning_rate": 6.967532959217223e-06, "loss": 0.4462, "step": 662 }, { "epoch": 1.3034076015727392, "grad_norm": 0.27675876021385193, "learning_rate": 6.956994853954342e-06, "loss": 0.4386, "step": 663 }, { "epoch": 1.3053735255570118, "grad_norm": 0.2669420540332794, "learning_rate": 6.946446472922539e-06, "loss": 0.4316, "step": 664 }, { "epoch": 1.3073394495412844, "grad_norm": 0.2995237708091736, "learning_rate": 6.935887871509147e-06, "loss": 0.4512, "step": 665 }, { "epoch": 1.309305373525557, "grad_norm": 0.28722241520881653, "learning_rate": 6.925319105155165e-06, "loss": 0.4626, "step": 666 }, { "epoch": 1.3112712975098297, "grad_norm": 0.27150627970695496, "learning_rate": 6.914740229354964e-06, "loss": 0.4338, "step": 667 }, { "epoch": 1.3132372214941022, "grad_norm": 0.29424235224723816, "learning_rate": 6.904151299656e-06, "loss": 0.4516, "step": 668 }, { "epoch": 1.3152031454783748, "grad_norm": 2.394724130630493, "learning_rate": 6.8935523716585195e-06, "loss": 0.4346, "step": 669 }, { "epoch": 1.3171690694626474, "grad_norm": 0.3479631841182709, "learning_rate": 6.882943501015264e-06, "loss": 0.4519, "step": 670 }, { "epoch": 1.31913499344692, "grad_norm": 0.2685585916042328, "learning_rate": 6.872324743431189e-06, "loss": 0.4346, "step": 671 }, { "epoch": 1.3211009174311927, "grad_norm": 0.3368861675262451, "learning_rate": 6.8616961546631575e-06, "loss": 0.4434, "step": 672 }, { "epoch": 1.3230668414154652, "grad_norm": 0.2756919264793396, "learning_rate": 6.851057790519656e-06, "loss": 0.4499, "step": 673 }, { "epoch": 1.3250327653997378, "grad_norm": 0.4107244312763214, "learning_rate": 6.840409706860502e-06, "loss": 0.4401, "step": 674 }, { "epoch": 1.3269986893840104, "grad_norm": 0.2796277701854706, "learning_rate": 6.829751959596544e-06, "loss": 0.4295, "step": 675 }, { "epoch": 1.328964613368283, "grad_norm": 0.37067267298698425, "learning_rate": 6.819084604689379e-06, "loss": 0.4526, "step": 676 }, { "epoch": 1.3309305373525557, "grad_norm": 0.30490928888320923, "learning_rate": 6.808407698151041e-06, "loss": 0.4392, "step": 677 }, { "epoch": 1.3328964613368284, "grad_norm": 0.30019262433052063, "learning_rate": 6.797721296043727e-06, "loss": 0.441, "step": 678 }, { "epoch": 1.334862385321101, "grad_norm": 0.26304328441619873, "learning_rate": 6.787025454479489e-06, "loss": 0.4261, "step": 679 }, { "epoch": 1.3368283093053734, "grad_norm": 0.3093208372592926, "learning_rate": 6.776320229619944e-06, "loss": 0.4444, "step": 680 }, { "epoch": 1.338794233289646, "grad_norm": 0.2719149887561798, "learning_rate": 6.765605677675982e-06, "loss": 0.4339, "step": 681 }, { "epoch": 1.3407601572739187, "grad_norm": 0.2760200798511505, "learning_rate": 6.754881854907461e-06, "loss": 0.4657, "step": 682 }, { "epoch": 1.3427260812581914, "grad_norm": 0.29559653997421265, "learning_rate": 6.744148817622924e-06, "loss": 0.4407, "step": 683 }, { "epoch": 1.344692005242464, "grad_norm": 0.280727356672287, "learning_rate": 6.733406622179295e-06, "loss": 0.4341, "step": 684 }, { "epoch": 1.3466579292267364, "grad_norm": 0.3014304041862488, "learning_rate": 6.722655324981584e-06, "loss": 0.4497, "step": 685 }, { "epoch": 1.3486238532110093, "grad_norm": 0.27196088433265686, "learning_rate": 6.711894982482598e-06, "loss": 0.4312, "step": 686 }, { "epoch": 1.3505897771952817, "grad_norm": 0.33416539430618286, "learning_rate": 6.701125651182631e-06, "loss": 0.4446, "step": 687 }, { "epoch": 1.3525557011795544, "grad_norm": 0.29358693957328796, "learning_rate": 6.690347387629184e-06, "loss": 0.4526, "step": 688 }, { "epoch": 1.354521625163827, "grad_norm": 0.28325355052948, "learning_rate": 6.679560248416652e-06, "loss": 0.4241, "step": 689 }, { "epoch": 1.3564875491480997, "grad_norm": 0.29557231068611145, "learning_rate": 6.668764290186039e-06, "loss": 0.4447, "step": 690 }, { "epoch": 1.3584534731323723, "grad_norm": 0.2679084539413452, "learning_rate": 6.6579595696246536e-06, "loss": 0.4284, "step": 691 }, { "epoch": 1.3604193971166447, "grad_norm": 0.29322460293769836, "learning_rate": 6.6471461434658135e-06, "loss": 0.4394, "step": 692 }, { "epoch": 1.3623853211009174, "grad_norm": 0.2770942449569702, "learning_rate": 6.6363240684885465e-06, "loss": 0.4435, "step": 693 }, { "epoch": 1.36435124508519, "grad_norm": 0.3174556493759155, "learning_rate": 6.625493401517299e-06, "loss": 0.4484, "step": 694 }, { "epoch": 1.3663171690694627, "grad_norm": 0.26972174644470215, "learning_rate": 6.614654199421625e-06, "loss": 0.4628, "step": 695 }, { "epoch": 1.3682830930537353, "grad_norm": 0.36254197359085083, "learning_rate": 6.603806519115899e-06, "loss": 0.4552, "step": 696 }, { "epoch": 1.370249017038008, "grad_norm": 0.30667033791542053, "learning_rate": 6.592950417559013e-06, "loss": 0.4467, "step": 697 }, { "epoch": 1.3722149410222806, "grad_norm": 0.29005348682403564, "learning_rate": 6.582085951754076e-06, "loss": 0.4488, "step": 698 }, { "epoch": 1.374180865006553, "grad_norm": 0.32942113280296326, "learning_rate": 6.571213178748112e-06, "loss": 0.4491, "step": 699 }, { "epoch": 1.3761467889908257, "grad_norm": 0.26501721143722534, "learning_rate": 6.560332155631774e-06, "loss": 0.4455, "step": 700 }, { "epoch": 1.3781127129750983, "grad_norm": 0.31284385919570923, "learning_rate": 6.549442939539026e-06, "loss": 0.4534, "step": 701 }, { "epoch": 1.380078636959371, "grad_norm": 0.2804517149925232, "learning_rate": 6.538545587646854e-06, "loss": 0.446, "step": 702 }, { "epoch": 1.3820445609436436, "grad_norm": 0.27337881922721863, "learning_rate": 6.527640157174964e-06, "loss": 0.4472, "step": 703 }, { "epoch": 1.384010484927916, "grad_norm": 0.30470162630081177, "learning_rate": 6.516726705385482e-06, "loss": 0.4462, "step": 704 }, { "epoch": 1.3859764089121887, "grad_norm": 0.2848241627216339, "learning_rate": 6.50580528958265e-06, "loss": 0.442, "step": 705 }, { "epoch": 1.3879423328964613, "grad_norm": 0.27773287892341614, "learning_rate": 6.494875967112529e-06, "loss": 0.4476, "step": 706 }, { "epoch": 1.389908256880734, "grad_norm": 0.28546908497810364, "learning_rate": 6.483938795362695e-06, "loss": 0.4259, "step": 707 }, { "epoch": 1.3918741808650066, "grad_norm": 0.25834253430366516, "learning_rate": 6.47299383176194e-06, "loss": 0.4365, "step": 708 }, { "epoch": 1.3938401048492792, "grad_norm": 0.26199427247047424, "learning_rate": 6.462041133779969e-06, "loss": 0.4295, "step": 709 }, { "epoch": 1.3958060288335519, "grad_norm": 0.26836487650871277, "learning_rate": 6.451080758927099e-06, "loss": 0.4316, "step": 710 }, { "epoch": 1.3977719528178243, "grad_norm": 0.2869493067264557, "learning_rate": 6.440112764753956e-06, "loss": 0.425, "step": 711 }, { "epoch": 1.399737876802097, "grad_norm": 0.27614301443099976, "learning_rate": 6.429137208851172e-06, "loss": 0.4602, "step": 712 }, { "epoch": 1.4017038007863696, "grad_norm": 0.27254679799079895, "learning_rate": 6.418154148849087e-06, "loss": 0.4415, "step": 713 }, { "epoch": 1.4036697247706422, "grad_norm": 0.26700153946876526, "learning_rate": 6.4071636424174435e-06, "loss": 0.4358, "step": 714 }, { "epoch": 1.4056356487549149, "grad_norm": 0.23848773539066315, "learning_rate": 6.396165747265079e-06, "loss": 0.4416, "step": 715 }, { "epoch": 1.4076015727391873, "grad_norm": 0.24086840450763702, "learning_rate": 6.385160521139633e-06, "loss": 0.427, "step": 716 }, { "epoch": 1.4095674967234602, "grad_norm": 0.23305992782115936, "learning_rate": 6.374148021827237e-06, "loss": 0.4443, "step": 717 }, { "epoch": 1.4115334207077326, "grad_norm": 0.2489941418170929, "learning_rate": 6.36312830715221e-06, "loss": 0.4353, "step": 718 }, { "epoch": 1.4134993446920052, "grad_norm": 0.24544461071491241, "learning_rate": 6.352101434976761e-06, "loss": 0.4336, "step": 719 }, { "epoch": 1.4154652686762779, "grad_norm": 0.2898389399051666, "learning_rate": 6.341067463200678e-06, "loss": 0.4439, "step": 720 }, { "epoch": 1.4174311926605505, "grad_norm": 0.28088799118995667, "learning_rate": 6.3300264497610295e-06, "loss": 0.4526, "step": 721 }, { "epoch": 1.4193971166448232, "grad_norm": 0.3206160366535187, "learning_rate": 6.318978452631859e-06, "loss": 0.48, "step": 722 }, { "epoch": 1.4213630406290956, "grad_norm": 0.2654613256454468, "learning_rate": 6.307923529823876e-06, "loss": 0.4594, "step": 723 }, { "epoch": 1.4233289646133682, "grad_norm": 0.31457147002220154, "learning_rate": 6.296861739384162e-06, "loss": 0.4333, "step": 724 }, { "epoch": 1.4252948885976409, "grad_norm": 0.25586414337158203, "learning_rate": 6.285793139395853e-06, "loss": 0.4354, "step": 725 }, { "epoch": 1.4272608125819135, "grad_norm": 0.27597638964653015, "learning_rate": 6.2747177879778424e-06, "loss": 0.4483, "step": 726 }, { "epoch": 1.4292267365661862, "grad_norm": 0.23994868993759155, "learning_rate": 6.263635743284475e-06, "loss": 0.4504, "step": 727 }, { "epoch": 1.4311926605504588, "grad_norm": 0.25358685851097107, "learning_rate": 6.252547063505241e-06, "loss": 0.4522, "step": 728 }, { "epoch": 1.4331585845347314, "grad_norm": 0.28718921542167664, "learning_rate": 6.241451806864465e-06, "loss": 0.4689, "step": 729 }, { "epoch": 1.4351245085190039, "grad_norm": 0.2732388973236084, "learning_rate": 6.230350031621014e-06, "loss": 0.4385, "step": 730 }, { "epoch": 1.4370904325032765, "grad_norm": 0.29866504669189453, "learning_rate": 6.219241796067974e-06, "loss": 0.4456, "step": 731 }, { "epoch": 1.4390563564875491, "grad_norm": 0.308805376291275, "learning_rate": 6.208127158532358e-06, "loss": 0.4529, "step": 732 }, { "epoch": 1.4410222804718218, "grad_norm": 0.26815065741539, "learning_rate": 6.197006177374793e-06, "loss": 0.4323, "step": 733 }, { "epoch": 1.4429882044560944, "grad_norm": 0.30783241987228394, "learning_rate": 6.1858789109892145e-06, "loss": 0.4334, "step": 734 }, { "epoch": 1.4449541284403669, "grad_norm": 0.25694021582603455, "learning_rate": 6.174745417802563e-06, "loss": 0.4469, "step": 735 }, { "epoch": 1.4469200524246395, "grad_norm": 0.2983214855194092, "learning_rate": 6.163605756274472e-06, "loss": 0.4634, "step": 736 }, { "epoch": 1.4488859764089121, "grad_norm": 0.2631153166294098, "learning_rate": 6.1524599848969635e-06, "loss": 0.4518, "step": 737 }, { "epoch": 1.4508519003931848, "grad_norm": 0.30863478779792786, "learning_rate": 6.141308162194141e-06, "loss": 0.4842, "step": 738 }, { "epoch": 1.4528178243774574, "grad_norm": 0.2634303569793701, "learning_rate": 6.130150346721888e-06, "loss": 0.4375, "step": 739 }, { "epoch": 1.45478374836173, "grad_norm": 0.2764803469181061, "learning_rate": 6.118986597067543e-06, "loss": 0.4523, "step": 740 }, { "epoch": 1.4567496723460027, "grad_norm": 0.25771403312683105, "learning_rate": 6.1078169718496164e-06, "loss": 0.4404, "step": 741 }, { "epoch": 1.4587155963302751, "grad_norm": 0.28037071228027344, "learning_rate": 6.096641529717459e-06, "loss": 0.4302, "step": 742 }, { "epoch": 1.4606815203145478, "grad_norm": 0.2815488576889038, "learning_rate": 6.085460329350975e-06, "loss": 0.4523, "step": 743 }, { "epoch": 1.4626474442988204, "grad_norm": 0.24113839864730835, "learning_rate": 6.074273429460296e-06, "loss": 0.4368, "step": 744 }, { "epoch": 1.464613368283093, "grad_norm": 0.2681233286857605, "learning_rate": 6.063080888785484e-06, "loss": 0.4313, "step": 745 }, { "epoch": 1.4665792922673657, "grad_norm": 0.26223933696746826, "learning_rate": 6.051882766096219e-06, "loss": 0.427, "step": 746 }, { "epoch": 1.4685452162516381, "grad_norm": 0.23828788101673126, "learning_rate": 6.040679120191491e-06, "loss": 0.4509, "step": 747 }, { "epoch": 1.470511140235911, "grad_norm": 0.2911582589149475, "learning_rate": 6.0294700098992944e-06, "loss": 0.4623, "step": 748 }, { "epoch": 1.4724770642201834, "grad_norm": 0.22850066423416138, "learning_rate": 6.018255494076309e-06, "loss": 0.4229, "step": 749 }, { "epoch": 1.474442988204456, "grad_norm": 0.28099560737609863, "learning_rate": 6.007035631607605e-06, "loss": 0.4398, "step": 750 }, { "epoch": 1.4764089121887287, "grad_norm": 0.24817194044589996, "learning_rate": 5.995810481406319e-06, "loss": 0.4438, "step": 751 }, { "epoch": 1.4783748361730014, "grad_norm": 0.29838526248931885, "learning_rate": 5.984580102413361e-06, "loss": 0.4374, "step": 752 }, { "epoch": 1.480340760157274, "grad_norm": 0.28657519817352295, "learning_rate": 5.9733445535970915e-06, "loss": 0.451, "step": 753 }, { "epoch": 1.4823066841415464, "grad_norm": 0.2528459429740906, "learning_rate": 5.962103893953016e-06, "loss": 0.4176, "step": 754 }, { "epoch": 1.484272608125819, "grad_norm": 0.29090726375579834, "learning_rate": 5.950858182503478e-06, "loss": 0.4679, "step": 755 }, { "epoch": 1.4862385321100917, "grad_norm": 0.2765166759490967, "learning_rate": 5.939607478297347e-06, "loss": 0.4687, "step": 756 }, { "epoch": 1.4882044560943644, "grad_norm": 0.28713834285736084, "learning_rate": 5.928351840409707e-06, "loss": 0.45, "step": 757 }, { "epoch": 1.490170380078637, "grad_norm": 0.2559838891029358, "learning_rate": 5.917091327941548e-06, "loss": 0.4383, "step": 758 }, { "epoch": 1.4921363040629096, "grad_norm": 0.24240902066230774, "learning_rate": 5.905826000019458e-06, "loss": 0.4527, "step": 759 }, { "epoch": 1.4941022280471823, "grad_norm": 0.3005048930644989, "learning_rate": 5.8945559157953035e-06, "loss": 0.4577, "step": 760 }, { "epoch": 1.4960681520314547, "grad_norm": 0.30774039030075073, "learning_rate": 5.883281134445932e-06, "loss": 0.4625, "step": 761 }, { "epoch": 1.4980340760157274, "grad_norm": 0.24888800084590912, "learning_rate": 5.8720017151728526e-06, "loss": 0.4438, "step": 762 }, { "epoch": 1.5, "grad_norm": 0.2986248731613159, "learning_rate": 5.8607177172019245e-06, "loss": 0.4405, "step": 763 }, { "epoch": 1.5019659239842726, "grad_norm": 0.2639208436012268, "learning_rate": 5.849429199783054e-06, "loss": 0.4518, "step": 764 }, { "epoch": 1.5039318479685453, "grad_norm": 0.2760374844074249, "learning_rate": 5.838136222189874e-06, "loss": 0.4393, "step": 765 }, { "epoch": 1.5058977719528177, "grad_norm": 0.27932795882225037, "learning_rate": 5.826838843719437e-06, "loss": 0.4402, "step": 766 }, { "epoch": 1.5078636959370906, "grad_norm": 0.24393486976623535, "learning_rate": 5.8155371236919045e-06, "loss": 0.4394, "step": 767 }, { "epoch": 1.509829619921363, "grad_norm": 0.2864012122154236, "learning_rate": 5.804231121450235e-06, "loss": 0.4457, "step": 768 }, { "epoch": 1.5117955439056356, "grad_norm": 0.2616628408432007, "learning_rate": 5.79292089635987e-06, "loss": 0.446, "step": 769 }, { "epoch": 1.5137614678899083, "grad_norm": 0.27714791893959045, "learning_rate": 5.781606507808428e-06, "loss": 0.4543, "step": 770 }, { "epoch": 1.5157273918741807, "grad_norm": 0.2534174621105194, "learning_rate": 5.770288015205385e-06, "loss": 0.4229, "step": 771 }, { "epoch": 1.5176933158584536, "grad_norm": 0.23894338309764862, "learning_rate": 5.758965477981771e-06, "loss": 0.4376, "step": 772 }, { "epoch": 1.519659239842726, "grad_norm": 0.2569407522678375, "learning_rate": 5.747638955589847e-06, "loss": 0.4344, "step": 773 }, { "epoch": 1.5216251638269986, "grad_norm": 0.23768967390060425, "learning_rate": 5.736308507502805e-06, "loss": 0.4303, "step": 774 }, { "epoch": 1.5235910878112713, "grad_norm": 0.2613021731376648, "learning_rate": 5.724974193214448e-06, "loss": 0.4352, "step": 775 }, { "epoch": 1.525557011795544, "grad_norm": 0.27572882175445557, "learning_rate": 5.713636072238879e-06, "loss": 0.4289, "step": 776 }, { "epoch": 1.5275229357798166, "grad_norm": 0.2537841498851776, "learning_rate": 5.702294204110191e-06, "loss": 0.4559, "step": 777 }, { "epoch": 1.529488859764089, "grad_norm": 0.256172776222229, "learning_rate": 5.6909486483821485e-06, "loss": 0.452, "step": 778 }, { "epoch": 1.5314547837483619, "grad_norm": 0.2669544517993927, "learning_rate": 5.679599464627885e-06, "loss": 0.4309, "step": 779 }, { "epoch": 1.5334207077326343, "grad_norm": 0.24457275867462158, "learning_rate": 5.668246712439579e-06, "loss": 0.4395, "step": 780 }, { "epoch": 1.535386631716907, "grad_norm": 0.23864710330963135, "learning_rate": 5.656890451428147e-06, "loss": 0.4672, "step": 781 }, { "epoch": 1.5373525557011796, "grad_norm": 0.27357181906700134, "learning_rate": 5.645530741222931e-06, "loss": 0.4625, "step": 782 }, { "epoch": 1.5393184796854522, "grad_norm": 0.24743252992630005, "learning_rate": 5.634167641471383e-06, "loss": 0.4583, "step": 783 }, { "epoch": 1.5412844036697249, "grad_norm": 0.2562152147293091, "learning_rate": 5.622801211838753e-06, "loss": 0.437, "step": 784 }, { "epoch": 1.5432503276539973, "grad_norm": 0.23854993283748627, "learning_rate": 5.611431512007776e-06, "loss": 0.4269, "step": 785 }, { "epoch": 1.5452162516382701, "grad_norm": 0.24666976928710938, "learning_rate": 5.600058601678357e-06, "loss": 0.4478, "step": 786 }, { "epoch": 1.5471821756225426, "grad_norm": 0.25134962797164917, "learning_rate": 5.588682540567261e-06, "loss": 0.4367, "step": 787 }, { "epoch": 1.5491480996068152, "grad_norm": 0.24130164086818695, "learning_rate": 5.577303388407793e-06, "loss": 0.45, "step": 788 }, { "epoch": 1.5511140235910879, "grad_norm": 0.32679837942123413, "learning_rate": 5.5659212049494915e-06, "loss": 0.4541, "step": 789 }, { "epoch": 1.5530799475753603, "grad_norm": 0.26636821031570435, "learning_rate": 5.554536049957813e-06, "loss": 0.4467, "step": 790 }, { "epoch": 1.5550458715596331, "grad_norm": 0.28566592931747437, "learning_rate": 5.543147983213811e-06, "loss": 0.4362, "step": 791 }, { "epoch": 1.5570117955439056, "grad_norm": 0.2524894177913666, "learning_rate": 5.531757064513837e-06, "loss": 0.4416, "step": 792 }, { "epoch": 1.5589777195281782, "grad_norm": 0.2857241928577423, "learning_rate": 5.520363353669208e-06, "loss": 0.4417, "step": 793 }, { "epoch": 1.5609436435124509, "grad_norm": 0.25668948888778687, "learning_rate": 5.50896691050591e-06, "loss": 0.4505, "step": 794 }, { "epoch": 1.5629095674967235, "grad_norm": 0.2940739393234253, "learning_rate": 5.4975677948642704e-06, "loss": 0.4559, "step": 795 }, { "epoch": 1.5648754914809961, "grad_norm": 0.2603815793991089, "learning_rate": 5.48616606659865e-06, "loss": 0.4436, "step": 796 }, { "epoch": 1.5668414154652686, "grad_norm": 0.25054648518562317, "learning_rate": 5.474761785577133e-06, "loss": 0.4318, "step": 797 }, { "epoch": 1.5688073394495414, "grad_norm": 0.2663193643093109, "learning_rate": 5.4633550116812e-06, "loss": 0.4403, "step": 798 }, { "epoch": 1.5707732634338138, "grad_norm": 0.24768784642219543, "learning_rate": 5.451945804805425e-06, "loss": 0.4464, "step": 799 }, { "epoch": 1.5727391874180865, "grad_norm": 0.2361469715833664, "learning_rate": 5.440534224857158e-06, "loss": 0.4482, "step": 800 }, { "epoch": 1.5747051114023591, "grad_norm": 0.26781949400901794, "learning_rate": 5.429120331756208e-06, "loss": 0.4413, "step": 801 }, { "epoch": 1.5766710353866316, "grad_norm": 0.28918296098709106, "learning_rate": 5.417704185434531e-06, "loss": 0.4341, "step": 802 }, { "epoch": 1.5786369593709044, "grad_norm": 0.24010907113552094, "learning_rate": 5.4062858458359135e-06, "loss": 0.4259, "step": 803 }, { "epoch": 1.5806028833551768, "grad_norm": 0.26394084095954895, "learning_rate": 5.394865372915656e-06, "loss": 0.4376, "step": 804 }, { "epoch": 1.5825688073394495, "grad_norm": 0.25336146354675293, "learning_rate": 5.383442826640266e-06, "loss": 0.4472, "step": 805 }, { "epoch": 1.5845347313237221, "grad_norm": 0.25079286098480225, "learning_rate": 5.3720182669871334e-06, "loss": 0.452, "step": 806 }, { "epoch": 1.5865006553079948, "grad_norm": 0.27424266934394836, "learning_rate": 5.360591753944221e-06, "loss": 0.4225, "step": 807 }, { "epoch": 1.5884665792922674, "grad_norm": 0.2350778877735138, "learning_rate": 5.349163347509748e-06, "loss": 0.4247, "step": 808 }, { "epoch": 1.5904325032765398, "grad_norm": 0.2994675934314728, "learning_rate": 5.337733107691879e-06, "loss": 0.4335, "step": 809 }, { "epoch": 1.5923984272608127, "grad_norm": 0.23439203202724457, "learning_rate": 5.3263010945083994e-06, "loss": 0.42, "step": 810 }, { "epoch": 1.5943643512450851, "grad_norm": 0.2544015049934387, "learning_rate": 5.314867367986409e-06, "loss": 0.4446, "step": 811 }, { "epoch": 1.5963302752293578, "grad_norm": 0.29329681396484375, "learning_rate": 5.303431988162008e-06, "loss": 0.4397, "step": 812 }, { "epoch": 1.5982961992136304, "grad_norm": 0.26393359899520874, "learning_rate": 5.291995015079969e-06, "loss": 0.4418, "step": 813 }, { "epoch": 1.600262123197903, "grad_norm": 0.22339020669460297, "learning_rate": 5.2805565087934396e-06, "loss": 0.413, "step": 814 }, { "epoch": 1.6022280471821757, "grad_norm": 0.25190436840057373, "learning_rate": 5.269116529363613e-06, "loss": 0.4291, "step": 815 }, { "epoch": 1.6041939711664481, "grad_norm": 0.22340822219848633, "learning_rate": 5.257675136859415e-06, "loss": 0.4377, "step": 816 }, { "epoch": 1.606159895150721, "grad_norm": 0.23750069737434387, "learning_rate": 5.246232391357198e-06, "loss": 0.4284, "step": 817 }, { "epoch": 1.6081258191349934, "grad_norm": 0.25304466485977173, "learning_rate": 5.234788352940413e-06, "loss": 0.4414, "step": 818 }, { "epoch": 1.610091743119266, "grad_norm": 0.23254382610321045, "learning_rate": 5.223343081699302e-06, "loss": 0.4508, "step": 819 }, { "epoch": 1.6120576671035387, "grad_norm": 0.25423750281333923, "learning_rate": 5.211896637730582e-06, "loss": 0.4543, "step": 820 }, { "epoch": 1.6140235910878111, "grad_norm": 0.25801584124565125, "learning_rate": 5.200449081137124e-06, "loss": 0.4447, "step": 821 }, { "epoch": 1.615989515072084, "grad_norm": 0.2248302847146988, "learning_rate": 5.189000472027645e-06, "loss": 0.4302, "step": 822 }, { "epoch": 1.6179554390563564, "grad_norm": 0.27316683530807495, "learning_rate": 5.177550870516386e-06, "loss": 0.4333, "step": 823 }, { "epoch": 1.619921363040629, "grad_norm": 0.24663394689559937, "learning_rate": 5.1661003367228e-06, "loss": 0.4538, "step": 824 }, { "epoch": 1.6218872870249017, "grad_norm": 0.26696285605430603, "learning_rate": 5.1546489307712345e-06, "loss": 0.4366, "step": 825 }, { "epoch": 1.6238532110091743, "grad_norm": 0.26279351115226746, "learning_rate": 5.1431967127906156e-06, "loss": 0.4383, "step": 826 }, { "epoch": 1.625819134993447, "grad_norm": 0.2355157434940338, "learning_rate": 5.131743742914136e-06, "loss": 0.4334, "step": 827 }, { "epoch": 1.6277850589777194, "grad_norm": 0.2610161006450653, "learning_rate": 5.1202900812789346e-06, "loss": 0.4309, "step": 828 }, { "epoch": 1.6297509829619923, "grad_norm": 0.24807049334049225, "learning_rate": 5.108835788025782e-06, "loss": 0.4339, "step": 829 }, { "epoch": 1.6317169069462647, "grad_norm": 0.2702607214450836, "learning_rate": 5.097380923298767e-06, "loss": 0.4312, "step": 830 }, { "epoch": 1.6336828309305373, "grad_norm": 0.2486424446105957, "learning_rate": 5.085925547244978e-06, "loss": 0.4435, "step": 831 }, { "epoch": 1.63564875491481, "grad_norm": 0.2502179741859436, "learning_rate": 5.07446972001419e-06, "loss": 0.4404, "step": 832 }, { "epoch": 1.6376146788990824, "grad_norm": 0.29890352487564087, "learning_rate": 5.063013501758544e-06, "loss": 0.4325, "step": 833 }, { "epoch": 1.6395806028833553, "grad_norm": 0.2597431540489197, "learning_rate": 5.051556952632235e-06, "loss": 0.4438, "step": 834 }, { "epoch": 1.6415465268676277, "grad_norm": 0.23143181204795837, "learning_rate": 5.040100132791197e-06, "loss": 0.4387, "step": 835 }, { "epoch": 1.6435124508519003, "grad_norm": 0.2477666586637497, "learning_rate": 5.028643102392785e-06, "loss": 0.4247, "step": 836 }, { "epoch": 1.645478374836173, "grad_norm": 0.23772212862968445, "learning_rate": 5.0171859215954575e-06, "loss": 0.4314, "step": 837 }, { "epoch": 1.6474442988204456, "grad_norm": 0.2282267063856125, "learning_rate": 5.005728650558467e-06, "loss": 0.4519, "step": 838 }, { "epoch": 1.6494102228047183, "grad_norm": 0.27698102593421936, "learning_rate": 4.994271349441534e-06, "loss": 0.4283, "step": 839 }, { "epoch": 1.6513761467889907, "grad_norm": 0.22621089220046997, "learning_rate": 4.982814078404543e-06, "loss": 0.4115, "step": 840 }, { "epoch": 1.6533420707732636, "grad_norm": 0.2617005705833435, "learning_rate": 4.971356897607216e-06, "loss": 0.4468, "step": 841 }, { "epoch": 1.655307994757536, "grad_norm": 0.25365275144577026, "learning_rate": 4.959899867208805e-06, "loss": 0.4469, "step": 842 }, { "epoch": 1.6572739187418086, "grad_norm": 0.21561695635318756, "learning_rate": 4.948443047367767e-06, "loss": 0.4206, "step": 843 }, { "epoch": 1.6592398427260813, "grad_norm": 0.2597047686576843, "learning_rate": 4.936986498241458e-06, "loss": 0.4226, "step": 844 }, { "epoch": 1.661205766710354, "grad_norm": 0.27081021666526794, "learning_rate": 4.9255302799858125e-06, "loss": 0.4531, "step": 845 }, { "epoch": 1.6631716906946266, "grad_norm": 0.24182185530662537, "learning_rate": 4.9140744527550225e-06, "loss": 0.4275, "step": 846 }, { "epoch": 1.665137614678899, "grad_norm": 0.2677760720252991, "learning_rate": 4.9026190767012345e-06, "loss": 0.4383, "step": 847 }, { "epoch": 1.6671035386631718, "grad_norm": 0.24200788140296936, "learning_rate": 4.891164211974218e-06, "loss": 0.4246, "step": 848 }, { "epoch": 1.6690694626474443, "grad_norm": 0.27642157673835754, "learning_rate": 4.879709918721067e-06, "loss": 0.4454, "step": 849 }, { "epoch": 1.671035386631717, "grad_norm": 0.2820422351360321, "learning_rate": 4.868256257085866e-06, "loss": 0.4439, "step": 850 }, { "epoch": 1.6730013106159896, "grad_norm": 0.23756973445415497, "learning_rate": 4.856803287209385e-06, "loss": 0.4303, "step": 851 }, { "epoch": 1.674967234600262, "grad_norm": 0.2752552926540375, "learning_rate": 4.845351069228767e-06, "loss": 0.4196, "step": 852 }, { "epoch": 1.6769331585845348, "grad_norm": 0.2415931224822998, "learning_rate": 4.8338996632772014e-06, "loss": 0.4377, "step": 853 }, { "epoch": 1.6788990825688073, "grad_norm": 0.2568812668323517, "learning_rate": 4.822449129483616e-06, "loss": 0.4459, "step": 854 }, { "epoch": 1.68086500655308, "grad_norm": 0.2614029049873352, "learning_rate": 4.8109995279723556e-06, "loss": 0.4542, "step": 855 }, { "epoch": 1.6828309305373526, "grad_norm": 0.24816203117370605, "learning_rate": 4.799550918862877e-06, "loss": 0.452, "step": 856 }, { "epoch": 1.6847968545216252, "grad_norm": 0.24617765843868256, "learning_rate": 4.788103362269418e-06, "loss": 0.4341, "step": 857 }, { "epoch": 1.6867627785058978, "grad_norm": 0.23530834913253784, "learning_rate": 4.776656918300699e-06, "loss": 0.4248, "step": 858 }, { "epoch": 1.6887287024901703, "grad_norm": 0.263662725687027, "learning_rate": 4.76521164705959e-06, "loss": 0.4456, "step": 859 }, { "epoch": 1.6906946264744431, "grad_norm": 0.24673894047737122, "learning_rate": 4.7537676086428035e-06, "loss": 0.4545, "step": 860 }, { "epoch": 1.6926605504587156, "grad_norm": 0.24264618754386902, "learning_rate": 4.742324863140587e-06, "loss": 0.4328, "step": 861 }, { "epoch": 1.6946264744429882, "grad_norm": 0.2592974305152893, "learning_rate": 4.73088347063639e-06, "loss": 0.4512, "step": 862 }, { "epoch": 1.6965923984272608, "grad_norm": 0.2544708549976349, "learning_rate": 4.719443491206562e-06, "loss": 0.4415, "step": 863 }, { "epoch": 1.6985583224115333, "grad_norm": 0.2565777897834778, "learning_rate": 4.70800498492003e-06, "loss": 0.434, "step": 864 }, { "epoch": 1.7005242463958061, "grad_norm": 0.24272798001766205, "learning_rate": 4.696568011837994e-06, "loss": 0.4404, "step": 865 }, { "epoch": 1.7024901703800785, "grad_norm": 0.2778138816356659, "learning_rate": 4.685132632013592e-06, "loss": 0.4377, "step": 866 }, { "epoch": 1.7044560943643512, "grad_norm": 0.24986910820007324, "learning_rate": 4.673698905491602e-06, "loss": 0.4316, "step": 867 }, { "epoch": 1.7064220183486238, "grad_norm": 0.2514886260032654, "learning_rate": 4.6622668923081235e-06, "loss": 0.4352, "step": 868 }, { "epoch": 1.7083879423328965, "grad_norm": 0.24508267641067505, "learning_rate": 4.6508366524902525e-06, "loss": 0.4434, "step": 869 }, { "epoch": 1.7103538663171691, "grad_norm": 0.2508834898471832, "learning_rate": 4.639408246055781e-06, "loss": 0.4242, "step": 870 }, { "epoch": 1.7123197903014415, "grad_norm": 0.2465783655643463, "learning_rate": 4.627981733012868e-06, "loss": 0.4348, "step": 871 }, { "epoch": 1.7142857142857144, "grad_norm": 0.25469180941581726, "learning_rate": 4.616557173359736e-06, "loss": 0.4439, "step": 872 }, { "epoch": 1.7162516382699868, "grad_norm": 0.2471664994955063, "learning_rate": 4.605134627084345e-06, "loss": 0.4394, "step": 873 }, { "epoch": 1.7182175622542595, "grad_norm": 0.27033987641334534, "learning_rate": 4.593714154164088e-06, "loss": 0.453, "step": 874 }, { "epoch": 1.7201834862385321, "grad_norm": 0.22780223190784454, "learning_rate": 4.58229581456547e-06, "loss": 0.4449, "step": 875 }, { "epoch": 1.7221494102228048, "grad_norm": 0.24192917346954346, "learning_rate": 4.570879668243792e-06, "loss": 0.4547, "step": 876 }, { "epoch": 1.7241153342070774, "grad_norm": 0.259541392326355, "learning_rate": 4.559465775142843e-06, "loss": 0.4416, "step": 877 }, { "epoch": 1.7260812581913498, "grad_norm": 0.24240902066230774, "learning_rate": 4.548054195194576e-06, "loss": 0.437, "step": 878 }, { "epoch": 1.7280471821756227, "grad_norm": 0.23845937848091125, "learning_rate": 4.536644988318802e-06, "loss": 0.4482, "step": 879 }, { "epoch": 1.7300131061598951, "grad_norm": 0.2592415511608124, "learning_rate": 4.5252382144228696e-06, "loss": 0.4249, "step": 880 }, { "epoch": 1.7319790301441678, "grad_norm": 0.2503202557563782, "learning_rate": 4.5138339334013505e-06, "loss": 0.4435, "step": 881 }, { "epoch": 1.7339449541284404, "grad_norm": 0.23188181221485138, "learning_rate": 4.502432205135731e-06, "loss": 0.4361, "step": 882 }, { "epoch": 1.7359108781127128, "grad_norm": 0.27275577187538147, "learning_rate": 4.491033089494091e-06, "loss": 0.4644, "step": 883 }, { "epoch": 1.7378768020969857, "grad_norm": 0.27509182691574097, "learning_rate": 4.479636646330793e-06, "loss": 0.4496, "step": 884 }, { "epoch": 1.7398427260812581, "grad_norm": 0.2707621157169342, "learning_rate": 4.468242935486164e-06, "loss": 0.4384, "step": 885 }, { "epoch": 1.7418086500655308, "grad_norm": 0.24250587821006775, "learning_rate": 4.45685201678619e-06, "loss": 0.4458, "step": 886 }, { "epoch": 1.7437745740498034, "grad_norm": 0.25717586278915405, "learning_rate": 4.445463950042191e-06, "loss": 0.4288, "step": 887 }, { "epoch": 1.745740498034076, "grad_norm": 0.2693054676055908, "learning_rate": 4.434078795050509e-06, "loss": 0.4534, "step": 888 }, { "epoch": 1.7477064220183487, "grad_norm": 0.2557573616504669, "learning_rate": 4.4226966115922096e-06, "loss": 0.4385, "step": 889 }, { "epoch": 1.7496723460026211, "grad_norm": 0.224521666765213, "learning_rate": 4.411317459432741e-06, "loss": 0.4176, "step": 890 }, { "epoch": 1.751638269986894, "grad_norm": 0.27973490953445435, "learning_rate": 4.3999413983216434e-06, "loss": 0.448, "step": 891 }, { "epoch": 1.7536041939711664, "grad_norm": 0.2818668484687805, "learning_rate": 4.388568487992225e-06, "loss": 0.4555, "step": 892 }, { "epoch": 1.755570117955439, "grad_norm": 0.25364866852760315, "learning_rate": 4.3771987881612484e-06, "loss": 0.4399, "step": 893 }, { "epoch": 1.7575360419397117, "grad_norm": 0.24888482689857483, "learning_rate": 4.365832358528618e-06, "loss": 0.4504, "step": 894 }, { "epoch": 1.7595019659239841, "grad_norm": 0.2617346942424774, "learning_rate": 4.35446925877707e-06, "loss": 0.4355, "step": 895 }, { "epoch": 1.761467889908257, "grad_norm": 0.2641500234603882, "learning_rate": 4.343109548571855e-06, "loss": 0.4545, "step": 896 }, { "epoch": 1.7634338138925294, "grad_norm": 0.2263491451740265, "learning_rate": 4.331753287560423e-06, "loss": 0.4433, "step": 897 }, { "epoch": 1.765399737876802, "grad_norm": 0.2606113851070404, "learning_rate": 4.320400535372117e-06, "loss": 0.429, "step": 898 }, { "epoch": 1.7673656618610747, "grad_norm": 0.25996842980384827, "learning_rate": 4.3090513516178514e-06, "loss": 0.446, "step": 899 }, { "epoch": 1.7693315858453473, "grad_norm": 0.23425088822841644, "learning_rate": 4.29770579588981e-06, "loss": 0.4664, "step": 900 }, { "epoch": 1.77129750982962, "grad_norm": 0.2421378493309021, "learning_rate": 4.286363927761122e-06, "loss": 0.4585, "step": 901 }, { "epoch": 1.7732634338138924, "grad_norm": 0.2616322636604309, "learning_rate": 4.275025806785554e-06, "loss": 0.4585, "step": 902 }, { "epoch": 1.7752293577981653, "grad_norm": 0.23578320443630219, "learning_rate": 4.263691492497197e-06, "loss": 0.4362, "step": 903 }, { "epoch": 1.7771952817824377, "grad_norm": 0.22001796960830688, "learning_rate": 4.252361044410154e-06, "loss": 0.4431, "step": 904 }, { "epoch": 1.7791612057667103, "grad_norm": 0.24889324605464935, "learning_rate": 4.241034522018232e-06, "loss": 0.4493, "step": 905 }, { "epoch": 1.781127129750983, "grad_norm": 0.2580612897872925, "learning_rate": 4.229711984794614e-06, "loss": 0.4433, "step": 906 }, { "epoch": 1.7830930537352556, "grad_norm": 0.2380072921514511, "learning_rate": 4.218393492191573e-06, "loss": 0.4507, "step": 907 }, { "epoch": 1.7850589777195283, "grad_norm": 0.24413016438484192, "learning_rate": 4.207079103640129e-06, "loss": 0.4496, "step": 908 }, { "epoch": 1.7870249017038007, "grad_norm": 0.2281772643327713, "learning_rate": 4.195768878549766e-06, "loss": 0.4202, "step": 909 }, { "epoch": 1.7889908256880735, "grad_norm": 0.2313961535692215, "learning_rate": 4.184462876308097e-06, "loss": 0.4306, "step": 910 }, { "epoch": 1.790956749672346, "grad_norm": 0.24212126433849335, "learning_rate": 4.173161156280564e-06, "loss": 0.4366, "step": 911 }, { "epoch": 1.7929226736566186, "grad_norm": 0.2456527203321457, "learning_rate": 4.161863777810128e-06, "loss": 0.4594, "step": 912 }, { "epoch": 1.7948885976408913, "grad_norm": 0.24351178109645844, "learning_rate": 4.150570800216946e-06, "loss": 0.4497, "step": 913 }, { "epoch": 1.7968545216251637, "grad_norm": 0.2541344463825226, "learning_rate": 4.139282282798076e-06, "loss": 0.454, "step": 914 }, { "epoch": 1.7988204456094365, "grad_norm": 0.2662005126476288, "learning_rate": 4.127998284827148e-06, "loss": 0.4479, "step": 915 }, { "epoch": 1.800786369593709, "grad_norm": 0.26165616512298584, "learning_rate": 4.11671886555407e-06, "loss": 0.4249, "step": 916 }, { "epoch": 1.8027522935779816, "grad_norm": 0.2643074095249176, "learning_rate": 4.105444084204699e-06, "loss": 0.4396, "step": 917 }, { "epoch": 1.8047182175622543, "grad_norm": 0.2607715129852295, "learning_rate": 4.094173999980544e-06, "loss": 0.4366, "step": 918 }, { "epoch": 1.806684141546527, "grad_norm": 0.2415345460176468, "learning_rate": 4.082908672058453e-06, "loss": 0.4458, "step": 919 }, { "epoch": 1.8086500655307995, "grad_norm": 0.2511383891105652, "learning_rate": 4.071648159590294e-06, "loss": 0.4428, "step": 920 }, { "epoch": 1.810615989515072, "grad_norm": 0.24544315040111542, "learning_rate": 4.060392521702655e-06, "loss": 0.4462, "step": 921 }, { "epoch": 1.8125819134993448, "grad_norm": 0.24017304182052612, "learning_rate": 4.0491418174965225e-06, "loss": 0.4395, "step": 922 }, { "epoch": 1.8145478374836173, "grad_norm": 0.23082610964775085, "learning_rate": 4.037896106046986e-06, "loss": 0.4313, "step": 923 }, { "epoch": 1.81651376146789, "grad_norm": 0.24073214828968048, "learning_rate": 4.026655446402912e-06, "loss": 0.458, "step": 924 }, { "epoch": 1.8184796854521625, "grad_norm": 0.24371448159217834, "learning_rate": 4.01541989758664e-06, "loss": 0.4429, "step": 925 }, { "epoch": 1.820445609436435, "grad_norm": 0.24334824085235596, "learning_rate": 4.004189518593683e-06, "loss": 0.4395, "step": 926 }, { "epoch": 1.8224115334207078, "grad_norm": 0.26907652616500854, "learning_rate": 3.9929643683923965e-06, "loss": 0.4482, "step": 927 }, { "epoch": 1.8243774574049803, "grad_norm": 0.23593313992023468, "learning_rate": 3.981744505923692e-06, "loss": 0.4453, "step": 928 }, { "epoch": 1.826343381389253, "grad_norm": 0.2463892549276352, "learning_rate": 3.970529990100706e-06, "loss": 0.4295, "step": 929 }, { "epoch": 1.8283093053735255, "grad_norm": 0.2889455556869507, "learning_rate": 3.9593208798085094e-06, "loss": 0.4455, "step": 930 }, { "epoch": 1.8302752293577982, "grad_norm": 0.25848588347435, "learning_rate": 3.948117233903781e-06, "loss": 0.4381, "step": 931 }, { "epoch": 1.8322411533420708, "grad_norm": 0.2491040676832199, "learning_rate": 3.936919111214518e-06, "loss": 0.4493, "step": 932 }, { "epoch": 1.8342070773263432, "grad_norm": 0.27089598774909973, "learning_rate": 3.9257265705397065e-06, "loss": 0.433, "step": 933 }, { "epoch": 1.8361730013106161, "grad_norm": 0.23713093996047974, "learning_rate": 3.914539670649026e-06, "loss": 0.4403, "step": 934 }, { "epoch": 1.8381389252948885, "grad_norm": 0.2511511445045471, "learning_rate": 3.903358470282542e-06, "loss": 0.4355, "step": 935 }, { "epoch": 1.8401048492791612, "grad_norm": 0.23158514499664307, "learning_rate": 3.892183028150384e-06, "loss": 0.4426, "step": 936 }, { "epoch": 1.8420707732634338, "grad_norm": 0.23655346035957336, "learning_rate": 3.881013402932458e-06, "loss": 0.4652, "step": 937 }, { "epoch": 1.8440366972477065, "grad_norm": 0.24980229139328003, "learning_rate": 3.869849653278114e-06, "loss": 0.4421, "step": 938 }, { "epoch": 1.8460026212319791, "grad_norm": 0.24362362921237946, "learning_rate": 3.8586918378058595e-06, "loss": 0.4269, "step": 939 }, { "epoch": 1.8479685452162515, "grad_norm": 0.23607191443443298, "learning_rate": 3.847540015103038e-06, "loss": 0.4618, "step": 940 }, { "epoch": 1.8499344692005244, "grad_norm": 0.22779516875743866, "learning_rate": 3.836394243725529e-06, "loss": 0.4473, "step": 941 }, { "epoch": 1.8519003931847968, "grad_norm": 0.2740325629711151, "learning_rate": 3.8252545821974385e-06, "loss": 0.4491, "step": 942 }, { "epoch": 1.8538663171690695, "grad_norm": 0.26140812039375305, "learning_rate": 3.814121089010786e-06, "loss": 0.4311, "step": 943 }, { "epoch": 1.855832241153342, "grad_norm": 0.2269832193851471, "learning_rate": 3.8029938226252095e-06, "loss": 0.4228, "step": 944 }, { "epoch": 1.8577981651376145, "grad_norm": 0.2418784499168396, "learning_rate": 3.791872841467643e-06, "loss": 0.4206, "step": 945 }, { "epoch": 1.8597640891218874, "grad_norm": 0.25120002031326294, "learning_rate": 3.780758203932028e-06, "loss": 0.4367, "step": 946 }, { "epoch": 1.8617300131061598, "grad_norm": 0.2378482073545456, "learning_rate": 3.769649968378989e-06, "loss": 0.4405, "step": 947 }, { "epoch": 1.8636959370904325, "grad_norm": 0.2553141713142395, "learning_rate": 3.758548193135536e-06, "loss": 0.4472, "step": 948 }, { "epoch": 1.865661861074705, "grad_norm": 0.23460035026073456, "learning_rate": 3.747452936494761e-06, "loss": 0.4426, "step": 949 }, { "epoch": 1.8676277850589778, "grad_norm": 0.2531510293483734, "learning_rate": 3.7363642567155254e-06, "loss": 0.4687, "step": 950 }, { "epoch": 1.8695937090432504, "grad_norm": 0.21824617683887482, "learning_rate": 3.7252822120221592e-06, "loss": 0.4408, "step": 951 }, { "epoch": 1.8715596330275228, "grad_norm": 0.2500864863395691, "learning_rate": 3.714206860604148e-06, "loss": 0.4329, "step": 952 }, { "epoch": 1.8735255570117957, "grad_norm": 0.2598780691623688, "learning_rate": 3.7031382606158396e-06, "loss": 0.459, "step": 953 }, { "epoch": 1.875491480996068, "grad_norm": 0.25134357810020447, "learning_rate": 3.6920764701761263e-06, "loss": 0.4465, "step": 954 }, { "epoch": 1.8774574049803407, "grad_norm": 0.24129661917686462, "learning_rate": 3.6810215473681433e-06, "loss": 0.4462, "step": 955 }, { "epoch": 1.8794233289646134, "grad_norm": 0.24308183789253235, "learning_rate": 3.6699735502389734e-06, "loss": 0.4255, "step": 956 }, { "epoch": 1.8813892529488858, "grad_norm": 0.27380120754241943, "learning_rate": 3.6589325367993243e-06, "loss": 0.4181, "step": 957 }, { "epoch": 1.8833551769331587, "grad_norm": 0.22356578707695007, "learning_rate": 3.6478985650232413e-06, "loss": 0.437, "step": 958 }, { "epoch": 1.885321100917431, "grad_norm": 0.22350762784481049, "learning_rate": 3.636871692847791e-06, "loss": 0.4359, "step": 959 }, { "epoch": 1.8872870249017037, "grad_norm": 0.21726961433887482, "learning_rate": 3.625851978172765e-06, "loss": 0.4358, "step": 960 }, { "epoch": 1.8892529488859764, "grad_norm": 0.2217891365289688, "learning_rate": 3.614839478860369e-06, "loss": 0.434, "step": 961 }, { "epoch": 1.891218872870249, "grad_norm": 0.2290656715631485, "learning_rate": 3.6038342527349225e-06, "loss": 0.4239, "step": 962 }, { "epoch": 1.8931847968545217, "grad_norm": 0.21448542177677155, "learning_rate": 3.59283635758256e-06, "loss": 0.4377, "step": 963 }, { "epoch": 1.895150720838794, "grad_norm": 0.2176762968301773, "learning_rate": 3.5818458511509135e-06, "loss": 0.4366, "step": 964 }, { "epoch": 1.897116644823067, "grad_norm": 0.23071660101413727, "learning_rate": 3.57086279114883e-06, "loss": 0.4415, "step": 965 }, { "epoch": 1.8990825688073394, "grad_norm": 0.23253513872623444, "learning_rate": 3.5598872352460457e-06, "loss": 0.4366, "step": 966 }, { "epoch": 1.901048492791612, "grad_norm": 0.22595295310020447, "learning_rate": 3.548919241072901e-06, "loss": 0.4212, "step": 967 }, { "epoch": 1.9030144167758847, "grad_norm": 0.22117412090301514, "learning_rate": 3.537958866220031e-06, "loss": 0.4363, "step": 968 }, { "epoch": 1.9049803407601573, "grad_norm": 0.23329798877239227, "learning_rate": 3.527006168238061e-06, "loss": 0.4164, "step": 969 }, { "epoch": 1.90694626474443, "grad_norm": 0.21395321190357208, "learning_rate": 3.5160612046373067e-06, "loss": 0.435, "step": 970 }, { "epoch": 1.9089121887287024, "grad_norm": 0.2612607777118683, "learning_rate": 3.505124032887471e-06, "loss": 0.4505, "step": 971 }, { "epoch": 1.9108781127129753, "grad_norm": 0.2517973482608795, "learning_rate": 3.4941947104173514e-06, "loss": 0.4362, "step": 972 }, { "epoch": 1.9128440366972477, "grad_norm": 0.25207528471946716, "learning_rate": 3.4832732946145187e-06, "loss": 0.4461, "step": 973 }, { "epoch": 1.9148099606815203, "grad_norm": 0.2599436938762665, "learning_rate": 3.472359842825037e-06, "loss": 0.4332, "step": 974 }, { "epoch": 1.916775884665793, "grad_norm": 0.24764251708984375, "learning_rate": 3.4614544123531476e-06, "loss": 0.4217, "step": 975 }, { "epoch": 1.9187418086500654, "grad_norm": 0.2398913949728012, "learning_rate": 3.450557060460975e-06, "loss": 0.4417, "step": 976 }, { "epoch": 1.9207077326343382, "grad_norm": 0.2356083244085312, "learning_rate": 3.4396678443682274e-06, "loss": 0.4305, "step": 977 }, { "epoch": 1.9226736566186107, "grad_norm": 0.2680135667324066, "learning_rate": 3.428786821251888e-06, "loss": 0.4484, "step": 978 }, { "epoch": 1.9246395806028833, "grad_norm": 0.22987043857574463, "learning_rate": 3.417914048245927e-06, "loss": 0.4395, "step": 979 }, { "epoch": 1.926605504587156, "grad_norm": 0.2541668117046356, "learning_rate": 3.4070495824409876e-06, "loss": 0.4183, "step": 980 }, { "epoch": 1.9285714285714286, "grad_norm": 0.2168930470943451, "learning_rate": 3.3961934808841023e-06, "loss": 0.4343, "step": 981 }, { "epoch": 1.9305373525557012, "grad_norm": 0.23768441379070282, "learning_rate": 3.385345800578376e-06, "loss": 0.4511, "step": 982 }, { "epoch": 1.9325032765399737, "grad_norm": 0.24167458713054657, "learning_rate": 3.374506598482703e-06, "loss": 0.4505, "step": 983 }, { "epoch": 1.9344692005242465, "grad_norm": 0.22421284019947052, "learning_rate": 3.363675931511455e-06, "loss": 0.4556, "step": 984 }, { "epoch": 1.936435124508519, "grad_norm": 0.23368152976036072, "learning_rate": 3.3528538565341885e-06, "loss": 0.4276, "step": 985 }, { "epoch": 1.9384010484927916, "grad_norm": 0.26514336466789246, "learning_rate": 3.3420404303753485e-06, "loss": 0.4469, "step": 986 }, { "epoch": 1.9403669724770642, "grad_norm": 0.24340538680553436, "learning_rate": 3.331235709813962e-06, "loss": 0.4406, "step": 987 }, { "epoch": 1.9423328964613367, "grad_norm": 0.2298521101474762, "learning_rate": 3.3204397515833494e-06, "loss": 0.437, "step": 988 }, { "epoch": 1.9442988204456095, "grad_norm": 0.2267940640449524, "learning_rate": 3.309652612370816e-06, "loss": 0.4432, "step": 989 }, { "epoch": 1.946264744429882, "grad_norm": 0.24631616473197937, "learning_rate": 3.29887434881737e-06, "loss": 0.4255, "step": 990 }, { "epoch": 1.9482306684141546, "grad_norm": 0.22750426828861237, "learning_rate": 3.288105017517405e-06, "loss": 0.4252, "step": 991 }, { "epoch": 1.9501965923984272, "grad_norm": 0.22862617671489716, "learning_rate": 3.277344675018417e-06, "loss": 0.4496, "step": 992 }, { "epoch": 1.9521625163826999, "grad_norm": 0.21800747513771057, "learning_rate": 3.2665933778207082e-06, "loss": 0.4136, "step": 993 }, { "epoch": 1.9541284403669725, "grad_norm": 0.2186320722103119, "learning_rate": 3.255851182377077e-06, "loss": 0.4328, "step": 994 }, { "epoch": 1.956094364351245, "grad_norm": 0.23287145793437958, "learning_rate": 3.2451181450925406e-06, "loss": 0.4273, "step": 995 }, { "epoch": 1.9580602883355178, "grad_norm": 0.2228022962808609, "learning_rate": 3.234394322324019e-06, "loss": 0.4402, "step": 996 }, { "epoch": 1.9600262123197902, "grad_norm": 0.2278522402048111, "learning_rate": 3.223679770380057e-06, "loss": 0.4347, "step": 997 }, { "epoch": 1.9619921363040629, "grad_norm": 0.21856601536273956, "learning_rate": 3.2129745455205135e-06, "loss": 0.4267, "step": 998 }, { "epoch": 1.9639580602883355, "grad_norm": 0.21911118924617767, "learning_rate": 3.2022787039562745e-06, "loss": 0.4325, "step": 999 }, { "epoch": 1.9659239842726082, "grad_norm": 0.2267688512802124, "learning_rate": 3.191592301848961e-06, "loss": 0.4536, "step": 1000 }, { "epoch": 1.9678899082568808, "grad_norm": 0.21859389543533325, "learning_rate": 3.180915395310623e-06, "loss": 0.4686, "step": 1001 }, { "epoch": 1.9698558322411532, "grad_norm": 0.22693945467472076, "learning_rate": 3.170248040403457e-06, "loss": 0.426, "step": 1002 }, { "epoch": 1.971821756225426, "grad_norm": 0.23281870782375336, "learning_rate": 3.1595902931394983e-06, "loss": 0.4416, "step": 1003 }, { "epoch": 1.9737876802096985, "grad_norm": 0.22778624296188354, "learning_rate": 3.1489422094803458e-06, "loss": 0.443, "step": 1004 }, { "epoch": 1.9757536041939712, "grad_norm": 0.214397594332695, "learning_rate": 3.138303845336844e-06, "loss": 0.4338, "step": 1005 }, { "epoch": 1.9777195281782438, "grad_norm": 0.2683956027030945, "learning_rate": 3.127675256568813e-06, "loss": 0.4484, "step": 1006 }, { "epoch": 1.9796854521625162, "grad_norm": 0.23611973226070404, "learning_rate": 3.1170564989847374e-06, "loss": 0.4338, "step": 1007 }, { "epoch": 1.981651376146789, "grad_norm": 0.22654104232788086, "learning_rate": 3.1064476283414818e-06, "loss": 0.4365, "step": 1008 }, { "epoch": 1.9836173001310615, "grad_norm": 0.2249293178319931, "learning_rate": 3.095848700344001e-06, "loss": 0.426, "step": 1009 }, { "epoch": 1.9855832241153342, "grad_norm": 0.2095581293106079, "learning_rate": 3.085259770645036e-06, "loss": 0.4408, "step": 1010 }, { "epoch": 1.9875491480996068, "grad_norm": 0.2307659089565277, "learning_rate": 3.074680894844837e-06, "loss": 0.4314, "step": 1011 }, { "epoch": 1.9895150720838795, "grad_norm": 0.2108234465122223, "learning_rate": 3.064112128490853e-06, "loss": 0.4483, "step": 1012 }, { "epoch": 1.991480996068152, "grad_norm": 0.2225804477930069, "learning_rate": 3.0535535270774624e-06, "loss": 0.4384, "step": 1013 }, { "epoch": 1.9934469200524245, "grad_norm": 0.20851469039916992, "learning_rate": 3.04300514604566e-06, "loss": 0.427, "step": 1014 }, { "epoch": 1.9954128440366974, "grad_norm": 0.24080389738082886, "learning_rate": 3.0324670407827788e-06, "loss": 0.4377, "step": 1015 }, { "epoch": 1.9973787680209698, "grad_norm": 0.22984230518341064, "learning_rate": 3.0219392666221975e-06, "loss": 0.441, "step": 1016 }, { "epoch": 1.9993446920052425, "grad_norm": 0.25892481207847595, "learning_rate": 3.011421878843044e-06, "loss": 0.5071, "step": 1017 }, { "epoch": 2.001310615989515, "grad_norm": 0.2732914984226227, "learning_rate": 3.000914932669917e-06, "loss": 0.5019, "step": 1018 }, { "epoch": 2.0032765399737875, "grad_norm": 0.22422873973846436, "learning_rate": 2.990418483272579e-06, "loss": 0.4274, "step": 1019 }, { "epoch": 2.0052424639580604, "grad_norm": 0.2819391191005707, "learning_rate": 2.9799325857656856e-06, "loss": 0.4444, "step": 1020 }, { "epoch": 2.007208387942333, "grad_norm": 0.23747709393501282, "learning_rate": 2.96945729520848e-06, "loss": 0.4291, "step": 1021 }, { "epoch": 2.0091743119266057, "grad_norm": 0.21126174926757812, "learning_rate": 2.958992666604511e-06, "loss": 0.4128, "step": 1022 }, { "epoch": 2.011140235910878, "grad_norm": 0.2312362641096115, "learning_rate": 2.948538754901349e-06, "loss": 0.4242, "step": 1023 }, { "epoch": 2.0131061598951505, "grad_norm": 0.25565868616104126, "learning_rate": 2.938095614990285e-06, "loss": 0.4331, "step": 1024 }, { "epoch": 2.0150720838794234, "grad_norm": 0.23785267770290375, "learning_rate": 2.9276633017060563e-06, "loss": 0.4268, "step": 1025 }, { "epoch": 2.017038007863696, "grad_norm": 0.23399539291858673, "learning_rate": 2.917241869826545e-06, "loss": 0.4207, "step": 1026 }, { "epoch": 2.0190039318479687, "grad_norm": 0.23603691160678864, "learning_rate": 2.906831374072504e-06, "loss": 0.4207, "step": 1027 }, { "epoch": 2.020969855832241, "grad_norm": 0.23575784265995026, "learning_rate": 2.89643186910726e-06, "loss": 0.416, "step": 1028 }, { "epoch": 2.022935779816514, "grad_norm": 0.2484109103679657, "learning_rate": 2.8860434095364266e-06, "loss": 0.4299, "step": 1029 }, { "epoch": 2.0249017038007864, "grad_norm": 0.22861795127391815, "learning_rate": 2.875666049907625e-06, "loss": 0.4118, "step": 1030 }, { "epoch": 2.026867627785059, "grad_norm": 0.23161958158016205, "learning_rate": 2.86529984471019e-06, "loss": 0.4158, "step": 1031 }, { "epoch": 2.0288335517693317, "grad_norm": 0.22446328401565552, "learning_rate": 2.8549448483748888e-06, "loss": 0.4041, "step": 1032 }, { "epoch": 2.030799475753604, "grad_norm": 0.27122098207473755, "learning_rate": 2.8446011152736295e-06, "loss": 0.4351, "step": 1033 }, { "epoch": 2.032765399737877, "grad_norm": 0.23165744543075562, "learning_rate": 2.834268699719187e-06, "loss": 0.4329, "step": 1034 }, { "epoch": 2.0347313237221494, "grad_norm": 0.23846569657325745, "learning_rate": 2.8239476559649013e-06, "loss": 0.4179, "step": 1035 }, { "epoch": 2.036697247706422, "grad_norm": 0.2745479941368103, "learning_rate": 2.8136380382044036e-06, "loss": 0.4211, "step": 1036 }, { "epoch": 2.0386631716906947, "grad_norm": 0.2304745316505432, "learning_rate": 2.803339900571337e-06, "loss": 0.4348, "step": 1037 }, { "epoch": 2.040629095674967, "grad_norm": 0.22313368320465088, "learning_rate": 2.7930532971390543e-06, "loss": 0.4273, "step": 1038 }, { "epoch": 2.04259501965924, "grad_norm": 0.2543697953224182, "learning_rate": 2.7827782819203497e-06, "loss": 0.4419, "step": 1039 }, { "epoch": 2.0445609436435124, "grad_norm": 0.2175356149673462, "learning_rate": 2.7725149088671733e-06, "loss": 0.4189, "step": 1040 }, { "epoch": 2.0465268676277852, "grad_norm": 0.24138113856315613, "learning_rate": 2.762263231870339e-06, "loss": 0.4346, "step": 1041 }, { "epoch": 2.0484927916120577, "grad_norm": 0.23216262459754944, "learning_rate": 2.752023304759248e-06, "loss": 0.4362, "step": 1042 }, { "epoch": 2.05045871559633, "grad_norm": 0.21769055724143982, "learning_rate": 2.7417951813016097e-06, "loss": 0.4262, "step": 1043 }, { "epoch": 2.052424639580603, "grad_norm": 0.23920607566833496, "learning_rate": 2.7315789152031504e-06, "loss": 0.4207, "step": 1044 }, { "epoch": 2.0543905635648754, "grad_norm": 0.24695385992527008, "learning_rate": 2.721374560107336e-06, "loss": 0.4227, "step": 1045 }, { "epoch": 2.0563564875491482, "grad_norm": 0.23072242736816406, "learning_rate": 2.7111821695950957e-06, "loss": 0.4289, "step": 1046 }, { "epoch": 2.0583224115334207, "grad_norm": 0.22974249720573425, "learning_rate": 2.7010017971845267e-06, "loss": 0.429, "step": 1047 }, { "epoch": 2.0602883355176935, "grad_norm": 0.2350340336561203, "learning_rate": 2.6908334963306305e-06, "loss": 0.4434, "step": 1048 }, { "epoch": 2.062254259501966, "grad_norm": 0.25064942240715027, "learning_rate": 2.6806773204250148e-06, "loss": 0.4253, "step": 1049 }, { "epoch": 2.0642201834862384, "grad_norm": 0.23788711428642273, "learning_rate": 2.6705333227956304e-06, "loss": 0.4119, "step": 1050 }, { "epoch": 2.0661861074705112, "grad_norm": 0.2238015979528427, "learning_rate": 2.6604015567064756e-06, "loss": 0.4324, "step": 1051 }, { "epoch": 2.0681520314547837, "grad_norm": 0.21394391357898712, "learning_rate": 2.650282075357325e-06, "loss": 0.4102, "step": 1052 }, { "epoch": 2.0701179554390565, "grad_norm": 0.23077036440372467, "learning_rate": 2.6401749318834528e-06, "loss": 0.4204, "step": 1053 }, { "epoch": 2.072083879423329, "grad_norm": 0.23175548017024994, "learning_rate": 2.6300801793553433e-06, "loss": 0.462, "step": 1054 }, { "epoch": 2.0740498034076014, "grad_norm": 0.22535480558872223, "learning_rate": 2.619997870778424e-06, "loss": 0.4178, "step": 1055 }, { "epoch": 2.0760157273918742, "grad_norm": 0.2462986558675766, "learning_rate": 2.609928059092779e-06, "loss": 0.4117, "step": 1056 }, { "epoch": 2.0779816513761467, "grad_norm": 0.26453328132629395, "learning_rate": 2.599870797172872e-06, "loss": 0.4261, "step": 1057 }, { "epoch": 2.0799475753604195, "grad_norm": 0.2081374228000641, "learning_rate": 2.589826137827277e-06, "loss": 0.4206, "step": 1058 }, { "epoch": 2.081913499344692, "grad_norm": 0.27211225032806396, "learning_rate": 2.579794133798388e-06, "loss": 0.4513, "step": 1059 }, { "epoch": 2.083879423328965, "grad_norm": 0.24931125342845917, "learning_rate": 2.5697748377621505e-06, "loss": 0.4104, "step": 1060 }, { "epoch": 2.0858453473132372, "grad_norm": 0.23902453482151031, "learning_rate": 2.559768302327783e-06, "loss": 0.4114, "step": 1061 }, { "epoch": 2.0878112712975097, "grad_norm": 0.20612815022468567, "learning_rate": 2.549774580037504e-06, "loss": 0.4109, "step": 1062 }, { "epoch": 2.0897771952817825, "grad_norm": 0.2153496891260147, "learning_rate": 2.539793723366247e-06, "loss": 0.3957, "step": 1063 }, { "epoch": 2.091743119266055, "grad_norm": 0.24023601412773132, "learning_rate": 2.529825784721397e-06, "loss": 0.4136, "step": 1064 }, { "epoch": 2.093709043250328, "grad_norm": 0.25313615798950195, "learning_rate": 2.5198708164425046e-06, "loss": 0.4243, "step": 1065 }, { "epoch": 2.0956749672346002, "grad_norm": 0.22436189651489258, "learning_rate": 2.509928870801015e-06, "loss": 0.4136, "step": 1066 }, { "epoch": 2.097640891218873, "grad_norm": 0.22276759147644043, "learning_rate": 2.5000000000000015e-06, "loss": 0.4469, "step": 1067 }, { "epoch": 2.0996068152031455, "grad_norm": 0.22407156229019165, "learning_rate": 2.4900842561738736e-06, "loss": 0.4213, "step": 1068 }, { "epoch": 2.101572739187418, "grad_norm": 0.244294673204422, "learning_rate": 2.4801816913881242e-06, "loss": 0.4247, "step": 1069 }, { "epoch": 2.103538663171691, "grad_norm": 0.2237965315580368, "learning_rate": 2.4702923576390377e-06, "loss": 0.4201, "step": 1070 }, { "epoch": 2.1055045871559632, "grad_norm": 0.22248247265815735, "learning_rate": 2.4604163068534313e-06, "loss": 0.437, "step": 1071 }, { "epoch": 2.107470511140236, "grad_norm": 0.21586407721042633, "learning_rate": 2.4505535908883714e-06, "loss": 0.423, "step": 1072 }, { "epoch": 2.1094364351245085, "grad_norm": 0.22616347670555115, "learning_rate": 2.4407042615309066e-06, "loss": 0.4139, "step": 1073 }, { "epoch": 2.111402359108781, "grad_norm": 0.23774725198745728, "learning_rate": 2.4308683704978e-06, "loss": 0.4141, "step": 1074 }, { "epoch": 2.113368283093054, "grad_norm": 0.22781816124916077, "learning_rate": 2.421045969435246e-06, "loss": 0.4261, "step": 1075 }, { "epoch": 2.1153342070773262, "grad_norm": 0.21420340240001678, "learning_rate": 2.411237109918613e-06, "loss": 0.4065, "step": 1076 }, { "epoch": 2.117300131061599, "grad_norm": 0.21004629135131836, "learning_rate": 2.401441843452159e-06, "loss": 0.433, "step": 1077 }, { "epoch": 2.1192660550458715, "grad_norm": 0.2432694286108017, "learning_rate": 2.391660221468773e-06, "loss": 0.4172, "step": 1078 }, { "epoch": 2.1212319790301444, "grad_norm": 0.23806829750537872, "learning_rate": 2.3818922953296937e-06, "loss": 0.444, "step": 1079 }, { "epoch": 2.123197903014417, "grad_norm": 0.21620041131973267, "learning_rate": 2.372138116324254e-06, "loss": 0.43, "step": 1080 }, { "epoch": 2.125163826998689, "grad_norm": 0.21120987832546234, "learning_rate": 2.3623977356695977e-06, "loss": 0.4167, "step": 1081 }, { "epoch": 2.127129750982962, "grad_norm": 0.2339550107717514, "learning_rate": 2.352671204510415e-06, "loss": 0.4321, "step": 1082 }, { "epoch": 2.1290956749672345, "grad_norm": 0.22796887159347534, "learning_rate": 2.342958573918682e-06, "loss": 0.4121, "step": 1083 }, { "epoch": 2.1310615989515074, "grad_norm": 0.2140340507030487, "learning_rate": 2.333259894893378e-06, "loss": 0.4411, "step": 1084 }, { "epoch": 2.13302752293578, "grad_norm": 0.2166963368654251, "learning_rate": 2.323575218360234e-06, "loss": 0.4089, "step": 1085 }, { "epoch": 2.134993446920052, "grad_norm": 0.21713577210903168, "learning_rate": 2.3139045951714473e-06, "loss": 0.4237, "step": 1086 }, { "epoch": 2.136959370904325, "grad_norm": 0.21196705102920532, "learning_rate": 2.304248076105434e-06, "loss": 0.4223, "step": 1087 }, { "epoch": 2.1389252948885975, "grad_norm": 0.21942131221294403, "learning_rate": 2.2946057118665436e-06, "loss": 0.4268, "step": 1088 }, { "epoch": 2.1408912188728704, "grad_norm": 0.21689389646053314, "learning_rate": 2.2849775530848057e-06, "loss": 0.4242, "step": 1089 }, { "epoch": 2.142857142857143, "grad_norm": 0.2458450198173523, "learning_rate": 2.2753636503156624e-06, "loss": 0.418, "step": 1090 }, { "epoch": 2.1448230668414157, "grad_norm": 0.21314334869384766, "learning_rate": 2.2657640540396935e-06, "loss": 0.4093, "step": 1091 }, { "epoch": 2.146788990825688, "grad_norm": 0.20487774908542633, "learning_rate": 2.256178814662368e-06, "loss": 0.4275, "step": 1092 }, { "epoch": 2.1487549148099605, "grad_norm": 0.21627502143383026, "learning_rate": 2.2466079825137595e-06, "loss": 0.4207, "step": 1093 }, { "epoch": 2.1507208387942334, "grad_norm": 0.21706241369247437, "learning_rate": 2.2370516078483014e-06, "loss": 0.4117, "step": 1094 }, { "epoch": 2.152686762778506, "grad_norm": 0.22046822309494019, "learning_rate": 2.227509740844508e-06, "loss": 0.428, "step": 1095 }, { "epoch": 2.1546526867627787, "grad_norm": 0.21805192530155182, "learning_rate": 2.217982431604719e-06, "loss": 0.429, "step": 1096 }, { "epoch": 2.156618610747051, "grad_norm": 0.21168456971645355, "learning_rate": 2.2084697301548336e-06, "loss": 0.419, "step": 1097 }, { "epoch": 2.1585845347313235, "grad_norm": 0.22348026931285858, "learning_rate": 2.198971686444047e-06, "loss": 0.418, "step": 1098 }, { "epoch": 2.1605504587155964, "grad_norm": 0.23373623192310333, "learning_rate": 2.189488350344596e-06, "loss": 0.4159, "step": 1099 }, { "epoch": 2.162516382699869, "grad_norm": 0.21262992918491364, "learning_rate": 2.1800197716514824e-06, "loss": 0.4343, "step": 1100 }, { "epoch": 2.1644823066841417, "grad_norm": 0.21812313795089722, "learning_rate": 2.1705660000822286e-06, "loss": 0.421, "step": 1101 }, { "epoch": 2.166448230668414, "grad_norm": 0.2137649804353714, "learning_rate": 2.1611270852766e-06, "loss": 0.4233, "step": 1102 }, { "epoch": 2.168414154652687, "grad_norm": 0.219873309135437, "learning_rate": 2.151703076796356e-06, "loss": 0.4232, "step": 1103 }, { "epoch": 2.1703800786369594, "grad_norm": 0.20803530514240265, "learning_rate": 2.1422940241249875e-06, "loss": 0.4146, "step": 1104 }, { "epoch": 2.172346002621232, "grad_norm": 0.2098754644393921, "learning_rate": 2.1328999766674504e-06, "loss": 0.4335, "step": 1105 }, { "epoch": 2.1743119266055047, "grad_norm": 0.20901520550251007, "learning_rate": 2.1235209837499186e-06, "loss": 0.4263, "step": 1106 }, { "epoch": 2.176277850589777, "grad_norm": 0.2198159247636795, "learning_rate": 2.1141570946195106e-06, "loss": 0.4132, "step": 1107 }, { "epoch": 2.17824377457405, "grad_norm": 0.21341699361801147, "learning_rate": 2.104808358444044e-06, "loss": 0.4188, "step": 1108 }, { "epoch": 2.1802096985583224, "grad_norm": 0.20872806012630463, "learning_rate": 2.095474824311769e-06, "loss": 0.4293, "step": 1109 }, { "epoch": 2.1821756225425952, "grad_norm": 0.21234822273254395, "learning_rate": 2.086156541231109e-06, "loss": 0.4352, "step": 1110 }, { "epoch": 2.1841415465268676, "grad_norm": 0.2104458063840866, "learning_rate": 2.0768535581304154e-06, "loss": 0.4454, "step": 1111 }, { "epoch": 2.18610747051114, "grad_norm": 0.2399250715970993, "learning_rate": 2.0675659238576955e-06, "loss": 0.4357, "step": 1112 }, { "epoch": 2.188073394495413, "grad_norm": 0.21785323321819305, "learning_rate": 2.0582936871803692e-06, "loss": 0.4272, "step": 1113 }, { "epoch": 2.1900393184796854, "grad_norm": 0.19603775441646576, "learning_rate": 2.049036896785002e-06, "loss": 0.3989, "step": 1114 }, { "epoch": 2.1920052424639582, "grad_norm": 0.20027326047420502, "learning_rate": 2.0397956012770555e-06, "loss": 0.4016, "step": 1115 }, { "epoch": 2.1939711664482306, "grad_norm": 0.19911032915115356, "learning_rate": 2.0305698491806297e-06, "loss": 0.4156, "step": 1116 }, { "epoch": 2.195937090432503, "grad_norm": 0.21624262630939484, "learning_rate": 2.0213596889382153e-06, "loss": 0.422, "step": 1117 }, { "epoch": 2.197903014416776, "grad_norm": 0.21816721558570862, "learning_rate": 2.0121651689104264e-06, "loss": 0.4191, "step": 1118 }, { "epoch": 2.1998689384010484, "grad_norm": 0.20420682430267334, "learning_rate": 2.0029863373757553e-06, "loss": 0.4315, "step": 1119 }, { "epoch": 2.2018348623853212, "grad_norm": 0.20504304766654968, "learning_rate": 1.9938232425303215e-06, "loss": 0.43, "step": 1120 }, { "epoch": 2.2038007863695936, "grad_norm": 0.19833733141422272, "learning_rate": 1.984675932487608e-06, "loss": 0.4155, "step": 1121 }, { "epoch": 2.2057667103538665, "grad_norm": 0.21118539571762085, "learning_rate": 1.9755444552782228e-06, "loss": 0.4167, "step": 1122 }, { "epoch": 2.207732634338139, "grad_norm": 0.1972157508134842, "learning_rate": 1.96642885884963e-06, "loss": 0.4247, "step": 1123 }, { "epoch": 2.2096985583224114, "grad_norm": 0.2121991664171219, "learning_rate": 1.957329191065916e-06, "loss": 0.4154, "step": 1124 }, { "epoch": 2.211664482306684, "grad_norm": 0.2141561657190323, "learning_rate": 1.948245499707523e-06, "loss": 0.4172, "step": 1125 }, { "epoch": 2.2136304062909566, "grad_norm": 0.20581470429897308, "learning_rate": 1.939177832471004e-06, "loss": 0.4043, "step": 1126 }, { "epoch": 2.2155963302752295, "grad_norm": 0.21256645023822784, "learning_rate": 1.9301262369687794e-06, "loss": 0.4369, "step": 1127 }, { "epoch": 2.217562254259502, "grad_norm": 0.224394753575325, "learning_rate": 1.9210907607288728e-06, "loss": 0.415, "step": 1128 }, { "epoch": 2.219528178243775, "grad_norm": 0.22776895761489868, "learning_rate": 1.9120714511946746e-06, "loss": 0.4314, "step": 1129 }, { "epoch": 2.221494102228047, "grad_norm": 0.19702021777629852, "learning_rate": 1.9030683557246815e-06, "loss": 0.4223, "step": 1130 }, { "epoch": 2.2234600262123196, "grad_norm": 0.20599868893623352, "learning_rate": 1.8940815215922609e-06, "loss": 0.4204, "step": 1131 }, { "epoch": 2.2254259501965925, "grad_norm": 0.21677346527576447, "learning_rate": 1.8851109959853885e-06, "loss": 0.4087, "step": 1132 }, { "epoch": 2.227391874180865, "grad_norm": 0.20800592005252838, "learning_rate": 1.8761568260064085e-06, "loss": 0.4462, "step": 1133 }, { "epoch": 2.229357798165138, "grad_norm": 0.2114061564207077, "learning_rate": 1.867219058671791e-06, "loss": 0.4209, "step": 1134 }, { "epoch": 2.23132372214941, "grad_norm": 0.1985040009021759, "learning_rate": 1.858297740911872e-06, "loss": 0.4145, "step": 1135 }, { "epoch": 2.2332896461336826, "grad_norm": 0.19175900518894196, "learning_rate": 1.8493929195706178e-06, "loss": 0.4304, "step": 1136 }, { "epoch": 2.2352555701179555, "grad_norm": 0.20461994409561157, "learning_rate": 1.8405046414053728e-06, "loss": 0.4237, "step": 1137 }, { "epoch": 2.237221494102228, "grad_norm": 0.19885732233524323, "learning_rate": 1.8316329530866228e-06, "loss": 0.4195, "step": 1138 }, { "epoch": 2.239187418086501, "grad_norm": 0.20640921592712402, "learning_rate": 1.822777901197738e-06, "loss": 0.4476, "step": 1139 }, { "epoch": 2.241153342070773, "grad_norm": 0.22110776603221893, "learning_rate": 1.8139395322347335e-06, "loss": 0.4341, "step": 1140 }, { "epoch": 2.243119266055046, "grad_norm": 0.19489331543445587, "learning_rate": 1.805117892606032e-06, "loss": 0.4084, "step": 1141 }, { "epoch": 2.2450851900393185, "grad_norm": 0.20213156938552856, "learning_rate": 1.7963130286322066e-06, "loss": 0.4177, "step": 1142 }, { "epoch": 2.247051114023591, "grad_norm": 0.2150600552558899, "learning_rate": 1.787524986545753e-06, "loss": 0.416, "step": 1143 }, { "epoch": 2.249017038007864, "grad_norm": 0.1949455887079239, "learning_rate": 1.7787538124908293e-06, "loss": 0.4329, "step": 1144 }, { "epoch": 2.250982961992136, "grad_norm": 0.2082454264163971, "learning_rate": 1.769999552523033e-06, "loss": 0.4101, "step": 1145 }, { "epoch": 2.252948885976409, "grad_norm": 0.20170274376869202, "learning_rate": 1.7612622526091406e-06, "loss": 0.4226, "step": 1146 }, { "epoch": 2.2549148099606815, "grad_norm": 0.21375320851802826, "learning_rate": 1.7525419586268816e-06, "loss": 0.421, "step": 1147 }, { "epoch": 2.2568807339449544, "grad_norm": 0.23509110510349274, "learning_rate": 1.7438387163646868e-06, "loss": 0.4237, "step": 1148 }, { "epoch": 2.258846657929227, "grad_norm": 0.20422513782978058, "learning_rate": 1.7351525715214512e-06, "loss": 0.4149, "step": 1149 }, { "epoch": 2.260812581913499, "grad_norm": 0.2060721516609192, "learning_rate": 1.7264835697063009e-06, "loss": 0.4165, "step": 1150 }, { "epoch": 2.262778505897772, "grad_norm": 0.21105261147022247, "learning_rate": 1.7178317564383396e-06, "loss": 0.4315, "step": 1151 }, { "epoch": 2.2647444298820445, "grad_norm": 0.20209087431430817, "learning_rate": 1.709197177146425e-06, "loss": 0.3992, "step": 1152 }, { "epoch": 2.2667103538663174, "grad_norm": 0.22701884806156158, "learning_rate": 1.700579877168918e-06, "loss": 0.422, "step": 1153 }, { "epoch": 2.26867627785059, "grad_norm": 0.21007543802261353, "learning_rate": 1.6919799017534505e-06, "loss": 0.4172, "step": 1154 }, { "epoch": 2.270642201834862, "grad_norm": 0.20358331501483917, "learning_rate": 1.6833972960566868e-06, "loss": 0.428, "step": 1155 }, { "epoch": 2.272608125819135, "grad_norm": 0.20329374074935913, "learning_rate": 1.6748321051440853e-06, "loss": 0.4287, "step": 1156 }, { "epoch": 2.2745740498034075, "grad_norm": 0.2196989506483078, "learning_rate": 1.6662843739896678e-06, "loss": 0.42, "step": 1157 }, { "epoch": 2.2765399737876804, "grad_norm": 0.22448624670505524, "learning_rate": 1.6577541474757712e-06, "loss": 0.4254, "step": 1158 }, { "epoch": 2.278505897771953, "grad_norm": 0.19686675071716309, "learning_rate": 1.6492414703928277e-06, "loss": 0.4204, "step": 1159 }, { "epoch": 2.280471821756225, "grad_norm": 0.20652048289775848, "learning_rate": 1.640746387439112e-06, "loss": 0.4214, "step": 1160 }, { "epoch": 2.282437745740498, "grad_norm": 0.21331384778022766, "learning_rate": 1.6322689432205252e-06, "loss": 0.4178, "step": 1161 }, { "epoch": 2.2844036697247705, "grad_norm": 0.2033684104681015, "learning_rate": 1.6238091822503426e-06, "loss": 0.4195, "step": 1162 }, { "epoch": 2.2863695937090434, "grad_norm": 0.20949597656726837, "learning_rate": 1.6153671489489925e-06, "loss": 0.4339, "step": 1163 }, { "epoch": 2.288335517693316, "grad_norm": 0.20488624274730682, "learning_rate": 1.6069428876438203e-06, "loss": 0.4191, "step": 1164 }, { "epoch": 2.2903014416775886, "grad_norm": 0.19698691368103027, "learning_rate": 1.5985364425688505e-06, "loss": 0.4094, "step": 1165 }, { "epoch": 2.292267365661861, "grad_norm": 0.20300184190273285, "learning_rate": 1.590147857864563e-06, "loss": 0.4183, "step": 1166 }, { "epoch": 2.2942332896461335, "grad_norm": 0.22306329011917114, "learning_rate": 1.5817771775776508e-06, "loss": 0.3995, "step": 1167 }, { "epoch": 2.2961992136304064, "grad_norm": 0.20931847393512726, "learning_rate": 1.5734244456608023e-06, "loss": 0.4185, "step": 1168 }, { "epoch": 2.2981651376146788, "grad_norm": 0.20768919587135315, "learning_rate": 1.5650897059724545e-06, "loss": 0.4257, "step": 1169 }, { "epoch": 2.3001310615989516, "grad_norm": 0.23441515862941742, "learning_rate": 1.5567730022765753e-06, "loss": 0.4214, "step": 1170 }, { "epoch": 2.302096985583224, "grad_norm": 0.20758725702762604, "learning_rate": 1.5484743782424317e-06, "loss": 0.421, "step": 1171 }, { "epoch": 2.304062909567497, "grad_norm": 0.20621424913406372, "learning_rate": 1.540193877444353e-06, "loss": 0.4153, "step": 1172 }, { "epoch": 2.3060288335517694, "grad_norm": 0.20830129086971283, "learning_rate": 1.5319315433615101e-06, "loss": 0.4295, "step": 1173 }, { "epoch": 2.3079947575360418, "grad_norm": 0.2129228115081787, "learning_rate": 1.5236874193776824e-06, "loss": 0.4108, "step": 1174 }, { "epoch": 2.3099606815203146, "grad_norm": 0.22020192444324493, "learning_rate": 1.515461548781036e-06, "loss": 0.4269, "step": 1175 }, { "epoch": 2.311926605504587, "grad_norm": 0.21290135383605957, "learning_rate": 1.5072539747638887e-06, "loss": 0.4373, "step": 1176 }, { "epoch": 2.31389252948886, "grad_norm": 0.2099660485982895, "learning_rate": 1.4990647404224856e-06, "loss": 0.4349, "step": 1177 }, { "epoch": 2.3158584534731324, "grad_norm": 0.19619333744049072, "learning_rate": 1.4908938887567797e-06, "loss": 0.3947, "step": 1178 }, { "epoch": 2.3178243774574048, "grad_norm": 0.21211563050746918, "learning_rate": 1.482741462670193e-06, "loss": 0.4304, "step": 1179 }, { "epoch": 2.3197903014416776, "grad_norm": 0.2202829271554947, "learning_rate": 1.4746075049694065e-06, "loss": 0.4376, "step": 1180 }, { "epoch": 2.32175622542595, "grad_norm": 0.20805491507053375, "learning_rate": 1.4664920583641196e-06, "loss": 0.4045, "step": 1181 }, { "epoch": 2.323722149410223, "grad_norm": 0.20760352909564972, "learning_rate": 1.4583951654668416e-06, "loss": 0.4068, "step": 1182 }, { "epoch": 2.3256880733944953, "grad_norm": 0.20598104596138, "learning_rate": 1.4503168687926533e-06, "loss": 0.4134, "step": 1183 }, { "epoch": 2.327653997378768, "grad_norm": 0.19814953207969666, "learning_rate": 1.4422572107589965e-06, "loss": 0.4028, "step": 1184 }, { "epoch": 2.3296199213630406, "grad_norm": 0.20932331681251526, "learning_rate": 1.434216233685441e-06, "loss": 0.4084, "step": 1185 }, { "epoch": 2.331585845347313, "grad_norm": 0.21522469818592072, "learning_rate": 1.426193979793467e-06, "loss": 0.416, "step": 1186 }, { "epoch": 2.333551769331586, "grad_norm": 0.20631645619869232, "learning_rate": 1.4181904912062482e-06, "loss": 0.4192, "step": 1187 }, { "epoch": 2.3355176933158583, "grad_norm": 0.2081977128982544, "learning_rate": 1.4102058099484188e-06, "loss": 0.4247, "step": 1188 }, { "epoch": 2.337483617300131, "grad_norm": 0.20724564790725708, "learning_rate": 1.4022399779458656e-06, "loss": 0.4513, "step": 1189 }, { "epoch": 2.3394495412844036, "grad_norm": 0.2072247415781021, "learning_rate": 1.3942930370254982e-06, "loss": 0.425, "step": 1190 }, { "epoch": 2.3414154652686765, "grad_norm": 0.22311511635780334, "learning_rate": 1.3863650289150338e-06, "loss": 0.4204, "step": 1191 }, { "epoch": 2.343381389252949, "grad_norm": 0.2175440937280655, "learning_rate": 1.3784559952427761e-06, "loss": 0.4231, "step": 1192 }, { "epoch": 2.3453473132372213, "grad_norm": 0.2023177295923233, "learning_rate": 1.3705659775374036e-06, "loss": 0.4218, "step": 1193 }, { "epoch": 2.347313237221494, "grad_norm": 0.1970416158437729, "learning_rate": 1.3626950172277398e-06, "loss": 0.4111, "step": 1194 }, { "epoch": 2.3492791612057666, "grad_norm": 0.20438626408576965, "learning_rate": 1.3548431556425423e-06, "loss": 0.4123, "step": 1195 }, { "epoch": 2.3512450851900395, "grad_norm": 0.19982948899269104, "learning_rate": 1.34701043401029e-06, "loss": 0.4094, "step": 1196 }, { "epoch": 2.353211009174312, "grad_norm": 0.21173065900802612, "learning_rate": 1.3391968934589573e-06, "loss": 0.4217, "step": 1197 }, { "epoch": 2.3551769331585843, "grad_norm": 0.2196841835975647, "learning_rate": 1.331402575015806e-06, "loss": 0.4232, "step": 1198 }, { "epoch": 2.357142857142857, "grad_norm": 0.2042812556028366, "learning_rate": 1.3236275196071641e-06, "loss": 0.4172, "step": 1199 }, { "epoch": 2.3591087811271296, "grad_norm": 0.21468588709831238, "learning_rate": 1.3158717680582128e-06, "loss": 0.4206, "step": 1200 }, { "epoch": 2.3610747051114025, "grad_norm": 0.2045600861310959, "learning_rate": 1.3081353610927777e-06, "loss": 0.4238, "step": 1201 }, { "epoch": 2.363040629095675, "grad_norm": 0.20586079359054565, "learning_rate": 1.3004183393331038e-06, "loss": 0.4233, "step": 1202 }, { "epoch": 2.3650065530799473, "grad_norm": 0.19963957369327545, "learning_rate": 1.292720743299654e-06, "loss": 0.404, "step": 1203 }, { "epoch": 2.36697247706422, "grad_norm": 0.22821871936321259, "learning_rate": 1.2850426134108856e-06, "loss": 0.4136, "step": 1204 }, { "epoch": 2.3689384010484926, "grad_norm": 0.20745152235031128, "learning_rate": 1.2773839899830487e-06, "loss": 0.4189, "step": 1205 }, { "epoch": 2.3709043250327655, "grad_norm": 0.18824845552444458, "learning_rate": 1.2697449132299649e-06, "loss": 0.4157, "step": 1206 }, { "epoch": 2.372870249017038, "grad_norm": 0.19603678584098816, "learning_rate": 1.2621254232628199e-06, "loss": 0.4237, "step": 1207 }, { "epoch": 2.374836173001311, "grad_norm": 0.2191622406244278, "learning_rate": 1.2545255600899587e-06, "loss": 0.4076, "step": 1208 }, { "epoch": 2.376802096985583, "grad_norm": 0.20817476511001587, "learning_rate": 1.2469453636166645e-06, "loss": 0.4118, "step": 1209 }, { "epoch": 2.378768020969856, "grad_norm": 0.21630269289016724, "learning_rate": 1.2393848736449559e-06, "loss": 0.4236, "step": 1210 }, { "epoch": 2.3807339449541285, "grad_norm": 0.19972224533557892, "learning_rate": 1.2318441298733796e-06, "loss": 0.4086, "step": 1211 }, { "epoch": 2.382699868938401, "grad_norm": 0.1989041119813919, "learning_rate": 1.224323171896797e-06, "loss": 0.4269, "step": 1212 }, { "epoch": 2.3846657929226738, "grad_norm": 0.20862899720668793, "learning_rate": 1.2168220392061775e-06, "loss": 0.4347, "step": 1213 }, { "epoch": 2.386631716906946, "grad_norm": 0.19993065297603607, "learning_rate": 1.2093407711883926e-06, "loss": 0.4258, "step": 1214 }, { "epoch": 2.388597640891219, "grad_norm": 0.20331786572933197, "learning_rate": 1.201879407126012e-06, "loss": 0.4497, "step": 1215 }, { "epoch": 2.3905635648754915, "grad_norm": 0.19923487305641174, "learning_rate": 1.1944379861970884e-06, "loss": 0.4437, "step": 1216 }, { "epoch": 2.392529488859764, "grad_norm": 0.20787520706653595, "learning_rate": 1.187016547474963e-06, "loss": 0.427, "step": 1217 }, { "epoch": 2.3944954128440368, "grad_norm": 0.20175479352474213, "learning_rate": 1.1796151299280483e-06, "loss": 0.4245, "step": 1218 }, { "epoch": 2.396461336828309, "grad_norm": 0.194331094622612, "learning_rate": 1.1722337724196365e-06, "loss": 0.4293, "step": 1219 }, { "epoch": 2.398427260812582, "grad_norm": 0.19052304327487946, "learning_rate": 1.1648725137076822e-06, "loss": 0.4242, "step": 1220 }, { "epoch": 2.4003931847968545, "grad_norm": 0.20737259089946747, "learning_rate": 1.1575313924446123e-06, "loss": 0.4301, "step": 1221 }, { "epoch": 2.402359108781127, "grad_norm": 0.21076960861682892, "learning_rate": 1.1502104471771109e-06, "loss": 0.4299, "step": 1222 }, { "epoch": 2.4043250327653998, "grad_norm": 0.19797001779079437, "learning_rate": 1.1429097163459219e-06, "loss": 0.4172, "step": 1223 }, { "epoch": 2.406290956749672, "grad_norm": 0.197958305478096, "learning_rate": 1.1356292382856531e-06, "loss": 0.4286, "step": 1224 }, { "epoch": 2.408256880733945, "grad_norm": 0.2084892839193344, "learning_rate": 1.1283690512245621e-06, "loss": 0.4317, "step": 1225 }, { "epoch": 2.4102228047182175, "grad_norm": 0.2041456699371338, "learning_rate": 1.1211291932843687e-06, "loss": 0.4218, "step": 1226 }, { "epoch": 2.4121887287024903, "grad_norm": 0.1887100487947464, "learning_rate": 1.113909702480046e-06, "loss": 0.4307, "step": 1227 }, { "epoch": 2.4141546526867628, "grad_norm": 0.21170374751091003, "learning_rate": 1.1067106167196217e-06, "loss": 0.4119, "step": 1228 }, { "epoch": 2.4161205766710356, "grad_norm": 0.2029750794172287, "learning_rate": 1.0995319738039855e-06, "loss": 0.4309, "step": 1229 }, { "epoch": 2.418086500655308, "grad_norm": 0.21280771493911743, "learning_rate": 1.0923738114266824e-06, "loss": 0.4182, "step": 1230 }, { "epoch": 2.4200524246395805, "grad_norm": 0.19209042191505432, "learning_rate": 1.08523616717372e-06, "loss": 0.417, "step": 1231 }, { "epoch": 2.4220183486238533, "grad_norm": 0.19855521619319916, "learning_rate": 1.078119078523367e-06, "loss": 0.4164, "step": 1232 }, { "epoch": 2.4239842726081258, "grad_norm": 0.2216549813747406, "learning_rate": 1.0710225828459642e-06, "loss": 0.422, "step": 1233 }, { "epoch": 2.4259501965923986, "grad_norm": 0.19830770790576935, "learning_rate": 1.0639467174037165e-06, "loss": 0.4192, "step": 1234 }, { "epoch": 2.427916120576671, "grad_norm": 0.20297428965568542, "learning_rate": 1.0568915193505103e-06, "loss": 0.4325, "step": 1235 }, { "epoch": 2.4298820445609435, "grad_norm": 0.2170720249414444, "learning_rate": 1.0498570257317075e-06, "loss": 0.4345, "step": 1236 }, { "epoch": 2.4318479685452163, "grad_norm": 0.20226174592971802, "learning_rate": 1.0428432734839543e-06, "loss": 0.4273, "step": 1237 }, { "epoch": 2.4338138925294888, "grad_norm": 0.19869181513786316, "learning_rate": 1.0358502994349945e-06, "loss": 0.4268, "step": 1238 }, { "epoch": 2.4357798165137616, "grad_norm": 0.20573928952217102, "learning_rate": 1.028878140303462e-06, "loss": 0.4256, "step": 1239 }, { "epoch": 2.437745740498034, "grad_norm": 0.1978793442249298, "learning_rate": 1.0219268326987035e-06, "loss": 0.4215, "step": 1240 }, { "epoch": 2.4397116644823065, "grad_norm": 0.19740813970565796, "learning_rate": 1.0149964131205724e-06, "loss": 0.415, "step": 1241 }, { "epoch": 2.4416775884665793, "grad_norm": 0.19393078982830048, "learning_rate": 1.008086917959249e-06, "loss": 0.4194, "step": 1242 }, { "epoch": 2.4436435124508518, "grad_norm": 0.20559275150299072, "learning_rate": 1.0011983834950389e-06, "loss": 0.4165, "step": 1243 }, { "epoch": 2.4456094364351246, "grad_norm": 0.19457489252090454, "learning_rate": 9.943308458981892e-07, "loss": 0.409, "step": 1244 }, { "epoch": 2.447575360419397, "grad_norm": 0.19760730862617493, "learning_rate": 9.874843412286994e-07, "loss": 0.4249, "step": 1245 }, { "epoch": 2.44954128440367, "grad_norm": 0.19084718823432922, "learning_rate": 9.806589054361255e-07, "loss": 0.4205, "step": 1246 }, { "epoch": 2.4515072083879423, "grad_norm": 0.2036636471748352, "learning_rate": 9.738545743593991e-07, "loss": 0.416, "step": 1247 }, { "epoch": 2.4534731323722148, "grad_norm": 0.1984458714723587, "learning_rate": 9.670713837266322e-07, "loss": 0.4235, "step": 1248 }, { "epoch": 2.4554390563564876, "grad_norm": 0.2098272740840912, "learning_rate": 9.603093691549348e-07, "loss": 0.4344, "step": 1249 }, { "epoch": 2.45740498034076, "grad_norm": 0.206814706325531, "learning_rate": 9.535685661502248e-07, "loss": 0.4232, "step": 1250 }, { "epoch": 2.459370904325033, "grad_norm": 0.2012605369091034, "learning_rate": 9.46849010107041e-07, "loss": 0.4076, "step": 1251 }, { "epoch": 2.4613368283093053, "grad_norm": 0.204511821269989, "learning_rate": 9.401507363083634e-07, "loss": 0.4219, "step": 1252 }, { "epoch": 2.463302752293578, "grad_norm": 0.19177298247814178, "learning_rate": 9.334737799254195e-07, "loss": 0.4103, "step": 1253 }, { "epoch": 2.4652686762778506, "grad_norm": 0.20843657851219177, "learning_rate": 9.26818176017506e-07, "loss": 0.4244, "step": 1254 }, { "epoch": 2.467234600262123, "grad_norm": 0.21537087857723236, "learning_rate": 9.201839595317991e-07, "loss": 0.4357, "step": 1255 }, { "epoch": 2.469200524246396, "grad_norm": 0.19909898936748505, "learning_rate": 9.135711653031781e-07, "loss": 0.4234, "step": 1256 }, { "epoch": 2.4711664482306683, "grad_norm": 0.19548694789409637, "learning_rate": 9.069798280540348e-07, "loss": 0.4144, "step": 1257 }, { "epoch": 2.473132372214941, "grad_norm": 0.2011128067970276, "learning_rate": 9.004099823940982e-07, "loss": 0.4165, "step": 1258 }, { "epoch": 2.4750982961992136, "grad_norm": 0.19573193788528442, "learning_rate": 8.938616628202478e-07, "loss": 0.4129, "step": 1259 }, { "epoch": 2.477064220183486, "grad_norm": 0.19089443981647491, "learning_rate": 8.87334903716332e-07, "loss": 0.4006, "step": 1260 }, { "epoch": 2.479030144167759, "grad_norm": 0.19790777564048767, "learning_rate": 8.808297393529946e-07, "loss": 0.4227, "step": 1261 }, { "epoch": 2.4809960681520313, "grad_norm": 0.21096652746200562, "learning_rate": 8.743462038874856e-07, "loss": 0.4232, "step": 1262 }, { "epoch": 2.482961992136304, "grad_norm": 0.20578713715076447, "learning_rate": 8.678843313634894e-07, "loss": 0.4274, "step": 1263 }, { "epoch": 2.4849279161205766, "grad_norm": 0.20468361675739288, "learning_rate": 8.614441557109388e-07, "loss": 0.4269, "step": 1264 }, { "epoch": 2.486893840104849, "grad_norm": 0.19431470334529877, "learning_rate": 8.550257107458471e-07, "loss": 0.4174, "step": 1265 }, { "epoch": 2.488859764089122, "grad_norm": 0.1942060887813568, "learning_rate": 8.486290301701183e-07, "loss": 0.4107, "step": 1266 }, { "epoch": 2.4908256880733943, "grad_norm": 0.20655293762683868, "learning_rate": 8.422541475713785e-07, "loss": 0.4332, "step": 1267 }, { "epoch": 2.492791612057667, "grad_norm": 0.19286088645458221, "learning_rate": 8.35901096422797e-07, "loss": 0.4246, "step": 1268 }, { "epoch": 2.4947575360419396, "grad_norm": 0.1953059434890747, "learning_rate": 8.295699100829124e-07, "loss": 0.4258, "step": 1269 }, { "epoch": 2.4967234600262125, "grad_norm": 0.21908029913902283, "learning_rate": 8.232606217954536e-07, "loss": 0.4419, "step": 1270 }, { "epoch": 2.498689384010485, "grad_norm": 0.1864355057477951, "learning_rate": 8.169732646891665e-07, "loss": 0.4069, "step": 1271 }, { "epoch": 2.5006553079947578, "grad_norm": 0.19375306367874146, "learning_rate": 8.107078717776457e-07, "loss": 0.4138, "step": 1272 }, { "epoch": 2.50262123197903, "grad_norm": 0.18619294464588165, "learning_rate": 8.044644759591519e-07, "loss": 0.4108, "step": 1273 }, { "epoch": 2.5045871559633026, "grad_norm": 0.20051321387290955, "learning_rate": 7.982431100164439e-07, "loss": 0.4241, "step": 1274 }, { "epoch": 2.5065530799475755, "grad_norm": 0.195013627409935, "learning_rate": 7.920438066166097e-07, "loss": 0.4239, "step": 1275 }, { "epoch": 2.508519003931848, "grad_norm": 0.19501401484012604, "learning_rate": 7.858665983108871e-07, "loss": 0.4446, "step": 1276 }, { "epoch": 2.5104849279161208, "grad_norm": 0.190773144364357, "learning_rate": 7.797115175345021e-07, "loss": 0.4097, "step": 1277 }, { "epoch": 2.512450851900393, "grad_norm": 0.18783928453922272, "learning_rate": 7.735785966064885e-07, "loss": 0.4115, "step": 1278 }, { "epoch": 2.5144167758846656, "grad_norm": 0.1892012506723404, "learning_rate": 7.674678677295277e-07, "loss": 0.4126, "step": 1279 }, { "epoch": 2.5163826998689385, "grad_norm": 0.20406071841716766, "learning_rate": 7.613793629897732e-07, "loss": 0.439, "step": 1280 }, { "epoch": 2.518348623853211, "grad_norm": 0.20593975484371185, "learning_rate": 7.553131143566822e-07, "loss": 0.4392, "step": 1281 }, { "epoch": 2.5203145478374838, "grad_norm": 0.19002759456634521, "learning_rate": 7.492691536828556e-07, "loss": 0.4296, "step": 1282 }, { "epoch": 2.522280471821756, "grad_norm": 0.19039657711982727, "learning_rate": 7.432475127038591e-07, "loss": 0.4028, "step": 1283 }, { "epoch": 2.5242463958060286, "grad_norm": 0.1925535649061203, "learning_rate": 7.372482230380657e-07, "loss": 0.4276, "step": 1284 }, { "epoch": 2.5262123197903015, "grad_norm": 0.19870179891586304, "learning_rate": 7.312713161864854e-07, "loss": 0.4313, "step": 1285 }, { "epoch": 2.528178243774574, "grad_norm": 0.20361679792404175, "learning_rate": 7.253168235325992e-07, "loss": 0.4296, "step": 1286 }, { "epoch": 2.5301441677588468, "grad_norm": 0.19246189296245575, "learning_rate": 7.193847763421991e-07, "loss": 0.4178, "step": 1287 }, { "epoch": 2.532110091743119, "grad_norm": 0.18752935528755188, "learning_rate": 7.134752057632188e-07, "loss": 0.4044, "step": 1288 }, { "epoch": 2.5340760157273916, "grad_norm": 0.1949397772550583, "learning_rate": 7.07588142825571e-07, "loss": 0.4116, "step": 1289 }, { "epoch": 2.5360419397116645, "grad_norm": 0.19233138859272003, "learning_rate": 7.017236184409859e-07, "loss": 0.4207, "step": 1290 }, { "epoch": 2.5380078636959373, "grad_norm": 0.1937728077173233, "learning_rate": 6.95881663402852e-07, "loss": 0.3934, "step": 1291 }, { "epoch": 2.5399737876802098, "grad_norm": 0.19518445432186127, "learning_rate": 6.900623083860453e-07, "loss": 0.4228, "step": 1292 }, { "epoch": 2.541939711664482, "grad_norm": 0.19948603212833405, "learning_rate": 6.842655839467787e-07, "loss": 0.4223, "step": 1293 }, { "epoch": 2.543905635648755, "grad_norm": 0.1915956288576126, "learning_rate": 6.78491520522433e-07, "loss": 0.4169, "step": 1294 }, { "epoch": 2.5458715596330275, "grad_norm": 0.19227120280265808, "learning_rate": 6.72740148431405e-07, "loss": 0.4069, "step": 1295 }, { "epoch": 2.5478374836173003, "grad_norm": 0.21147392690181732, "learning_rate": 6.670114978729392e-07, "loss": 0.4388, "step": 1296 }, { "epoch": 2.5498034076015728, "grad_norm": 0.19551649689674377, "learning_rate": 6.613055989269762e-07, "loss": 0.4349, "step": 1297 }, { "epoch": 2.551769331585845, "grad_norm": 0.1957281529903412, "learning_rate": 6.556224815539946e-07, "loss": 0.4079, "step": 1298 }, { "epoch": 2.553735255570118, "grad_norm": 0.19849686324596405, "learning_rate": 6.499621755948487e-07, "loss": 0.4161, "step": 1299 }, { "epoch": 2.5557011795543905, "grad_norm": 0.19393165409564972, "learning_rate": 6.443247107706174e-07, "loss": 0.4336, "step": 1300 }, { "epoch": 2.5576671035386633, "grad_norm": 0.21561844646930695, "learning_rate": 6.387101166824422e-07, "loss": 0.4394, "step": 1301 }, { "epoch": 2.5596330275229358, "grad_norm": 0.1955285668373108, "learning_rate": 6.331184228113801e-07, "loss": 0.4319, "step": 1302 }, { "epoch": 2.561598951507208, "grad_norm": 0.21019408106803894, "learning_rate": 6.2754965851824e-07, "loss": 0.4322, "step": 1303 }, { "epoch": 2.563564875491481, "grad_norm": 0.19174997508525848, "learning_rate": 6.220038530434319e-07, "loss": 0.4167, "step": 1304 }, { "epoch": 2.5655307994757535, "grad_norm": 0.2004367858171463, "learning_rate": 6.164810355068179e-07, "loss": 0.4347, "step": 1305 }, { "epoch": 2.5674967234600263, "grad_norm": 0.211960107088089, "learning_rate": 6.109812349075517e-07, "loss": 0.4186, "step": 1306 }, { "epoch": 2.5694626474442988, "grad_norm": 0.20421504974365234, "learning_rate": 6.055044801239313e-07, "loss": 0.4085, "step": 1307 }, { "epoch": 2.571428571428571, "grad_norm": 0.19226616621017456, "learning_rate": 6.000507999132444e-07, "loss": 0.4308, "step": 1308 }, { "epoch": 2.573394495412844, "grad_norm": 0.1891256868839264, "learning_rate": 5.946202229116227e-07, "loss": 0.4318, "step": 1309 }, { "epoch": 2.575360419397117, "grad_norm": 0.189310222864151, "learning_rate": 5.892127776338841e-07, "loss": 0.3998, "step": 1310 }, { "epoch": 2.5773263433813893, "grad_norm": 0.19134820997714996, "learning_rate": 5.838284924733866e-07, "loss": 0.4302, "step": 1311 }, { "epoch": 2.5792922673656618, "grad_norm": 0.19616124033927917, "learning_rate": 5.784673957018833e-07, "loss": 0.4245, "step": 1312 }, { "epoch": 2.5812581913499346, "grad_norm": 0.18709275126457214, "learning_rate": 5.731295154693644e-07, "loss": 0.4081, "step": 1313 }, { "epoch": 2.583224115334207, "grad_norm": 0.1914079487323761, "learning_rate": 5.678148798039213e-07, "loss": 0.4051, "step": 1314 }, { "epoch": 2.58519003931848, "grad_norm": 0.19706860184669495, "learning_rate": 5.625235166115855e-07, "loss": 0.4243, "step": 1315 }, { "epoch": 2.5871559633027523, "grad_norm": 0.19415414333343506, "learning_rate": 5.572554536761976e-07, "loss": 0.4348, "step": 1316 }, { "epoch": 2.5891218872870247, "grad_norm": 0.1907108873128891, "learning_rate": 5.520107186592477e-07, "loss": 0.4183, "step": 1317 }, { "epoch": 2.5910878112712976, "grad_norm": 0.20467859506607056, "learning_rate": 5.467893390997369e-07, "loss": 0.4101, "step": 1318 }, { "epoch": 2.59305373525557, "grad_norm": 0.19916951656341553, "learning_rate": 5.41591342414034e-07, "loss": 0.4148, "step": 1319 }, { "epoch": 2.595019659239843, "grad_norm": 0.1953822374343872, "learning_rate": 5.364167558957267e-07, "loss": 0.4188, "step": 1320 }, { "epoch": 2.5969855832241153, "grad_norm": 0.1931547224521637, "learning_rate": 5.312656067154831e-07, "loss": 0.4185, "step": 1321 }, { "epoch": 2.5989515072083877, "grad_norm": 0.19639557600021362, "learning_rate": 5.261379219209045e-07, "loss": 0.4142, "step": 1322 }, { "epoch": 2.6009174311926606, "grad_norm": 0.20015166699886322, "learning_rate": 5.210337284363876e-07, "loss": 0.4174, "step": 1323 }, { "epoch": 2.602883355176933, "grad_norm": 0.20684877038002014, "learning_rate": 5.159530530629803e-07, "loss": 0.4344, "step": 1324 }, { "epoch": 2.604849279161206, "grad_norm": 0.1928563117980957, "learning_rate": 5.108959224782406e-07, "loss": 0.4256, "step": 1325 }, { "epoch": 2.6068152031454783, "grad_norm": 0.19201450049877167, "learning_rate": 5.058623632361004e-07, "loss": 0.4047, "step": 1326 }, { "epoch": 2.6087811271297507, "grad_norm": 0.20671480894088745, "learning_rate": 5.008524017667204e-07, "loss": 0.4457, "step": 1327 }, { "epoch": 2.6107470511140236, "grad_norm": 0.19633173942565918, "learning_rate": 4.958660643763574e-07, "loss": 0.4013, "step": 1328 }, { "epoch": 2.6127129750982965, "grad_norm": 0.2064533680677414, "learning_rate": 4.909033772472204e-07, "loss": 0.4332, "step": 1329 }, { "epoch": 2.614678899082569, "grad_norm": 0.21506528556346893, "learning_rate": 4.859643664373387e-07, "loss": 0.4516, "step": 1330 }, { "epoch": 2.6166448230668413, "grad_norm": 0.19731992483139038, "learning_rate": 4.810490578804195e-07, "loss": 0.4359, "step": 1331 }, { "epoch": 2.618610747051114, "grad_norm": 0.18773064017295837, "learning_rate": 4.7615747738571636e-07, "loss": 0.4152, "step": 1332 }, { "epoch": 2.6205766710353866, "grad_norm": 0.1858416497707367, "learning_rate": 4.7128965063789054e-07, "loss": 0.4246, "step": 1333 }, { "epoch": 2.6225425950196595, "grad_norm": 0.18619361519813538, "learning_rate": 4.664456031968773e-07, "loss": 0.4199, "step": 1334 }, { "epoch": 2.624508519003932, "grad_norm": 0.19577224552631378, "learning_rate": 4.6162536049775387e-07, "loss": 0.4125, "step": 1335 }, { "epoch": 2.6264744429882043, "grad_norm": 0.1945699155330658, "learning_rate": 4.5682894785059995e-07, "loss": 0.4206, "step": 1336 }, { "epoch": 2.628440366972477, "grad_norm": 0.19429698586463928, "learning_rate": 4.520563904403735e-07, "loss": 0.4014, "step": 1337 }, { "epoch": 2.6304062909567496, "grad_norm": 0.2008766531944275, "learning_rate": 4.473077133267684e-07, "loss": 0.426, "step": 1338 }, { "epoch": 2.6323722149410225, "grad_norm": 0.20195744931697845, "learning_rate": 4.42582941444093e-07, "loss": 0.4237, "step": 1339 }, { "epoch": 2.634338138925295, "grad_norm": 0.189142107963562, "learning_rate": 4.378820996011307e-07, "loss": 0.4153, "step": 1340 }, { "epoch": 2.6363040629095673, "grad_norm": 0.18883055448532104, "learning_rate": 4.3320521248101487e-07, "loss": 0.3998, "step": 1341 }, { "epoch": 2.63826998689384, "grad_norm": 0.18498775362968445, "learning_rate": 4.2855230464109775e-07, "loss": 0.4135, "step": 1342 }, { "epoch": 2.6402359108781126, "grad_norm": 0.19491244852542877, "learning_rate": 4.239234005128212e-07, "loss": 0.4074, "step": 1343 }, { "epoch": 2.6422018348623855, "grad_norm": 0.20609928667545319, "learning_rate": 4.193185244015879e-07, "loss": 0.4149, "step": 1344 }, { "epoch": 2.644167758846658, "grad_norm": 0.1882026344537735, "learning_rate": 4.1473770048663487e-07, "loss": 0.416, "step": 1345 }, { "epoch": 2.6461336828309303, "grad_norm": 0.19239036738872528, "learning_rate": 4.1018095282090775e-07, "loss": 0.434, "step": 1346 }, { "epoch": 2.648099606815203, "grad_norm": 0.188425675034523, "learning_rate": 4.0564830533093014e-07, "loss": 0.4144, "step": 1347 }, { "epoch": 2.6500655307994756, "grad_norm": 0.1889129877090454, "learning_rate": 4.011397818166818e-07, "loss": 0.41, "step": 1348 }, { "epoch": 2.6520314547837485, "grad_norm": 0.19384776055812836, "learning_rate": 3.9665540595147376e-07, "loss": 0.4265, "step": 1349 }, { "epoch": 2.653997378768021, "grad_norm": 0.18682250380516052, "learning_rate": 3.9219520128182087e-07, "loss": 0.4165, "step": 1350 }, { "epoch": 2.6559633027522933, "grad_norm": 0.19491790235042572, "learning_rate": 3.877591912273215e-07, "loss": 0.42, "step": 1351 }, { "epoch": 2.657929226736566, "grad_norm": 0.19894134998321533, "learning_rate": 3.8334739908053196e-07, "loss": 0.4155, "step": 1352 }, { "epoch": 2.659895150720839, "grad_norm": 0.18585951626300812, "learning_rate": 3.789598480068479e-07, "loss": 0.4262, "step": 1353 }, { "epoch": 2.6618610747051115, "grad_norm": 0.19301895797252655, "learning_rate": 3.745965610443769e-07, "loss": 0.415, "step": 1354 }, { "epoch": 2.663826998689384, "grad_norm": 0.1924465447664261, "learning_rate": 3.702575611038217e-07, "loss": 0.4182, "step": 1355 }, { "epoch": 2.6657929226736568, "grad_norm": 0.18831956386566162, "learning_rate": 3.659428709683621e-07, "loss": 0.4143, "step": 1356 }, { "epoch": 2.667758846657929, "grad_norm": 0.19500558078289032, "learning_rate": 3.616525132935267e-07, "loss": 0.4196, "step": 1357 }, { "epoch": 2.669724770642202, "grad_norm": 0.1873614639043808, "learning_rate": 3.573865106070851e-07, "loss": 0.4095, "step": 1358 }, { "epoch": 2.6716906946264745, "grad_norm": 0.18997088074684143, "learning_rate": 3.531448853089192e-07, "loss": 0.4059, "step": 1359 }, { "epoch": 2.673656618610747, "grad_norm": 0.19585131108760834, "learning_rate": 3.489276596709146e-07, "loss": 0.4334, "step": 1360 }, { "epoch": 2.6756225425950197, "grad_norm": 0.19634681940078735, "learning_rate": 3.4473485583683576e-07, "loss": 0.4085, "step": 1361 }, { "epoch": 2.677588466579292, "grad_norm": 0.18681412935256958, "learning_rate": 3.40566495822216e-07, "loss": 0.4217, "step": 1362 }, { "epoch": 2.679554390563565, "grad_norm": 0.20143257081508636, "learning_rate": 3.364226015142369e-07, "loss": 0.4248, "step": 1363 }, { "epoch": 2.6815203145478375, "grad_norm": 0.2103082686662674, "learning_rate": 3.323031946716182e-07, "loss": 0.4346, "step": 1364 }, { "epoch": 2.68348623853211, "grad_norm": 0.19275066256523132, "learning_rate": 3.2820829692449984e-07, "loss": 0.4544, "step": 1365 }, { "epoch": 2.6854521625163827, "grad_norm": 0.18487761914730072, "learning_rate": 3.2413792977432856e-07, "loss": 0.3919, "step": 1366 }, { "epoch": 2.687418086500655, "grad_norm": 0.18378794193267822, "learning_rate": 3.2009211459374913e-07, "loss": 0.4259, "step": 1367 }, { "epoch": 2.689384010484928, "grad_norm": 0.18482978641986847, "learning_rate": 3.160708726264855e-07, "loss": 0.4222, "step": 1368 }, { "epoch": 2.6913499344692005, "grad_norm": 0.19463540613651276, "learning_rate": 3.1207422498723663e-07, "loss": 0.4228, "step": 1369 }, { "epoch": 2.693315858453473, "grad_norm": 0.19638504087924957, "learning_rate": 3.081021926615585e-07, "loss": 0.4058, "step": 1370 }, { "epoch": 2.6952817824377457, "grad_norm": 0.19137264788150787, "learning_rate": 3.0415479650575783e-07, "loss": 0.4124, "step": 1371 }, { "epoch": 2.6972477064220186, "grad_norm": 0.1765514612197876, "learning_rate": 3.0023205724678483e-07, "loss": 0.405, "step": 1372 }, { "epoch": 2.699213630406291, "grad_norm": 0.1941332072019577, "learning_rate": 2.9633399548211707e-07, "loss": 0.4081, "step": 1373 }, { "epoch": 2.7011795543905635, "grad_norm": 0.19398586452007294, "learning_rate": 2.9246063167965963e-07, "loss": 0.4279, "step": 1374 }, { "epoch": 2.7031454783748363, "grad_norm": 0.19305099546909332, "learning_rate": 2.8861198617763154e-07, "loss": 0.4139, "step": 1375 }, { "epoch": 2.7051114023591087, "grad_norm": 0.18326827883720398, "learning_rate": 2.8478807918446315e-07, "loss": 0.4204, "step": 1376 }, { "epoch": 2.7070773263433816, "grad_norm": 0.1823471635580063, "learning_rate": 2.809889307786856e-07, "loss": 0.4086, "step": 1377 }, { "epoch": 2.709043250327654, "grad_norm": 0.18850870430469513, "learning_rate": 2.7721456090882893e-07, "loss": 0.4252, "step": 1378 }, { "epoch": 2.7110091743119265, "grad_norm": 0.18475760519504547, "learning_rate": 2.734649893933178e-07, "loss": 0.4186, "step": 1379 }, { "epoch": 2.7129750982961993, "grad_norm": 0.19123856723308563, "learning_rate": 2.697402359203638e-07, "loss": 0.4288, "step": 1380 }, { "epoch": 2.7149410222804717, "grad_norm": 0.1787601113319397, "learning_rate": 2.6604032004786563e-07, "loss": 0.4028, "step": 1381 }, { "epoch": 2.7169069462647446, "grad_norm": 0.1857636570930481, "learning_rate": 2.6236526120330395e-07, "loss": 0.4107, "step": 1382 }, { "epoch": 2.718872870249017, "grad_norm": 0.18619926273822784, "learning_rate": 2.587150786836407e-07, "loss": 0.4234, "step": 1383 }, { "epoch": 2.7208387942332894, "grad_norm": 0.1910834163427353, "learning_rate": 2.550897916552181e-07, "loss": 0.4183, "step": 1384 }, { "epoch": 2.7228047182175623, "grad_norm": 0.18822459876537323, "learning_rate": 2.51489419153656e-07, "loss": 0.3991, "step": 1385 }, { "epoch": 2.7247706422018347, "grad_norm": 0.19424988329410553, "learning_rate": 2.4791398008375545e-07, "loss": 0.4327, "step": 1386 }, { "epoch": 2.7267365661861076, "grad_norm": 0.2632913887500763, "learning_rate": 2.4436349321939447e-07, "loss": 0.4187, "step": 1387 }, { "epoch": 2.72870249017038, "grad_norm": 0.18020807206630707, "learning_rate": 2.408379772034353e-07, "loss": 0.4197, "step": 1388 }, { "epoch": 2.7306684141546524, "grad_norm": 0.185395285487175, "learning_rate": 2.3733745054762059e-07, "loss": 0.4084, "step": 1389 }, { "epoch": 2.7326343381389253, "grad_norm": 0.19016891717910767, "learning_rate": 2.3386193163248193e-07, "loss": 0.4444, "step": 1390 }, { "epoch": 2.734600262123198, "grad_norm": 0.18271036446094513, "learning_rate": 2.3041143870723925e-07, "loss": 0.4086, "step": 1391 }, { "epoch": 2.7365661861074706, "grad_norm": 0.18831001222133636, "learning_rate": 2.2698598988970422e-07, "loss": 0.4097, "step": 1392 }, { "epoch": 2.738532110091743, "grad_norm": 0.18966880440711975, "learning_rate": 2.2358560316619093e-07, "loss": 0.4094, "step": 1393 }, { "epoch": 2.740498034076016, "grad_norm": 0.19208042323589325, "learning_rate": 2.2021029639141435e-07, "loss": 0.4268, "step": 1394 }, { "epoch": 2.7424639580602883, "grad_norm": 0.1806749701499939, "learning_rate": 2.1686008728840301e-07, "loss": 0.4164, "step": 1395 }, { "epoch": 2.744429882044561, "grad_norm": 0.19171354174613953, "learning_rate": 2.135349934483999e-07, "loss": 0.4241, "step": 1396 }, { "epoch": 2.7463958060288336, "grad_norm": 0.1913602203130722, "learning_rate": 2.102350323307756e-07, "loss": 0.4216, "step": 1397 }, { "epoch": 2.748361730013106, "grad_norm": 0.1981024146080017, "learning_rate": 2.0696022126293126e-07, "loss": 0.4092, "step": 1398 }, { "epoch": 2.750327653997379, "grad_norm": 0.18847931921482086, "learning_rate": 2.0371057744021315e-07, "loss": 0.4213, "step": 1399 }, { "epoch": 2.7522935779816513, "grad_norm": 0.1934966892004013, "learning_rate": 2.004861179258183e-07, "loss": 0.4432, "step": 1400 }, { "epoch": 2.754259501965924, "grad_norm": 0.18944783508777618, "learning_rate": 1.9728685965070604e-07, "loss": 0.4146, "step": 1401 }, { "epoch": 2.7562254259501966, "grad_norm": 0.19792866706848145, "learning_rate": 1.9411281941351001e-07, "loss": 0.4145, "step": 1402 }, { "epoch": 2.758191349934469, "grad_norm": 0.1858528107404709, "learning_rate": 1.9096401388044695e-07, "loss": 0.4184, "step": 1403 }, { "epoch": 2.760157273918742, "grad_norm": 0.1945904940366745, "learning_rate": 1.8784045958523623e-07, "loss": 0.4196, "step": 1404 }, { "epoch": 2.7621231979030143, "grad_norm": 0.18836303055286407, "learning_rate": 1.8474217292900275e-07, "loss": 0.4256, "step": 1405 }, { "epoch": 2.764089121887287, "grad_norm": 0.18192465603351593, "learning_rate": 1.816691701802009e-07, "loss": 0.4093, "step": 1406 }, { "epoch": 2.7660550458715596, "grad_norm": 0.19344103336334229, "learning_rate": 1.786214674745218e-07, "loss": 0.4229, "step": 1407 }, { "epoch": 2.768020969855832, "grad_norm": 0.19816488027572632, "learning_rate": 1.7559908081481225e-07, "loss": 0.4329, "step": 1408 }, { "epoch": 2.769986893840105, "grad_norm": 0.1782555729150772, "learning_rate": 1.7260202607098985e-07, "loss": 0.3958, "step": 1409 }, { "epoch": 2.7719528178243773, "grad_norm": 0.19153274595737457, "learning_rate": 1.6963031897995863e-07, "loss": 0.4101, "step": 1410 }, { "epoch": 2.77391874180865, "grad_norm": 0.1951267421245575, "learning_rate": 1.6668397514553013e-07, "loss": 0.4227, "step": 1411 }, { "epoch": 2.7758846657929226, "grad_norm": 0.20096047222614288, "learning_rate": 1.6376301003833583e-07, "loss": 0.4401, "step": 1412 }, { "epoch": 2.777850589777195, "grad_norm": 0.18297040462493896, "learning_rate": 1.6086743899575042e-07, "loss": 0.4181, "step": 1413 }, { "epoch": 2.779816513761468, "grad_norm": 0.18248505890369415, "learning_rate": 1.5799727722180858e-07, "loss": 0.4128, "step": 1414 }, { "epoch": 2.7817824377457407, "grad_norm": 0.1905166059732437, "learning_rate": 1.551525397871273e-07, "loss": 0.4121, "step": 1415 }, { "epoch": 2.783748361730013, "grad_norm": 0.18732059001922607, "learning_rate": 1.523332416288259e-07, "loss": 0.4315, "step": 1416 }, { "epoch": 2.7857142857142856, "grad_norm": 0.18855880200862885, "learning_rate": 1.4953939755044556e-07, "loss": 0.4245, "step": 1417 }, { "epoch": 2.7876802096985585, "grad_norm": 0.18331807851791382, "learning_rate": 1.46771022221876e-07, "loss": 0.4042, "step": 1418 }, { "epoch": 2.789646133682831, "grad_norm": 0.19505196809768677, "learning_rate": 1.4402813017927396e-07, "loss": 0.4266, "step": 1419 }, { "epoch": 2.7916120576671037, "grad_norm": 0.19084881246089935, "learning_rate": 1.413107358249899e-07, "loss": 0.4289, "step": 1420 }, { "epoch": 2.793577981651376, "grad_norm": 0.18647164106369019, "learning_rate": 1.386188534274896e-07, "loss": 0.4322, "step": 1421 }, { "epoch": 2.7955439056356486, "grad_norm": 0.19274601340293884, "learning_rate": 1.3595249712128334e-07, "loss": 0.4357, "step": 1422 }, { "epoch": 2.7975098296199215, "grad_norm": 0.18728584051132202, "learning_rate": 1.3331168090684742e-07, "loss": 0.4352, "step": 1423 }, { "epoch": 2.799475753604194, "grad_norm": 0.18775223195552826, "learning_rate": 1.3069641865055328e-07, "loss": 0.4336, "step": 1424 }, { "epoch": 2.8014416775884667, "grad_norm": 0.18465016782283783, "learning_rate": 1.28106724084594e-07, "loss": 0.4149, "step": 1425 }, { "epoch": 2.803407601572739, "grad_norm": 0.18570895493030548, "learning_rate": 1.2554261080691076e-07, "loss": 0.44, "step": 1426 }, { "epoch": 2.8053735255570116, "grad_norm": 0.18735641241073608, "learning_rate": 1.2300409228112541e-07, "loss": 0.3975, "step": 1427 }, { "epoch": 2.8073394495412844, "grad_norm": 0.1925758272409439, "learning_rate": 1.2049118183646403e-07, "loss": 0.4266, "step": 1428 }, { "epoch": 2.809305373525557, "grad_norm": 0.1925830841064453, "learning_rate": 1.1800389266769242e-07, "loss": 0.4065, "step": 1429 }, { "epoch": 2.8112712975098297, "grad_norm": 0.18923647701740265, "learning_rate": 1.1554223783504348e-07, "loss": 0.4028, "step": 1430 }, { "epoch": 2.813237221494102, "grad_norm": 0.1853543221950531, "learning_rate": 1.1310623026414891e-07, "loss": 0.4208, "step": 1431 }, { "epoch": 2.8152031454783746, "grad_norm": 0.18889391422271729, "learning_rate": 1.1069588274597365e-07, "loss": 0.4289, "step": 1432 }, { "epoch": 2.8171690694626474, "grad_norm": 0.19322317838668823, "learning_rate": 1.0831120793674598e-07, "loss": 0.4118, "step": 1433 }, { "epoch": 2.8191349934469203, "grad_norm": 0.18789125978946686, "learning_rate": 1.059522183578926e-07, "loss": 0.4263, "step": 1434 }, { "epoch": 2.8211009174311927, "grad_norm": 0.1874559223651886, "learning_rate": 1.0361892639597193e-07, "loss": 0.4064, "step": 1435 }, { "epoch": 2.823066841415465, "grad_norm": 0.18816040456295013, "learning_rate": 1.0131134430261036e-07, "loss": 0.4146, "step": 1436 }, { "epoch": 2.825032765399738, "grad_norm": 0.18052305281162262, "learning_rate": 9.902948419443669e-08, "loss": 0.414, "step": 1437 }, { "epoch": 2.8269986893840104, "grad_norm": 0.18447034060955048, "learning_rate": 9.67733580530189e-08, "loss": 0.4122, "step": 1438 }, { "epoch": 2.8289646133682833, "grad_norm": 0.19152402877807617, "learning_rate": 9.454297772480137e-08, "loss": 0.4309, "step": 1439 }, { "epoch": 2.8309305373525557, "grad_norm": 0.18504272401332855, "learning_rate": 9.233835492104326e-08, "loss": 0.4138, "step": 1440 }, { "epoch": 2.832896461336828, "grad_norm": 0.19270551204681396, "learning_rate": 9.015950121775474e-08, "loss": 0.4307, "step": 1441 }, { "epoch": 2.834862385321101, "grad_norm": 0.189494326710701, "learning_rate": 8.800642805563975e-08, "loss": 0.4145, "step": 1442 }, { "epoch": 2.8368283093053734, "grad_norm": 0.18586067855358124, "learning_rate": 8.587914674003384e-08, "loss": 0.4264, "step": 1443 }, { "epoch": 2.8387942332896463, "grad_norm": 0.18750880658626556, "learning_rate": 8.377766844084311e-08, "loss": 0.4302, "step": 1444 }, { "epoch": 2.8407601572739187, "grad_norm": 0.1872173547744751, "learning_rate": 8.170200419248931e-08, "loss": 0.4175, "step": 1445 }, { "epoch": 2.842726081258191, "grad_norm": 0.19608041644096375, "learning_rate": 7.965216489384919e-08, "loss": 0.4297, "step": 1446 }, { "epoch": 2.844692005242464, "grad_norm": 0.18357668817043304, "learning_rate": 7.762816130819862e-08, "loss": 0.4288, "step": 1447 }, { "epoch": 2.8466579292267364, "grad_norm": 0.18124645948410034, "learning_rate": 7.563000406315579e-08, "loss": 0.4069, "step": 1448 }, { "epoch": 2.8486238532110093, "grad_norm": 0.19631488621234894, "learning_rate": 7.365770365062308e-08, "loss": 0.4277, "step": 1449 }, { "epoch": 2.8505897771952817, "grad_norm": 0.18377605080604553, "learning_rate": 7.171127042673753e-08, "loss": 0.406, "step": 1450 }, { "epoch": 2.852555701179554, "grad_norm": 0.18855401873588562, "learning_rate": 6.979071461181042e-08, "loss": 0.4212, "step": 1451 }, { "epoch": 2.854521625163827, "grad_norm": 0.19027647376060486, "learning_rate": 6.789604629027614e-08, "loss": 0.4386, "step": 1452 }, { "epoch": 2.8564875491481, "grad_norm": 0.18420332670211792, "learning_rate": 6.602727541064114e-08, "loss": 0.4125, "step": 1453 }, { "epoch": 2.8584534731323723, "grad_norm": 0.19108708202838898, "learning_rate": 6.418441178542845e-08, "loss": 0.4264, "step": 1454 }, { "epoch": 2.8604193971166447, "grad_norm": 0.19463279843330383, "learning_rate": 6.236746509112824e-08, "loss": 0.4392, "step": 1455 }, { "epoch": 2.8623853211009176, "grad_norm": 0.21023961901664734, "learning_rate": 6.057644486814507e-08, "loss": 0.4326, "step": 1456 }, { "epoch": 2.86435124508519, "grad_norm": 0.18252646923065186, "learning_rate": 5.881136052075076e-08, "loss": 0.4186, "step": 1457 }, { "epoch": 2.866317169069463, "grad_norm": 0.19727899134159088, "learning_rate": 5.707222131703216e-08, "loss": 0.4393, "step": 1458 }, { "epoch": 2.8682830930537353, "grad_norm": 0.18230728805065155, "learning_rate": 5.535903638884399e-08, "loss": 0.4199, "step": 1459 }, { "epoch": 2.8702490170380077, "grad_norm": 0.18964985013008118, "learning_rate": 5.367181473176053e-08, "loss": 0.4114, "step": 1460 }, { "epoch": 2.8722149410222806, "grad_norm": 0.1857885718345642, "learning_rate": 5.201056520502734e-08, "loss": 0.4117, "step": 1461 }, { "epoch": 2.874180865006553, "grad_norm": 0.18905998766422272, "learning_rate": 5.0375296531518516e-08, "loss": 0.4263, "step": 1462 }, { "epoch": 2.876146788990826, "grad_norm": 0.18672683835029602, "learning_rate": 4.876601729768504e-08, "loss": 0.403, "step": 1463 }, { "epoch": 2.8781127129750983, "grad_norm": 0.18833793699741364, "learning_rate": 4.718273595351486e-08, "loss": 0.4229, "step": 1464 }, { "epoch": 2.8800786369593707, "grad_norm": 0.18816186487674713, "learning_rate": 4.5625460812485644e-08, "loss": 0.4273, "step": 1465 }, { "epoch": 2.8820445609436436, "grad_norm": 0.1809256672859192, "learning_rate": 4.409420005152154e-08, "loss": 0.4101, "step": 1466 }, { "epoch": 2.884010484927916, "grad_norm": 0.19573166966438293, "learning_rate": 4.25889617109515e-08, "loss": 0.4299, "step": 1467 }, { "epoch": 2.885976408912189, "grad_norm": 0.19006499648094177, "learning_rate": 4.1109753694466014e-08, "loss": 0.4442, "step": 1468 }, { "epoch": 2.8879423328964613, "grad_norm": 0.18832284212112427, "learning_rate": 3.965658376907544e-08, "loss": 0.4221, "step": 1469 }, { "epoch": 2.8899082568807337, "grad_norm": 0.189764603972435, "learning_rate": 3.8229459565070074e-08, "loss": 0.4172, "step": 1470 }, { "epoch": 2.8918741808650066, "grad_norm": 0.18678680062294006, "learning_rate": 3.682838857597959e-08, "loss": 0.44, "step": 1471 }, { "epoch": 2.893840104849279, "grad_norm": 0.19102615118026733, "learning_rate": 3.545337815853256e-08, "loss": 0.4117, "step": 1472 }, { "epoch": 2.895806028833552, "grad_norm": 0.18888624012470245, "learning_rate": 3.410443553262033e-08, "loss": 0.4203, "step": 1473 }, { "epoch": 2.8977719528178243, "grad_norm": 0.18853890895843506, "learning_rate": 3.2781567781258185e-08, "loss": 0.4149, "step": 1474 }, { "epoch": 2.8997378768020967, "grad_norm": 0.19942954182624817, "learning_rate": 3.1484781850545375e-08, "loss": 0.4416, "step": 1475 }, { "epoch": 2.9017038007863696, "grad_norm": 0.18686319887638092, "learning_rate": 3.0214084549632925e-08, "loss": 0.4301, "step": 1476 }, { "epoch": 2.9036697247706424, "grad_norm": 0.18849965929985046, "learning_rate": 2.8969482550686435e-08, "loss": 0.4187, "step": 1477 }, { "epoch": 2.905635648754915, "grad_norm": 0.18626683950424194, "learning_rate": 2.7750982388848324e-08, "loss": 0.4053, "step": 1478 }, { "epoch": 2.9076015727391873, "grad_norm": 0.1893082559108734, "learning_rate": 2.6558590462207322e-08, "loss": 0.4257, "step": 1479 }, { "epoch": 2.90956749672346, "grad_norm": 0.18734963238239288, "learning_rate": 2.539231303176293e-08, "loss": 0.4164, "step": 1480 }, { "epoch": 2.9115334207077326, "grad_norm": 0.18158259987831116, "learning_rate": 2.4252156221393764e-08, "loss": 0.4407, "step": 1481 }, { "epoch": 2.9134993446920054, "grad_norm": 0.19056791067123413, "learning_rate": 2.3138126017822614e-08, "loss": 0.4275, "step": 1482 }, { "epoch": 2.915465268676278, "grad_norm": 0.17401531338691711, "learning_rate": 2.205022827058867e-08, "loss": 0.3968, "step": 1483 }, { "epoch": 2.9174311926605503, "grad_norm": 0.1860005408525467, "learning_rate": 2.0988468692014208e-08, "loss": 0.4114, "step": 1484 }, { "epoch": 2.919397116644823, "grad_norm": 0.18477967381477356, "learning_rate": 1.99528528571763e-08, "loss": 0.4195, "step": 1485 }, { "epoch": 2.9213630406290956, "grad_norm": 0.18349048495292664, "learning_rate": 1.8943386203875702e-08, "loss": 0.4166, "step": 1486 }, { "epoch": 2.9233289646133684, "grad_norm": 0.18444041907787323, "learning_rate": 1.7960074032610243e-08, "loss": 0.3953, "step": 1487 }, { "epoch": 2.925294888597641, "grad_norm": 0.18297380208969116, "learning_rate": 1.7002921506544812e-08, "loss": 0.4158, "step": 1488 }, { "epoch": 2.9272608125819133, "grad_norm": 0.18708384037017822, "learning_rate": 1.607193365148696e-08, "loss": 0.4354, "step": 1489 }, { "epoch": 2.929226736566186, "grad_norm": 0.2040756493806839, "learning_rate": 1.516711535585802e-08, "loss": 0.4298, "step": 1490 }, { "epoch": 2.9311926605504586, "grad_norm": 0.18153029680252075, "learning_rate": 1.4288471370669244e-08, "loss": 0.4141, "step": 1491 }, { "epoch": 2.9331585845347314, "grad_norm": 0.1862143576145172, "learning_rate": 1.3436006309495152e-08, "loss": 0.4239, "step": 1492 }, { "epoch": 2.935124508519004, "grad_norm": 0.20297810435295105, "learning_rate": 1.2609724648450228e-08, "loss": 0.4275, "step": 1493 }, { "epoch": 2.9370904325032763, "grad_norm": 0.18510276079177856, "learning_rate": 1.1809630726167808e-08, "loss": 0.4167, "step": 1494 }, { "epoch": 2.939056356487549, "grad_norm": 0.18417201936244965, "learning_rate": 1.1035728743771235e-08, "loss": 0.4285, "step": 1495 }, { "epoch": 2.941022280471822, "grad_norm": 0.1946430206298828, "learning_rate": 1.0288022764857741e-08, "loss": 0.4275, "step": 1496 }, { "epoch": 2.9429882044560944, "grad_norm": 0.19136624038219452, "learning_rate": 9.566516715474594e-09, "loss": 0.4281, "step": 1497 }, { "epoch": 2.944954128440367, "grad_norm": 0.20006603002548218, "learning_rate": 8.871214384097993e-09, "loss": 0.4273, "step": 1498 }, { "epoch": 2.9469200524246397, "grad_norm": 0.18556608259677887, "learning_rate": 8.202119421615306e-09, "loss": 0.4261, "step": 1499 }, { "epoch": 2.948885976408912, "grad_norm": 0.18632759153842926, "learning_rate": 7.559235341302872e-09, "loss": 0.4167, "step": 1500 }, { "epoch": 2.950851900393185, "grad_norm": 0.1911812722682953, "learning_rate": 6.942565518810451e-09, "loss": 0.4116, "step": 1501 }, { "epoch": 2.9528178243774574, "grad_norm": 0.18124179542064667, "learning_rate": 6.352113192141241e-09, "loss": 0.3971, "step": 1502 }, { "epoch": 2.95478374836173, "grad_norm": 0.18406546115875244, "learning_rate": 5.787881461636891e-09, "loss": 0.4305, "step": 1503 }, { "epoch": 2.9567496723460027, "grad_norm": 0.18451999127864838, "learning_rate": 5.24987328995974e-09, "loss": 0.4107, "step": 1504 }, { "epoch": 2.958715596330275, "grad_norm": 0.1855209767818451, "learning_rate": 4.738091502077269e-09, "loss": 0.4312, "step": 1505 }, { "epoch": 2.960681520314548, "grad_norm": 0.19284525513648987, "learning_rate": 4.252538785248228e-09, "loss": 0.4122, "step": 1506 }, { "epoch": 2.9626474442988204, "grad_norm": 0.1882369965314865, "learning_rate": 3.793217689008199e-09, "loss": 0.4288, "step": 1507 }, { "epoch": 2.964613368283093, "grad_norm": 0.181296244263649, "learning_rate": 3.360130625155722e-09, "loss": 0.4153, "step": 1508 }, { "epoch": 2.9665792922673657, "grad_norm": 0.19471704959869385, "learning_rate": 2.9532798677395226e-09, "loss": 0.4211, "step": 1509 }, { "epoch": 2.968545216251638, "grad_norm": 0.18209101259708405, "learning_rate": 2.5726675530479695e-09, "loss": 0.4123, "step": 1510 }, { "epoch": 2.970511140235911, "grad_norm": 0.19035595655441284, "learning_rate": 2.21829567959686e-09, "loss": 0.4341, "step": 1511 }, { "epoch": 2.9724770642201834, "grad_norm": 0.18587549030780792, "learning_rate": 1.8901661081172084e-09, "loss": 0.4095, "step": 1512 }, { "epoch": 2.974442988204456, "grad_norm": 0.1827526092529297, "learning_rate": 1.5882805615496931e-09, "loss": 0.4119, "step": 1513 }, { "epoch": 2.9764089121887287, "grad_norm": 0.18282373249530792, "learning_rate": 1.312640625030781e-09, "loss": 0.4085, "step": 1514 }, { "epoch": 2.9783748361730016, "grad_norm": 0.19089441001415253, "learning_rate": 1.0632477458888401e-09, "loss": 0.4281, "step": 1515 }, { "epoch": 2.980340760157274, "grad_norm": 0.18670634925365448, "learning_rate": 8.401032336330384e-10, "loss": 0.4218, "step": 1516 }, { "epoch": 2.9823066841415464, "grad_norm": 0.20464399456977844, "learning_rate": 6.432082599489021e-10, "loss": 0.4187, "step": 1517 }, { "epoch": 2.9842726081258193, "grad_norm": 0.1876835972070694, "learning_rate": 4.725638586894344e-10, "loss": 0.429, "step": 1518 }, { "epoch": 2.9862385321100917, "grad_norm": 0.18850575387477875, "learning_rate": 3.2817092587345e-10, "loss": 0.4134, "step": 1519 }, { "epoch": 2.9882044560943646, "grad_norm": 0.19078615307807922, "learning_rate": 2.1003021967780369e-10, "loss": 0.4259, "step": 1520 }, { "epoch": 2.990170380078637, "grad_norm": 0.17989006638526917, "learning_rate": 1.1814236043405924e-10, "loss": 0.4127, "step": 1521 }, { "epoch": 2.9921363040629094, "grad_norm": 0.18568894267082214, "learning_rate": 5.250783062682452e-11, "loss": 0.4284, "step": 1522 }, { "epoch": 2.9941022280471823, "grad_norm": 0.19495601952075958, "learning_rate": 1.3126974888200139e-11, "loss": 0.4265, "step": 1523 }, { "epoch": 2.9960681520314547, "grad_norm": 0.19474370777606964, "learning_rate": 0.0, "loss": 0.4213, "step": 1524 }, { "epoch": 2.9960681520314547, "step": 1524, "total_flos": 4792734141382656.0, "train_loss": 0.46163445664203073, "train_runtime": 82407.0132, "train_samples_per_second": 1.777, "train_steps_per_second": 0.018 } ], "logging_steps": 1.0, "max_steps": 1524, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4792734141382656.0, "train_batch_size": 1, "trial_name": null, "trial_params": null }